初始化项目,由ModelHub XC社区提供模型
Model: DCAgent/a1-stackexchange_tor Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
60
README.md
Normal file
60
README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
---
|
||||
library_name: transformers
|
||||
license: other
|
||||
base_model: Qwen/Qwen3-8B
|
||||
tags:
|
||||
- llama-factory
|
||||
- full
|
||||
- generated_from_trainer
|
||||
model-index:
|
||||
- name: sft_a1_stackexchange_tor__Qwen3-8B
|
||||
results: []
|
||||
---
|
||||
|
||||
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||
should probably proofread and complete it, then remove this comment. -->
|
||||
|
||||
# sft_a1_stackexchange_tor__Qwen3-8B
|
||||
|
||||
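This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on a thinking-preprocessed copy of the `DCAgent/stackexchange-tor-sandboxes_glm_4.7_traces_jupiter` dataset (snapshot `a0044e399194e17c864e3a35296dd1520fdddef0`), loaded during training from the local path `/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--stackexchange-tor-sandboxes_glm_4.7_traces_jupiter/snapshots/a0044e399194e17c864e3a35296dd1520fdddef0_thinking_preprocessed`.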
|
||||
|
||||
## Model description
|
||||
|
||||
More information needed
|
||||
|
||||
## Intended uses & limitations
|
||||
|
||||
More information needed
|
||||
|
||||
## Training and evaluation data
|
||||
|
||||
More information needed
|
||||
|
||||
## Training procedure
|
||||
|
||||
### Training hyperparameters
|
||||
|
||||
The following hyperparameters were used during training:
|
||||
- learning_rate: 4e-05
|
||||
- train_batch_size: 1
|
||||
- eval_batch_size: 8
|
||||
- seed: 42
|
||||
- distributed_type: multi-GPU
|
||||
- num_devices: 16
|
||||
- total_train_batch_size: 16
|
||||
- total_eval_batch_size: 128
|
||||
- optimizer: AdamW (`OptimizerNames.ADAMW_TORCH_FUSED`) with betas=(0.9, 0.98) and epsilon=1e-08; no additional optimizer arguments
|
||||
- lr_scheduler_type: cosine
|
||||
- lr_scheduler_warmup_ratio: 0.1
|
||||
- num_epochs: 7.0
|
||||
|
||||
### Training results
|
||||
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- Transformers 4.57.6
|
||||
- Pytorch 2.9.1+cu130
|
||||
- Datasets 4.7.0
|
||||
- Tokenizers 0.22.2
|
||||
28
added_tokens.json
Normal file
28
added_tokens.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"</think>": 151668,
|
||||
"</tool_call>": 151658,
|
||||
"</tool_response>": 151666,
|
||||
"<think>": 151667,
|
||||
"<tool_call>": 151657,
|
||||
"<tool_response>": 151665,
|
||||
"<|box_end|>": 151649,
|
||||
"<|box_start|>": 151648,
|
||||
"<|endoftext|>": 151643,
|
||||
"<|file_sep|>": 151664,
|
||||
"<|fim_middle|>": 151660,
|
||||
"<|fim_pad|>": 151662,
|
||||
"<|fim_prefix|>": 151659,
|
||||
"<|fim_suffix|>": 151661,
|
||||
"<|im_end|>": 151645,
|
||||
"<|im_start|>": 151644,
|
||||
"<|image_pad|>": 151655,
|
||||
"<|object_ref_end|>": 151647,
|
||||
"<|object_ref_start|>": 151646,
|
||||
"<|quad_end|>": 151651,
|
||||
"<|quad_start|>": 151650,
|
||||
"<|repo_name|>": 151663,
|
||||
"<|video_pad|>": 151656,
|
||||
"<|vision_end|>": 151653,
|
||||
"<|vision_pad|>": 151654,
|
||||
"<|vision_start|>": 151652
|
||||
}
|
||||
16
all_results.json
Normal file
16
all_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.0026187013159839974,
|
||||
"achieved_tflops_per_gpu_theoretical": 555.6163483029995,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.24078989028930664,
|
||||
"mfu_percent": 0.00018506723081158992,
|
||||
"mfu_percent_theoretical": 39.266173024946966,
|
||||
"total_flos": 1125904534470656.0,
|
||||
"train_loss": 0.35793851753530226,
|
||||
"train_runtime": 26871.7295,
|
||||
"train_samples_per_second": 2.627,
|
||||
"train_steps_per_second": 0.164,
|
||||
"valid_targets_mean": 2557.6,
|
||||
"valid_targets_min": 1102
|
||||
}
|
||||
89
chat_template.jinja
Normal file
89
chat_template.jinja
Normal file
@@ -0,0 +1,89 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- if message.content is string %}
|
||||
{%- set content = message.content %}
|
||||
{%- else %}
|
||||
{%- set content = '' %}
|
||||
{%- endif %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||
{{- '<think>\n\n</think>\n\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
68
config.json
Normal file
68
config.json
Normal file
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 12288,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 40960,
|
||||
"max_window_layers": 36,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 36,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 151643,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"transformers_version": "4.57.6",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
12
generation_config.json
Normal file
12
generation_config.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"temperature": 0.6,
|
||||
"top_k": 20,
|
||||
"top_p": 0.95,
|
||||
"transformers_version": "4.57.6"
|
||||
}
|
||||
151388
merges.txt
Normal file
151388
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00004.safetensors
Normal file
3
model-00001-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1ee203b2adad5db5247639dcacba86cbe9d82243fc3c8a68f9b17ca91e07f57f
|
||||
size 4902257696
|
||||
3
model-00002-of-00004.safetensors
Normal file
3
model-00002-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:51cf6f6b396f64f67195c78387f646ff9b91d10220bca3949dc0be386e9670ce
|
||||
size 4915960368
|
||||
3
model-00003-of-00004.safetensors
Normal file
3
model-00003-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:abfbd276d5a2416d0172ec545249db99a48ea3cea484cc3b74a8af0873649ac5
|
||||
size 4983068496
|
||||
3
model-00004-of-00004.safetensors
Normal file
3
model-00004-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7bb2f2689a0bf7a6d33fee46325cc4a9646656a5a96ca49f7d01d0ccbe54f02d
|
||||
size 1580230264
|
||||
407
model.safetensors.index.json
Normal file
407
model.safetensors.index.json
Normal file
@@ -0,0 +1,407 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_parameters": 8190735360,
|
||||
"total_size": 16381470720
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00004-of-00004.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.norm.weight": "model-00004-of-00004.safetensors"
|
||||
}
|
||||
}
|
||||
12
run_summary.json
Normal file
12
run_summary.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"agent_name": "a0044e399194e17c864e3a35296dd1520fdddef0_thinking_preprocessed",
|
||||
"training_start": null,
|
||||
"training_end": null,
|
||||
"created_by": "raoof1",
|
||||
"base_model_name": "Qwen/Qwen3-8B",
|
||||
"dataset_name": "/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--stackexchange-tor-sandboxes_glm_4.7_traces_jupiter/snapshots/a0044e399194e17c864e3a35296dd1520fdddef0_thinking_preprocessed",
|
||||
"training_type": "SFT",
|
||||
"training_parameters": "https://huggingface.co/DCAgent/a1-stackexchange_tor/blob/main/config.json",
|
||||
"wandb_link": null,
|
||||
"traces_location_s3": null
|
||||
}
|
||||
31
special_tokens_map.json
Normal file
31
special_tokens_map.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"eos_token": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
||||
size 11422654
|
||||
240
tokenizer_config.json
Normal file
240
tokenizer_config.json
Normal file
@@ -0,0 +1,240 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|object_ref_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|object_ref_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151648": {
|
||||
"content": "<|box_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151649": {
|
||||
"content": "<|box_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151665": {
|
||||
"content": "<tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151666": {
|
||||
"content": "</tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151667": {
|
||||
"content": "<think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151668": {
|
||||
"content": "</think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": {},
|
||||
"model_max_length": 32768,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"padding_side": "right",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
16
train_results.json
Normal file
16
train_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.0026187013159839974,
|
||||
"achieved_tflops_per_gpu_theoretical": 555.6163483029995,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.24078989028930664,
|
||||
"mfu_percent": 0.00018506723081158992,
|
||||
"mfu_percent_theoretical": 39.266173024946966,
|
||||
"total_flos": 1125904534470656.0,
|
||||
"train_loss": 0.35793851753530226,
|
||||
"train_runtime": 26871.7295,
|
||||
"train_samples_per_second": 2.627,
|
||||
"train_steps_per_second": 0.164,
|
||||
"valid_targets_mean": 2557.6,
|
||||
"valid_targets_min": 1102
|
||||
}
|
||||
884
trainer_log.jsonl
Normal file
884
trainer_log.jsonl
Normal file
@@ -0,0 +1,884 @@
|
||||
{"current_steps": 5, "total_steps": 4417, "loss": 0.9909, "lr": 3.619909502262444e-07, "epoch": 0.00792393026941363, "percentage": 0.11, "elapsed_time": "0:00:29", "remaining_time": "7:08:40"}
|
||||
{"current_steps": 10, "total_steps": 4417, "loss": 0.9684, "lr": 8.144796380090498e-07, "epoch": 0.01584786053882726, "percentage": 0.23, "elapsed_time": "0:01:00", "remaining_time": "7:26:54"}
|
||||
{"current_steps": 15, "total_steps": 4417, "loss": 0.928, "lr": 1.2669683257918552e-06, "epoch": 0.02377179080824089, "percentage": 0.34, "elapsed_time": "0:01:33", "remaining_time": "7:35:21"}
|
||||
{"current_steps": 20, "total_steps": 4417, "loss": 0.9344, "lr": 1.7194570135746609e-06, "epoch": 0.03169572107765452, "percentage": 0.45, "elapsed_time": "0:02:03", "remaining_time": "7:32:53"}
|
||||
{"current_steps": 25, "total_steps": 4417, "loss": 0.896, "lr": 2.171945701357466e-06, "epoch": 0.039619651347068144, "percentage": 0.57, "elapsed_time": "0:02:39", "remaining_time": "7:46:05"}
|
||||
{"current_steps": 30, "total_steps": 4417, "loss": 0.8017, "lr": 2.624434389140272e-06, "epoch": 0.04754358161648178, "percentage": 0.68, "elapsed_time": "0:03:16", "remaining_time": "7:58:15"}
|
||||
{"current_steps": 35, "total_steps": 4417, "loss": 0.7712, "lr": 3.0769230769230774e-06, "epoch": 0.0554675118858954, "percentage": 0.79, "elapsed_time": "0:03:48", "remaining_time": "7:57:12"}
|
||||
{"current_steps": 40, "total_steps": 4417, "loss": 0.7427, "lr": 3.529411764705883e-06, "epoch": 0.06339144215530904, "percentage": 0.91, "elapsed_time": "0:04:17", "remaining_time": "7:49:19"}
|
||||
{"current_steps": 45, "total_steps": 4417, "loss": 0.7016, "lr": 3.9819004524886875e-06, "epoch": 0.07131537242472266, "percentage": 1.02, "elapsed_time": "0:04:50", "remaining_time": "7:50:43"}
|
||||
{"current_steps": 50, "total_steps": 4417, "loss": 0.697, "lr": 4.434389140271493e-06, "epoch": 0.07923930269413629, "percentage": 1.13, "elapsed_time": "0:05:22", "remaining_time": "7:49:22"}
|
||||
{"current_steps": 55, "total_steps": 4417, "loss": 0.6765, "lr": 4.886877828054299e-06, "epoch": 0.08716323296354993, "percentage": 1.25, "elapsed_time": "0:05:55", "remaining_time": "7:49:59"}
|
||||
{"current_steps": 60, "total_steps": 4417, "loss": 0.6928, "lr": 5.339366515837105e-06, "epoch": 0.09508716323296355, "percentage": 1.36, "elapsed_time": "0:06:26", "remaining_time": "7:47:49"}
|
||||
{"current_steps": 65, "total_steps": 4417, "loss": 0.6466, "lr": 5.79185520361991e-06, "epoch": 0.10301109350237718, "percentage": 1.47, "elapsed_time": "0:07:02", "remaining_time": "7:51:16"}
|
||||
{"current_steps": 70, "total_steps": 4417, "loss": 0.6493, "lr": 6.244343891402716e-06, "epoch": 0.1109350237717908, "percentage": 1.58, "elapsed_time": "0:07:34", "remaining_time": "7:50:18"}
|
||||
{"current_steps": 75, "total_steps": 4417, "loss": 0.6203, "lr": 6.69683257918552e-06, "epoch": 0.11885895404120443, "percentage": 1.7, "elapsed_time": "0:08:02", "remaining_time": "7:45:14"}
|
||||
{"current_steps": 80, "total_steps": 4417, "loss": 0.6207, "lr": 7.1493212669683265e-06, "epoch": 0.12678288431061807, "percentage": 1.81, "elapsed_time": "0:08:32", "remaining_time": "7:43:15"}
|
||||
{"current_steps": 85, "total_steps": 4417, "loss": 0.6579, "lr": 7.601809954751131e-06, "epoch": 0.1347068145800317, "percentage": 1.92, "elapsed_time": "0:09:03", "remaining_time": "7:41:19"}
|
||||
{"current_steps": 90, "total_steps": 4417, "loss": 0.62, "lr": 8.054298642533938e-06, "epoch": 0.14263074484944532, "percentage": 2.04, "elapsed_time": "0:09:34", "remaining_time": "7:40:31"}
|
||||
{"current_steps": 95, "total_steps": 4417, "loss": 0.6039, "lr": 8.506787330316743e-06, "epoch": 0.15055467511885895, "percentage": 2.15, "elapsed_time": "0:10:05", "remaining_time": "7:39:23"}
|
||||
{"current_steps": 100, "total_steps": 4417, "loss": 0.6062, "lr": 8.95927601809955e-06, "epoch": 0.15847860538827258, "percentage": 2.26, "elapsed_time": "0:10:36", "remaining_time": "7:37:37"}
|
||||
{"current_steps": 105, "total_steps": 4417, "loss": 0.5937, "lr": 9.411764705882354e-06, "epoch": 0.1664025356576862, "percentage": 2.38, "elapsed_time": "0:11:05", "remaining_time": "7:35:31"}
|
||||
{"current_steps": 110, "total_steps": 4417, "loss": 0.5676, "lr": 9.86425339366516e-06, "epoch": 0.17432646592709986, "percentage": 2.49, "elapsed_time": "0:11:38", "remaining_time": "7:36:01"}
|
||||
{"current_steps": 115, "total_steps": 4417, "loss": 0.5769, "lr": 1.0316742081447966e-05, "epoch": 0.18225039619651348, "percentage": 2.6, "elapsed_time": "0:12:07", "remaining_time": "7:33:47"}
|
||||
{"current_steps": 120, "total_steps": 4417, "loss": 0.585, "lr": 1.076923076923077e-05, "epoch": 0.1901743264659271, "percentage": 2.72, "elapsed_time": "0:12:47", "remaining_time": "7:37:52"}
|
||||
{"current_steps": 125, "total_steps": 4417, "loss": 0.5752, "lr": 1.1221719457013576e-05, "epoch": 0.19809825673534073, "percentage": 2.83, "elapsed_time": "0:13:25", "remaining_time": "7:40:46"}
|
||||
{"current_steps": 130, "total_steps": 4417, "loss": 0.5427, "lr": 1.1674208144796382e-05, "epoch": 0.20602218700475436, "percentage": 2.94, "elapsed_time": "0:13:58", "remaining_time": "7:40:46"}
|
||||
{"current_steps": 135, "total_steps": 4417, "loss": 0.5473, "lr": 1.2126696832579185e-05, "epoch": 0.21394611727416799, "percentage": 3.06, "elapsed_time": "0:14:34", "remaining_time": "7:42:20"}
|
||||
{"current_steps": 140, "total_steps": 4417, "loss": 0.5497, "lr": 1.2579185520361991e-05, "epoch": 0.2218700475435816, "percentage": 3.17, "elapsed_time": "0:15:03", "remaining_time": "7:39:54"}
|
||||
{"current_steps": 145, "total_steps": 4417, "loss": 0.5247, "lr": 1.3031674208144797e-05, "epoch": 0.22979397781299524, "percentage": 3.28, "elapsed_time": "0:15:30", "remaining_time": "7:36:51"}
|
||||
{"current_steps": 150, "total_steps": 4417, "loss": 0.5628, "lr": 1.3484162895927604e-05, "epoch": 0.23771790808240886, "percentage": 3.4, "elapsed_time": "0:16:03", "remaining_time": "7:36:37"}
|
||||
{"current_steps": 155, "total_steps": 4417, "loss": 0.5325, "lr": 1.3936651583710408e-05, "epoch": 0.24564183835182252, "percentage": 3.51, "elapsed_time": "0:16:40", "remaining_time": "7:38:40"}
|
||||
{"current_steps": 160, "total_steps": 4417, "loss": 0.5614, "lr": 1.4389140271493213e-05, "epoch": 0.25356576862123614, "percentage": 3.62, "elapsed_time": "0:17:14", "remaining_time": "7:38:42"}
|
||||
{"current_steps": 165, "total_steps": 4417, "loss": 0.555, "lr": 1.484162895927602e-05, "epoch": 0.26148969889064977, "percentage": 3.74, "elapsed_time": "0:17:45", "remaining_time": "7:37:27"}
|
||||
{"current_steps": 170, "total_steps": 4417, "loss": 0.55, "lr": 1.5294117647058822e-05, "epoch": 0.2694136291600634, "percentage": 3.85, "elapsed_time": "0:18:17", "remaining_time": "7:36:49"}
|
||||
{"current_steps": 175, "total_steps": 4417, "loss": 0.5179, "lr": 1.574660633484163e-05, "epoch": 0.277337559429477, "percentage": 3.96, "elapsed_time": "0:18:50", "remaining_time": "7:36:52"}
|
||||
{"current_steps": 180, "total_steps": 4417, "loss": 0.4977, "lr": 1.6199095022624435e-05, "epoch": 0.28526148969889065, "percentage": 4.08, "elapsed_time": "0:19:24", "remaining_time": "7:37:02"}
|
||||
{"current_steps": 185, "total_steps": 4417, "loss": 0.5391, "lr": 1.6651583710407243e-05, "epoch": 0.2931854199683043, "percentage": 4.19, "elapsed_time": "0:19:59", "remaining_time": "7:37:24"}
|
||||
{"current_steps": 190, "total_steps": 4417, "loss": 0.5302, "lr": 1.7104072398190047e-05, "epoch": 0.3011093502377179, "percentage": 4.3, "elapsed_time": "0:20:33", "remaining_time": "7:37:11"}
|
||||
{"current_steps": 195, "total_steps": 4417, "loss": 0.5174, "lr": 1.7556561085972852e-05, "epoch": 0.3090332805071315, "percentage": 4.41, "elapsed_time": "0:20:59", "remaining_time": "7:34:38"}
|
||||
{"current_steps": 200, "total_steps": 4417, "loss": 0.5504, "lr": 1.8009049773755657e-05, "epoch": 0.31695721077654515, "percentage": 4.53, "elapsed_time": "0:21:29", "remaining_time": "7:33:14"}
|
||||
{"current_steps": 205, "total_steps": 4417, "loss": 0.5109, "lr": 1.8461538461538465e-05, "epoch": 0.3248811410459588, "percentage": 4.64, "elapsed_time": "0:21:58", "remaining_time": "7:31:40"}
|
||||
{"current_steps": 210, "total_steps": 4417, "loss": 0.5039, "lr": 1.891402714932127e-05, "epoch": 0.3328050713153724, "percentage": 4.75, "elapsed_time": "0:22:30", "remaining_time": "7:30:55"}
|
||||
{"current_steps": 215, "total_steps": 4417, "loss": 0.504, "lr": 1.9366515837104074e-05, "epoch": 0.34072900158478603, "percentage": 4.87, "elapsed_time": "0:23:00", "remaining_time": "7:29:35"}
|
||||
{"current_steps": 220, "total_steps": 4417, "loss": 0.5304, "lr": 1.981900452488688e-05, "epoch": 0.3486529318541997, "percentage": 4.98, "elapsed_time": "0:23:24", "remaining_time": "7:26:28"}
|
||||
{"current_steps": 225, "total_steps": 4417, "loss": 0.5235, "lr": 2.0271493212669683e-05, "epoch": 0.35657686212361334, "percentage": 5.09, "elapsed_time": "0:23:52", "remaining_time": "7:24:54"}
|
||||
{"current_steps": 230, "total_steps": 4417, "loss": 0.5407, "lr": 2.072398190045249e-05, "epoch": 0.36450079239302696, "percentage": 5.21, "elapsed_time": "0:24:25", "remaining_time": "7:24:43"}
|
||||
{"current_steps": 235, "total_steps": 4417, "loss": 0.5032, "lr": 2.1176470588235296e-05, "epoch": 0.3724247226624406, "percentage": 5.32, "elapsed_time": "0:24:55", "remaining_time": "7:23:37"}
|
||||
{"current_steps": 240, "total_steps": 4417, "loss": 0.5213, "lr": 2.16289592760181e-05, "epoch": 0.3803486529318542, "percentage": 5.43, "elapsed_time": "0:25:27", "remaining_time": "7:22:59"}
|
||||
{"current_steps": 245, "total_steps": 4417, "loss": 0.5073, "lr": 2.2081447963800908e-05, "epoch": 0.38827258320126784, "percentage": 5.55, "elapsed_time": "0:25:57", "remaining_time": "7:21:57"}
|
||||
{"current_steps": 250, "total_steps": 4417, "loss": 0.5124, "lr": 2.2533936651583713e-05, "epoch": 0.39619651347068147, "percentage": 5.66, "elapsed_time": "0:26:28", "remaining_time": "7:21:08"}
|
||||
{"current_steps": 255, "total_steps": 4417, "loss": 0.4913, "lr": 2.2986425339366517e-05, "epoch": 0.4041204437400951, "percentage": 5.77, "elapsed_time": "0:27:06", "remaining_time": "7:22:21"}
|
||||
{"current_steps": 260, "total_steps": 4417, "loss": 0.5018, "lr": 2.3438914027149325e-05, "epoch": 0.4120443740095087, "percentage": 5.89, "elapsed_time": "0:27:38", "remaining_time": "7:21:59"}
|
||||
{"current_steps": 265, "total_steps": 4417, "loss": 0.4818, "lr": 2.3891402714932127e-05, "epoch": 0.41996830427892234, "percentage": 6.0, "elapsed_time": "0:28:16", "remaining_time": "7:22:54"}
|
||||
{"current_steps": 270, "total_steps": 4417, "loss": 0.4938, "lr": 2.434389140271493e-05, "epoch": 0.42789223454833597, "percentage": 6.11, "elapsed_time": "0:28:53", "remaining_time": "7:23:47"}
|
||||
{"current_steps": 275, "total_steps": 4417, "loss": 0.4837, "lr": 2.479638009049774e-05, "epoch": 0.4358161648177496, "percentage": 6.23, "elapsed_time": "0:29:28", "remaining_time": "7:23:56"}
|
||||
{"current_steps": 280, "total_steps": 4417, "loss": 0.4725, "lr": 2.5248868778280544e-05, "epoch": 0.4437400950871632, "percentage": 6.34, "elapsed_time": "0:30:00", "remaining_time": "7:23:27"}
|
||||
{"current_steps": 285, "total_steps": 4417, "loss": 0.508, "lr": 2.5701357466063352e-05, "epoch": 0.45166402535657685, "percentage": 6.45, "elapsed_time": "0:30:29", "remaining_time": "7:22:08"}
|
||||
{"current_steps": 290, "total_steps": 4417, "loss": 0.503, "lr": 2.6153846153846157e-05, "epoch": 0.4595879556259905, "percentage": 6.57, "elapsed_time": "0:30:52", "remaining_time": "7:19:17"}
|
||||
{"current_steps": 295, "total_steps": 4417, "loss": 0.4563, "lr": 2.660633484162896e-05, "epoch": 0.4675118858954041, "percentage": 6.68, "elapsed_time": "0:31:27", "remaining_time": "7:19:27"}
|
||||
{"current_steps": 300, "total_steps": 4417, "loss": 0.4595, "lr": 2.705882352941177e-05, "epoch": 0.4754358161648177, "percentage": 6.79, "elapsed_time": "0:31:59", "remaining_time": "7:18:58"}
|
||||
{"current_steps": 305, "total_steps": 4417, "loss": 0.4776, "lr": 2.7511312217194574e-05, "epoch": 0.48335974643423135, "percentage": 6.91, "elapsed_time": "0:32:30", "remaining_time": "7:18:10"}
|
||||
{"current_steps": 310, "total_steps": 4417, "loss": 0.4906, "lr": 2.7963800904977375e-05, "epoch": 0.49128367670364503, "percentage": 7.02, "elapsed_time": "0:32:57", "remaining_time": "7:16:38"}
|
||||
{"current_steps": 315, "total_steps": 4417, "loss": 0.4946, "lr": 2.8416289592760183e-05, "epoch": 0.49920760697305866, "percentage": 7.13, "elapsed_time": "0:33:26", "remaining_time": "7:15:31"}
|
||||
{"current_steps": 320, "total_steps": 4417, "loss": 0.4741, "lr": 2.8868778280542988e-05, "epoch": 0.5071315372424723, "percentage": 7.24, "elapsed_time": "0:33:58", "remaining_time": "7:14:55"}
|
||||
{"current_steps": 325, "total_steps": 4417, "loss": 0.482, "lr": 2.9321266968325792e-05, "epoch": 0.5150554675118859, "percentage": 7.36, "elapsed_time": "0:34:22", "remaining_time": "7:12:44"}
|
||||
{"current_steps": 330, "total_steps": 4417, "loss": 0.4667, "lr": 2.97737556561086e-05, "epoch": 0.5229793977812995, "percentage": 7.47, "elapsed_time": "0:34:48", "remaining_time": "7:11:02"}
|
||||
{"current_steps": 335, "total_steps": 4417, "loss": 0.5061, "lr": 3.0226244343891405e-05, "epoch": 0.5309033280507132, "percentage": 7.58, "elapsed_time": "0:35:20", "remaining_time": "7:10:34"}
|
||||
{"current_steps": 340, "total_steps": 4417, "loss": 0.4835, "lr": 3.067873303167421e-05, "epoch": 0.5388272583201268, "percentage": 7.7, "elapsed_time": "0:35:51", "remaining_time": "7:10:02"}
|
||||
{"current_steps": 345, "total_steps": 4417, "loss": 0.5017, "lr": 3.1131221719457014e-05, "epoch": 0.5467511885895404, "percentage": 7.81, "elapsed_time": "0:36:26", "remaining_time": "7:10:09"}
|
||||
{"current_steps": 350, "total_steps": 4417, "loss": 0.5038, "lr": 3.158371040723982e-05, "epoch": 0.554675118858954, "percentage": 7.92, "elapsed_time": "0:36:49", "remaining_time": "7:07:51"}
|
||||
{"current_steps": 355, "total_steps": 4417, "loss": 0.4813, "lr": 3.203619909502263e-05, "epoch": 0.5625990491283677, "percentage": 8.04, "elapsed_time": "0:37:21", "remaining_time": "7:07:24"}
|
||||
{"current_steps": 360, "total_steps": 4417, "loss": 0.4977, "lr": 3.248868778280543e-05, "epoch": 0.5705229793977813, "percentage": 8.15, "elapsed_time": "0:37:52", "remaining_time": "7:06:44"}
|
||||
{"current_steps": 365, "total_steps": 4417, "loss": 0.4842, "lr": 3.294117647058824e-05, "epoch": 0.5784469096671949, "percentage": 8.26, "elapsed_time": "0:38:24", "remaining_time": "7:06:17"}
|
||||
{"current_steps": 370, "total_steps": 4417, "loss": 0.4761, "lr": 3.339366515837105e-05, "epoch": 0.5863708399366085, "percentage": 8.38, "elapsed_time": "0:38:50", "remaining_time": "7:04:54"}
|
||||
{"current_steps": 375, "total_steps": 4417, "loss": 0.4981, "lr": 3.384615384615385e-05, "epoch": 0.5942947702060222, "percentage": 8.49, "elapsed_time": "0:39:20", "remaining_time": "7:04:06"}
|
||||
{"current_steps": 380, "total_steps": 4417, "loss": 0.4827, "lr": 3.429864253393665e-05, "epoch": 0.6022187004754358, "percentage": 8.6, "elapsed_time": "0:39:44", "remaining_time": "7:02:15"}
|
||||
{"current_steps": 385, "total_steps": 4417, "loss": 0.474, "lr": 3.475113122171946e-05, "epoch": 0.6101426307448494, "percentage": 8.72, "elapsed_time": "0:40:16", "remaining_time": "7:01:47"}
|
||||
{"current_steps": 390, "total_steps": 4417, "loss": 0.4903, "lr": 3.5203619909502266e-05, "epoch": 0.618066561014263, "percentage": 8.83, "elapsed_time": "0:40:44", "remaining_time": "7:00:42"}
|
||||
{"current_steps": 395, "total_steps": 4417, "loss": 0.4516, "lr": 3.5656108597285074e-05, "epoch": 0.6259904912836767, "percentage": 8.94, "elapsed_time": "0:41:13", "remaining_time": "6:59:48"}
|
||||
{"current_steps": 400, "total_steps": 4417, "loss": 0.506, "lr": 3.6108597285067875e-05, "epoch": 0.6339144215530903, "percentage": 9.06, "elapsed_time": "0:41:49", "remaining_time": "6:59:58"}
|
||||
{"current_steps": 405, "total_steps": 4417, "loss": 0.438, "lr": 3.656108597285068e-05, "epoch": 0.6418383518225039, "percentage": 9.17, "elapsed_time": "0:42:22", "remaining_time": "6:59:43"}
|
||||
{"current_steps": 410, "total_steps": 4417, "loss": 0.5022, "lr": 3.701357466063349e-05, "epoch": 0.6497622820919176, "percentage": 9.28, "elapsed_time": "0:42:55", "remaining_time": "6:59:30"}
|
||||
{"current_steps": 415, "total_steps": 4417, "loss": 0.4701, "lr": 3.746606334841629e-05, "epoch": 0.6576862123613312, "percentage": 9.4, "elapsed_time": "0:43:31", "remaining_time": "6:59:47"}
|
||||
{"current_steps": 420, "total_steps": 4417, "loss": 0.4813, "lr": 3.791855203619909e-05, "epoch": 0.6656101426307448, "percentage": 9.51, "elapsed_time": "0:44:03", "remaining_time": "6:59:20"}
|
||||
{"current_steps": 425, "total_steps": 4417, "loss": 0.4519, "lr": 3.837104072398191e-05, "epoch": 0.6735340729001584, "percentage": 9.62, "elapsed_time": "0:44:33", "remaining_time": "6:58:35"}
|
||||
{"current_steps": 430, "total_steps": 4417, "loss": 0.4603, "lr": 3.882352941176471e-05, "epoch": 0.6814580031695721, "percentage": 9.74, "elapsed_time": "0:45:03", "remaining_time": "6:57:50"}
|
||||
{"current_steps": 435, "total_steps": 4417, "loss": 0.478, "lr": 3.927601809954751e-05, "epoch": 0.6893819334389857, "percentage": 9.85, "elapsed_time": "0:45:35", "remaining_time": "6:57:18"}
|
||||
{"current_steps": 440, "total_steps": 4417, "loss": 0.4656, "lr": 3.972850678733032e-05, "epoch": 0.6973058637083994, "percentage": 9.96, "elapsed_time": "0:46:08", "remaining_time": "6:57:07"}
|
||||
{"current_steps": 445, "total_steps": 4417, "loss": 0.4536, "lr": 3.99999750146533e-05, "epoch": 0.705229793977813, "percentage": 10.07, "elapsed_time": "0:46:46", "remaining_time": "6:57:26"}
|
||||
{"current_steps": 450, "total_steps": 4417, "loss": 0.4598, "lr": 3.999969393021975e-05, "epoch": 0.7131537242472267, "percentage": 10.19, "elapsed_time": "0:47:13", "remaining_time": "6:56:15"}
|
||||
{"current_steps": 455, "total_steps": 4417, "loss": 0.4637, "lr": 3.999910053407327e-05, "epoch": 0.7210776545166403, "percentage": 10.3, "elapsed_time": "0:47:46", "remaining_time": "6:56:01"}
|
||||
{"current_steps": 460, "total_steps": 4417, "loss": 0.4695, "lr": 3.999819483548022e-05, "epoch": 0.7290015847860539, "percentage": 10.41, "elapsed_time": "0:48:16", "remaining_time": "6:55:14"}
|
||||
{"current_steps": 465, "total_steps": 4417, "loss": 0.4776, "lr": 3.999697684858384e-05, "epoch": 0.7369255150554676, "percentage": 10.53, "elapsed_time": "0:48:45", "remaining_time": "6:54:22"}
|
||||
{"current_steps": 470, "total_steps": 4417, "loss": 0.462, "lr": 3.9995446592403994e-05, "epoch": 0.7448494453248812, "percentage": 10.64, "elapsed_time": "0:49:13", "remaining_time": "6:53:19"}
|
||||
{"current_steps": 475, "total_steps": 4417, "loss": 0.4321, "lr": 3.9993604090836905e-05, "epoch": 0.7527733755942948, "percentage": 10.75, "elapsed_time": "0:49:44", "remaining_time": "6:52:49"}
|
||||
{"current_steps": 480, "total_steps": 4417, "loss": 0.4752, "lr": 3.999144937265473e-05, "epoch": 0.7606973058637084, "percentage": 10.87, "elapsed_time": "0:50:17", "remaining_time": "6:52:28"}
|
||||
{"current_steps": 485, "total_steps": 4417, "loss": 0.4463, "lr": 3.998898247150519e-05, "epoch": 0.768621236133122, "percentage": 10.98, "elapsed_time": "0:50:47", "remaining_time": "6:51:46"}
|
||||
{"current_steps": 490, "total_steps": 4417, "loss": 0.4541, "lr": 3.9986203425910976e-05, "epoch": 0.7765451664025357, "percentage": 11.09, "elapsed_time": "0:51:19", "remaining_time": "6:51:19"}
|
||||
{"current_steps": 495, "total_steps": 4417, "loss": 0.4455, "lr": 3.998311227926918e-05, "epoch": 0.7844690966719493, "percentage": 11.21, "elapsed_time": "0:51:52", "remaining_time": "6:51:00"}
|
||||
{"current_steps": 500, "total_steps": 4417, "loss": 0.5113, "lr": 3.997970907985059e-05, "epoch": 0.7923930269413629, "percentage": 11.32, "elapsed_time": "0:52:19", "remaining_time": "6:49:54"}
|
||||
{"current_steps": 505, "total_steps": 4417, "loss": 0.4497, "lr": 3.9975993880799e-05, "epoch": 0.8003169572107766, "percentage": 11.43, "elapsed_time": "0:52:48", "remaining_time": "6:49:08"}
|
||||
{"current_steps": 510, "total_steps": 4417, "loss": 0.4495, "lr": 3.9971966740130284e-05, "epoch": 0.8082408874801902, "percentage": 11.55, "elapsed_time": "0:53:21", "remaining_time": "6:48:42"}
|
||||
{"current_steps": 515, "total_steps": 4417, "loss": 0.4417, "lr": 3.996762772073156e-05, "epoch": 0.8161648177496038, "percentage": 11.66, "elapsed_time": "0:53:55", "remaining_time": "6:48:33"}
|
||||
{"current_steps": 520, "total_steps": 4417, "loss": 0.4691, "lr": 3.996297689036022e-05, "epoch": 0.8240887480190174, "percentage": 11.77, "elapsed_time": "0:54:22", "remaining_time": "6:47:27"}
|
||||
{"current_steps": 525, "total_steps": 4417, "loss": 0.4741, "lr": 3.995801432164279e-05, "epoch": 0.8320126782884311, "percentage": 11.89, "elapsed_time": "0:54:45", "remaining_time": "6:45:55"}
|
||||
{"current_steps": 530, "total_steps": 4417, "loss": 0.4474, "lr": 3.9952740092073895e-05, "epoch": 0.8399366085578447, "percentage": 12.0, "elapsed_time": "0:55:14", "remaining_time": "6:45:09"}
|
||||
{"current_steps": 535, "total_steps": 4417, "loss": 0.472, "lr": 3.9947154284014955e-05, "epoch": 0.8478605388272583, "percentage": 12.11, "elapsed_time": "0:55:48", "remaining_time": "6:44:54"}
|
||||
{"current_steps": 540, "total_steps": 4417, "loss": 0.4679, "lr": 3.994125698469298e-05, "epoch": 0.8557844690966719, "percentage": 12.23, "elapsed_time": "0:56:15", "remaining_time": "6:43:54"}
|
||||
{"current_steps": 545, "total_steps": 4417, "loss": 0.4354, "lr": 3.9935048286199166e-05, "epoch": 0.8637083993660856, "percentage": 12.34, "elapsed_time": "0:56:50", "remaining_time": "6:43:50"}
|
||||
{"current_steps": 550, "total_steps": 4417, "loss": 0.4391, "lr": 3.992852828548745e-05, "epoch": 0.8716323296354992, "percentage": 12.45, "elapsed_time": "0:57:23", "remaining_time": "6:43:30"}
|
||||
{"current_steps": 555, "total_steps": 4417, "loss": 0.4496, "lr": 3.992169708437304e-05, "epoch": 0.8795562599049128, "percentage": 12.57, "elapsed_time": "0:57:52", "remaining_time": "6:42:42"}
|
||||
{"current_steps": 560, "total_steps": 4417, "loss": 0.4494, "lr": 3.9914554789530753e-05, "epoch": 0.8874801901743264, "percentage": 12.68, "elapsed_time": "0:58:25", "remaining_time": "6:42:22"}
|
||||
{"current_steps": 565, "total_steps": 4417, "loss": 0.4493, "lr": 3.990710151249342e-05, "epoch": 0.8954041204437401, "percentage": 12.79, "elapsed_time": "0:58:51", "remaining_time": "6:41:13"}
|
||||
{"current_steps": 570, "total_steps": 4417, "loss": 0.4577, "lr": 3.989933736965011e-05, "epoch": 0.9033280507131537, "percentage": 12.9, "elapsed_time": "0:59:25", "remaining_time": "6:41:06"}
|
||||
{"current_steps": 575, "total_steps": 4417, "loss": 0.4571, "lr": 3.9891262482244315e-05, "epoch": 0.9112519809825673, "percentage": 13.02, "elapsed_time": "0:59:59", "remaining_time": "6:40:54"}
|
||||
{"current_steps": 580, "total_steps": 4417, "loss": 0.4444, "lr": 3.988287697637206e-05, "epoch": 0.919175911251981, "percentage": 13.13, "elapsed_time": "1:00:31", "remaining_time": "6:40:21"}
|
||||
{"current_steps": 585, "total_steps": 4417, "loss": 0.423, "lr": 3.987418098297993e-05, "epoch": 0.9270998415213946, "percentage": 13.24, "elapsed_time": "1:00:59", "remaining_time": "6:39:28"}
|
||||
{"current_steps": 590, "total_steps": 4417, "loss": 0.4142, "lr": 3.986517463786304e-05, "epoch": 0.9350237717908082, "percentage": 13.36, "elapsed_time": "1:01:36", "remaining_time": "6:39:38"}
|
||||
{"current_steps": 595, "total_steps": 4417, "loss": 0.4453, "lr": 3.985585808166289e-05, "epoch": 0.9429477020602218, "percentage": 13.47, "elapsed_time": "1:02:06", "remaining_time": "6:38:54"}
|
||||
{"current_steps": 600, "total_steps": 4417, "loss": 0.4256, "lr": 3.9846231459865195e-05, "epoch": 0.9508716323296355, "percentage": 13.58, "elapsed_time": "1:02:34", "remaining_time": "6:38:03"}
|
||||
{"current_steps": 605, "total_steps": 4417, "loss": 0.4518, "lr": 3.983629492279759e-05, "epoch": 0.9587955625990491, "percentage": 13.7, "elapsed_time": "1:03:04", "remaining_time": "6:37:23"}
|
||||
{"current_steps": 610, "total_steps": 4417, "loss": 0.4385, "lr": 3.982604862562729e-05, "epoch": 0.9667194928684627, "percentage": 13.81, "elapsed_time": "1:03:32", "remaining_time": "6:36:32"}
|
||||
{"current_steps": 615, "total_steps": 4417, "loss": 0.4333, "lr": 3.9815492728358674e-05, "epoch": 0.9746434231378764, "percentage": 13.92, "elapsed_time": "1:04:07", "remaining_time": "6:36:24"}
|
||||
{"current_steps": 620, "total_steps": 4417, "loss": 0.4216, "lr": 3.980462739583079e-05, "epoch": 0.9825673534072901, "percentage": 14.04, "elapsed_time": "1:04:32", "remaining_time": "6:35:18"}
|
||||
{"current_steps": 625, "total_steps": 4417, "loss": 0.4152, "lr": 3.9793452797714765e-05, "epoch": 0.9904912836767037, "percentage": 14.15, "elapsed_time": "1:05:04", "remaining_time": "6:34:48"}
|
||||
{"current_steps": 630, "total_steps": 4417, "loss": 0.4323, "lr": 3.9781969108511156e-05, "epoch": 0.9984152139461173, "percentage": 14.26, "elapsed_time": "1:05:34", "remaining_time": "6:34:10"}
|
||||
{"current_steps": 635, "total_steps": 4417, "loss": 0.4287, "lr": 3.9770176507547246e-05, "epoch": 1.006339144215531, "percentage": 14.38, "elapsed_time": "1:06:07", "remaining_time": "6:33:48"}
|
||||
{"current_steps": 640, "total_steps": 4417, "loss": 0.4257, "lr": 3.975807517897422e-05, "epoch": 1.0142630744849446, "percentage": 14.49, "elapsed_time": "1:06:36", "remaining_time": "6:33:06"}
|
||||
{"current_steps": 645, "total_steps": 4417, "loss": 0.4053, "lr": 3.9745665311764305e-05, "epoch": 1.0221870047543582, "percentage": 14.6, "elapsed_time": "1:06:59", "remaining_time": "6:31:45"}
|
||||
{"current_steps": 650, "total_steps": 4417, "loss": 0.424, "lr": 3.973294709970781e-05, "epoch": 1.0301109350237718, "percentage": 14.72, "elapsed_time": "1:07:32", "remaining_time": "6:31:23"}
|
||||
{"current_steps": 655, "total_steps": 4417, "loss": 0.4231, "lr": 3.97199207414101e-05, "epoch": 1.0380348652931854, "percentage": 14.83, "elapsed_time": "1:08:04", "remaining_time": "6:30:58"}
|
||||
{"current_steps": 660, "total_steps": 4417, "loss": 0.4549, "lr": 3.970658644028851e-05, "epoch": 1.045958795562599, "percentage": 14.94, "elapsed_time": "1:08:34", "remaining_time": "6:30:23"}
|
||||
{"current_steps": 665, "total_steps": 4417, "loss": 0.4158, "lr": 3.969294440456915e-05, "epoch": 1.0538827258320127, "percentage": 15.06, "elapsed_time": "1:09:02", "remaining_time": "6:29:31"}
|
||||
{"current_steps": 670, "total_steps": 4417, "loss": 0.4183, "lr": 3.9678994847283636e-05, "epoch": 1.0618066561014263, "percentage": 15.17, "elapsed_time": "1:09:29", "remaining_time": "6:28:37"}
|
||||
{"current_steps": 675, "total_steps": 4417, "loss": 0.4003, "lr": 3.9664737986265825e-05, "epoch": 1.06973058637084, "percentage": 15.28, "elapsed_time": "1:10:01", "remaining_time": "6:28:11"}
|
||||
{"current_steps": 680, "total_steps": 4417, "loss": 0.4136, "lr": 3.9650174044148364e-05, "epoch": 1.0776545166402536, "percentage": 15.4, "elapsed_time": "1:10:37", "remaining_time": "6:28:09"}
|
||||
{"current_steps": 685, "total_steps": 4417, "loss": 0.4343, "lr": 3.9635303248359206e-05, "epoch": 1.0855784469096672, "percentage": 15.51, "elapsed_time": "1:11:06", "remaining_time": "6:27:24"}
|
||||
{"current_steps": 690, "total_steps": 4417, "loss": 0.4148, "lr": 3.9620125831118105e-05, "epoch": 1.0935023771790808, "percentage": 15.62, "elapsed_time": "1:11:39", "remaining_time": "6:27:03"}
|
||||
{"current_steps": 695, "total_steps": 4417, "loss": 0.4221, "lr": 3.960464202943293e-05, "epoch": 1.1014263074484945, "percentage": 15.73, "elapsed_time": "1:12:11", "remaining_time": "6:26:37"}
|
||||
{"current_steps": 700, "total_steps": 4417, "loss": 0.4217, "lr": 3.958885208509601e-05, "epoch": 1.109350237717908, "percentage": 15.85, "elapsed_time": "1:12:48", "remaining_time": "6:26:37"}
|
||||
{"current_steps": 705, "total_steps": 4417, "loss": 0.3903, "lr": 3.957275624468037e-05, "epoch": 1.1172741679873217, "percentage": 15.96, "elapsed_time": "1:13:21", "remaining_time": "6:26:16"}
|
||||
{"current_steps": 710, "total_steps": 4417, "loss": 0.3989, "lr": 3.9556354759535806e-05, "epoch": 1.1251980982567353, "percentage": 16.07, "elapsed_time": "1:13:52", "remaining_time": "6:25:42"}
|
||||
{"current_steps": 715, "total_steps": 4417, "loss": 0.4271, "lr": 3.9539647885785056e-05, "epoch": 1.133122028526149, "percentage": 16.19, "elapsed_time": "1:14:27", "remaining_time": "6:25:31"}
|
||||
{"current_steps": 720, "total_steps": 4417, "loss": 0.4155, "lr": 3.952263588431971e-05, "epoch": 1.1410459587955626, "percentage": 16.3, "elapsed_time": "1:15:02", "remaining_time": "6:25:17"}
|
||||
{"current_steps": 725, "total_steps": 4417, "loss": 0.3988, "lr": 3.950531902079622e-05, "epoch": 1.1489698890649762, "percentage": 16.41, "elapsed_time": "1:15:32", "remaining_time": "6:24:39"}
|
||||
{"current_steps": 730, "total_steps": 4417, "loss": 0.4215, "lr": 3.948769756563167e-05, "epoch": 1.1568938193343898, "percentage": 16.53, "elapsed_time": "1:15:56", "remaining_time": "6:23:33"}
|
||||
{"current_steps": 735, "total_steps": 4417, "loss": 0.4347, "lr": 3.9469771793999625e-05, "epoch": 1.1648177496038035, "percentage": 16.64, "elapsed_time": "1:16:34", "remaining_time": "6:23:34"}
|
||||
{"current_steps": 740, "total_steps": 4417, "loss": 0.4364, "lr": 3.9451541985825786e-05, "epoch": 1.172741679873217, "percentage": 16.75, "elapsed_time": "1:17:06", "remaining_time": "6:23:10"}
|
||||
{"current_steps": 745, "total_steps": 4417, "loss": 0.4036, "lr": 3.9433008425783644e-05, "epoch": 1.1806656101426307, "percentage": 16.87, "elapsed_time": "1:17:31", "remaining_time": "6:22:05"}
|
||||
{"current_steps": 750, "total_steps": 4417, "loss": 0.4236, "lr": 3.941417140329002e-05, "epoch": 1.1885895404120443, "percentage": 16.98, "elapsed_time": "1:18:01", "remaining_time": "6:21:28"}
|
||||
{"current_steps": 755, "total_steps": 4417, "loss": 0.4444, "lr": 3.939503121250055e-05, "epoch": 1.196513470681458, "percentage": 17.09, "elapsed_time": "1:18:28", "remaining_time": "6:20:37"}
|
||||
{"current_steps": 760, "total_steps": 4417, "loss": 0.4309, "lr": 3.937558815230511e-05, "epoch": 1.2044374009508716, "percentage": 17.21, "elapsed_time": "1:19:00", "remaining_time": "6:20:12"}
|
||||
{"current_steps": 765, "total_steps": 4417, "loss": 0.3922, "lr": 3.935584252632311e-05, "epoch": 1.2123613312202852, "percentage": 17.32, "elapsed_time": "1:19:32", "remaining_time": "6:19:43"}
|
||||
{"current_steps": 770, "total_steps": 4417, "loss": 0.4229, "lr": 3.933579464289877e-05, "epoch": 1.2202852614896988, "percentage": 17.43, "elapsed_time": "1:19:56", "remaining_time": "6:18:40"}
|
||||
{"current_steps": 775, "total_steps": 4417, "loss": 0.4123, "lr": 3.931544481509634e-05, "epoch": 1.2282091917591125, "percentage": 17.55, "elapsed_time": "1:20:33", "remaining_time": "6:18:34"}
|
||||
{"current_steps": 780, "total_steps": 4417, "loss": 0.4087, "lr": 3.929479336069515e-05, "epoch": 1.236133122028526, "percentage": 17.66, "elapsed_time": "1:21:08", "remaining_time": "6:18:20"}
|
||||
{"current_steps": 785, "total_steps": 4417, "loss": 0.4283, "lr": 3.9273840602184704e-05, "epoch": 1.2440570522979397, "percentage": 17.77, "elapsed_time": "1:21:34", "remaining_time": "6:17:24"}
|
||||
{"current_steps": 790, "total_steps": 4417, "loss": 0.4202, "lr": 3.925258686675959e-05, "epoch": 1.2519809825673534, "percentage": 17.89, "elapsed_time": "1:22:06", "remaining_time": "6:16:56"}
|
||||
{"current_steps": 795, "total_steps": 4417, "loss": 0.424, "lr": 3.9231032486314424e-05, "epoch": 1.259904912836767, "percentage": 18.0, "elapsed_time": "1:22:36", "remaining_time": "6:16:19"}
|
||||
{"current_steps": 800, "total_steps": 4417, "loss": 0.4251, "lr": 3.920917779743863e-05, "epoch": 1.2678288431061806, "percentage": 18.11, "elapsed_time": "1:23:03", "remaining_time": "6:15:32"}
|
||||
{"current_steps": 805, "total_steps": 4417, "loss": 0.4236, "lr": 3.9187023141411174e-05, "epoch": 1.2757527733755942, "percentage": 18.23, "elapsed_time": "1:23:35", "remaining_time": "6:15:03"}
|
||||
{"current_steps": 810, "total_steps": 4417, "loss": 0.4231, "lr": 3.916456886419531e-05, "epoch": 1.2836767036450079, "percentage": 18.34, "elapsed_time": "1:24:08", "remaining_time": "6:14:40"}
|
||||
{"current_steps": 815, "total_steps": 4417, "loss": 0.4208, "lr": 3.914181531643308e-05, "epoch": 1.2916006339144215, "percentage": 18.45, "elapsed_time": "1:24:45", "remaining_time": "6:14:35"}
|
||||
{"current_steps": 820, "total_steps": 4417, "loss": 0.4159, "lr": 3.9118762853439896e-05, "epoch": 1.299524564183835, "percentage": 18.56, "elapsed_time": "1:25:20", "remaining_time": "6:14:23"}
|
||||
{"current_steps": 825, "total_steps": 4417, "loss": 0.3973, "lr": 3.909541183519897e-05, "epoch": 1.3074484944532487, "percentage": 18.68, "elapsed_time": "1:25:47", "remaining_time": "6:13:30"}
|
||||
{"current_steps": 830, "total_steps": 4417, "loss": 0.4347, "lr": 3.907176262635573e-05, "epoch": 1.3153724247226624, "percentage": 18.79, "elapsed_time": "1:26:12", "remaining_time": "6:12:35"}
|
||||
{"current_steps": 835, "total_steps": 4417, "loss": 0.3865, "lr": 3.904781559621205e-05, "epoch": 1.3232963549920762, "percentage": 18.9, "elapsed_time": "1:26:41", "remaining_time": "6:11:55"}
|
||||
{"current_steps": 840, "total_steps": 4417, "loss": 0.4387, "lr": 3.9023571118720556e-05, "epoch": 1.3312202852614896, "percentage": 19.02, "elapsed_time": "1:27:14", "remaining_time": "6:11:28"}
|
||||
{"current_steps": 845, "total_steps": 4417, "loss": 0.4319, "lr": 3.899902957247876e-05, "epoch": 1.3391442155309035, "percentage": 19.13, "elapsed_time": "1:27:43", "remaining_time": "6:10:50"}
|
||||
{"current_steps": 850, "total_steps": 4417, "loss": 0.4269, "lr": 3.8974191340723156e-05, "epoch": 1.3470681458003169, "percentage": 19.24, "elapsed_time": "1:28:09", "remaining_time": "6:09:55"}
|
||||
{"current_steps": 855, "total_steps": 4417, "loss": 0.4319, "lr": 3.89490568113232e-05, "epoch": 1.3549920760697307, "percentage": 19.36, "elapsed_time": "1:28:40", "remaining_time": "6:09:24"}
|
||||
{"current_steps": 860, "total_steps": 4417, "loss": 0.4174, "lr": 3.8923626376775304e-05, "epoch": 1.3629160063391441, "percentage": 19.47, "elapsed_time": "1:29:08", "remaining_time": "6:08:42"}
|
||||
{"current_steps": 865, "total_steps": 4417, "loss": 0.3846, "lr": 3.889790043419667e-05, "epoch": 1.370839936608558, "percentage": 19.58, "elapsed_time": "1:29:31", "remaining_time": "6:07:36"}
|
||||
{"current_steps": 870, "total_steps": 4417, "loss": 0.3955, "lr": 3.88718793853191e-05, "epoch": 1.3787638668779714, "percentage": 19.7, "elapsed_time": "1:29:55", "remaining_time": "6:06:38"}
|
||||
{"current_steps": 875, "total_steps": 4417, "loss": 0.4079, "lr": 3.884556363648274e-05, "epoch": 1.3866877971473852, "percentage": 19.81, "elapsed_time": "1:30:31", "remaining_time": "6:06:28"}
|
||||
{"current_steps": 880, "total_steps": 4417, "loss": 0.3955, "lr": 3.881895359862971e-05, "epoch": 1.3946117274167986, "percentage": 19.92, "elapsed_time": "1:31:02", "remaining_time": "6:05:56"}
|
||||
{"current_steps": 885, "total_steps": 4417, "loss": 0.429, "lr": 3.8792049687297676e-05, "epoch": 1.4025356576862125, "percentage": 20.04, "elapsed_time": "1:31:30", "remaining_time": "6:05:11"}
|
||||
{"current_steps": 890, "total_steps": 4417, "loss": 0.3962, "lr": 3.8764852322613424e-05, "epoch": 1.4104595879556259, "percentage": 20.15, "elapsed_time": "1:31:56", "remaining_time": "6:04:22"}
|
||||
{"current_steps": 895, "total_steps": 4417, "loss": 0.4215, "lr": 3.873736192928621e-05, "epoch": 1.4183835182250397, "percentage": 20.26, "elapsed_time": "1:32:27", "remaining_time": "6:03:50"}
|
||||
{"current_steps": 900, "total_steps": 4417, "loss": 0.3843, "lr": 3.870957893660121e-05, "epoch": 1.4263074484944531, "percentage": 20.38, "elapsed_time": "1:32:59", "remaining_time": "6:03:24"}
|
||||
{"current_steps": 905, "total_steps": 4417, "loss": 0.3896, "lr": 3.8681503778412755e-05, "epoch": 1.434231378763867, "percentage": 20.49, "elapsed_time": "1:33:33", "remaining_time": "6:03:02"}
|
||||
{"current_steps": 910, "total_steps": 4417, "loss": 0.4115, "lr": 3.86531368931376e-05, "epoch": 1.4421553090332804, "percentage": 20.6, "elapsed_time": "1:33:59", "remaining_time": "6:02:15"}
|
||||
{"current_steps": 915, "total_steps": 4417, "loss": 0.4143, "lr": 3.862447872374804e-05, "epoch": 1.4500792393026942, "percentage": 20.72, "elapsed_time": "1:34:23", "remaining_time": "6:01:15"}
|
||||
{"current_steps": 920, "total_steps": 4417, "loss": 0.4, "lr": 3.859552971776503e-05, "epoch": 1.4580031695721076, "percentage": 20.83, "elapsed_time": "1:34:56", "remaining_time": "6:00:54"}
|
||||
{"current_steps": 925, "total_steps": 4417, "loss": 0.4019, "lr": 3.856629032725117e-05, "epoch": 1.4659270998415215, "percentage": 20.94, "elapsed_time": "1:35:30", "remaining_time": "6:00:33"}
|
||||
{"current_steps": 930, "total_steps": 4417, "loss": 0.4225, "lr": 3.853676100880366e-05, "epoch": 1.473851030110935, "percentage": 21.06, "elapsed_time": "1:35:55", "remaining_time": "5:59:39"}
|
||||
{"current_steps": 935, "total_steps": 4417, "loss": 0.4151, "lr": 3.8506942223547144e-05, "epoch": 1.4817749603803487, "percentage": 21.17, "elapsed_time": "1:36:27", "remaining_time": "5:59:14"}
|
||||
{"current_steps": 940, "total_steps": 4417, "loss": 0.4054, "lr": 3.847683443712655e-05, "epoch": 1.4896988906497624, "percentage": 21.28, "elapsed_time": "1:36:58", "remaining_time": "5:58:42"}
|
||||
{"current_steps": 945, "total_steps": 4417, "loss": 0.3964, "lr": 3.844643811969979e-05, "epoch": 1.497622820919176, "percentage": 21.39, "elapsed_time": "1:37:27", "remaining_time": "5:58:03"}
|
||||
{"current_steps": 950, "total_steps": 4417, "loss": 0.3836, "lr": 3.8415753745930434e-05, "epoch": 1.5055467511885894, "percentage": 21.51, "elapsed_time": "1:37:59", "remaining_time": "5:57:38"}
|
||||
{"current_steps": 955, "total_steps": 4417, "loss": 0.3985, "lr": 3.8384781794980266e-05, "epoch": 1.5134706814580032, "percentage": 21.62, "elapsed_time": "1:38:34", "remaining_time": "5:57:19"}
|
||||
{"current_steps": 960, "total_steps": 4417, "loss": 0.4003, "lr": 3.835352275050186e-05, "epoch": 1.5213946117274166, "percentage": 21.73, "elapsed_time": "1:38:57", "remaining_time": "5:56:19"}
|
||||
{"current_steps": 965, "total_steps": 4417, "loss": 0.4093, "lr": 3.832197710063095e-05, "epoch": 1.5293185419968305, "percentage": 21.85, "elapsed_time": "1:39:30", "remaining_time": "5:55:59"}
|
||||
{"current_steps": 970, "total_steps": 4417, "loss": 0.4049, "lr": 3.829014533797889e-05, "epoch": 1.537242472266244, "percentage": 21.96, "elapsed_time": "1:39:57", "remaining_time": "5:55:13"}
|
||||
{"current_steps": 975, "total_steps": 4417, "loss": 0.3891, "lr": 3.8258027959624896e-05, "epoch": 1.5451664025356577, "percentage": 22.07, "elapsed_time": "1:40:22", "remaining_time": "5:54:21"}
|
||||
{"current_steps": 980, "total_steps": 4417, "loss": 0.4002, "lr": 3.822562546710831e-05, "epoch": 1.5530903328050714, "percentage": 22.19, "elapsed_time": "1:40:53", "remaining_time": "5:53:51"}
|
||||
{"current_steps": 985, "total_steps": 4417, "loss": 0.4058, "lr": 3.8192938366420783e-05, "epoch": 1.561014263074485, "percentage": 22.3, "elapsed_time": "1:41:24", "remaining_time": "5:53:21"}
|
||||
{"current_steps": 990, "total_steps": 4417, "loss": 0.4376, "lr": 3.815996716799834e-05, "epoch": 1.5689381933438986, "percentage": 22.41, "elapsed_time": "1:41:57", "remaining_time": "5:52:57"}
|
||||
{"current_steps": 995, "total_steps": 4417, "loss": 0.4297, "lr": 3.812671238671344e-05, "epoch": 1.5768621236133122, "percentage": 22.53, "elapsed_time": "1:42:22", "remaining_time": "5:52:06"}
|
||||
{"current_steps": 1000, "total_steps": 4417, "loss": 0.3877, "lr": 3.809317454186691e-05, "epoch": 1.5847860538827259, "percentage": 22.64, "elapsed_time": "1:42:56", "remaining_time": "5:51:45"}
|
||||
{"current_steps": 1005, "total_steps": 4417, "loss": 0.403, "lr": 3.8059354157179856e-05, "epoch": 1.5927099841521395, "percentage": 22.75, "elapsed_time": "1:43:27", "remaining_time": "5:51:13"}
|
||||
{"current_steps": 1010, "total_steps": 4417, "loss": 0.4051, "lr": 3.802525176078547e-05, "epoch": 1.6006339144215531, "percentage": 22.87, "elapsed_time": "1:43:57", "remaining_time": "5:50:39"}
|
||||
{"current_steps": 1015, "total_steps": 4417, "loss": 0.3903, "lr": 3.7990867885220796e-05, "epoch": 1.6085578446909667, "percentage": 22.98, "elapsed_time": "1:44:29", "remaining_time": "5:50:15"}
|
||||
{"current_steps": 1020, "total_steps": 4417, "loss": 0.4078, "lr": 3.7956203067418404e-05, "epoch": 1.6164817749603804, "percentage": 23.09, "elapsed_time": "1:45:04", "remaining_time": "5:49:54"}
|
||||
{"current_steps": 1025, "total_steps": 4417, "loss": 0.3946, "lr": 3.792125784869801e-05, "epoch": 1.624405705229794, "percentage": 23.21, "elapsed_time": "1:45:35", "remaining_time": "5:49:26"}
|
||||
{"current_steps": 1030, "total_steps": 4417, "loss": 0.4134, "lr": 3.788603277475802e-05, "epoch": 1.6323296354992076, "percentage": 23.32, "elapsed_time": "1:46:08", "remaining_time": "5:49:00"}
|
||||
{"current_steps": 1035, "total_steps": 4417, "loss": 0.3994, "lr": 3.785052839566701e-05, "epoch": 1.6402535657686212, "percentage": 23.43, "elapsed_time": "1:46:35", "remaining_time": "5:48:19"}
|
||||
{"current_steps": 1040, "total_steps": 4417, "loss": 0.4115, "lr": 3.781474526585515e-05, "epoch": 1.6481774960380349, "percentage": 23.55, "elapsed_time": "1:47:10", "remaining_time": "5:48:00"}
|
||||
{"current_steps": 1045, "total_steps": 4417, "loss": 0.4284, "lr": 3.777868394410549e-05, "epoch": 1.6561014263074485, "percentage": 23.66, "elapsed_time": "1:47:43", "remaining_time": "5:47:37"}
|
||||
{"current_steps": 1050, "total_steps": 4417, "loss": 0.4044, "lr": 3.774234499354534e-05, "epoch": 1.6640253565768621, "percentage": 23.77, "elapsed_time": "1:48:13", "remaining_time": "5:47:01"}
|
||||
{"current_steps": 1055, "total_steps": 4417, "loss": 0.4203, "lr": 3.7705728981637356e-05, "epoch": 1.6719492868462758, "percentage": 23.88, "elapsed_time": "1:48:41", "remaining_time": "5:46:22"}
|
||||
{"current_steps": 1060, "total_steps": 4417, "loss": 0.3809, "lr": 3.7668836480170766e-05, "epoch": 1.6798732171156894, "percentage": 24.0, "elapsed_time": "1:49:11", "remaining_time": "5:45:48"}
|
||||
{"current_steps": 1065, "total_steps": 4417, "loss": 0.411, "lr": 3.763166806525241e-05, "epoch": 1.687797147385103, "percentage": 24.11, "elapsed_time": "1:49:41", "remaining_time": "5:45:15"}
|
||||
{"current_steps": 1070, "total_steps": 4417, "loss": 0.4124, "lr": 3.759422431729772e-05, "epoch": 1.6957210776545166, "percentage": 24.22, "elapsed_time": "1:50:07", "remaining_time": "5:44:27"}
|
||||
{"current_steps": 1075, "total_steps": 4417, "loss": 0.4324, "lr": 3.7556505821021716e-05, "epoch": 1.7036450079239303, "percentage": 24.34, "elapsed_time": "1:50:28", "remaining_time": "5:43:26"}
|
||||
{"current_steps": 1080, "total_steps": 4417, "loss": 0.4057, "lr": 3.751851316542981e-05, "epoch": 1.7115689381933439, "percentage": 24.45, "elapsed_time": "1:50:56", "remaining_time": "5:42:47"}
|
||||
{"current_steps": 1085, "total_steps": 4417, "loss": 0.4001, "lr": 3.748024694380864e-05, "epoch": 1.7194928684627575, "percentage": 24.56, "elapsed_time": "1:51:23", "remaining_time": "5:42:05"}
|
||||
{"current_steps": 1090, "total_steps": 4417, "loss": 0.3975, "lr": 3.744170775371683e-05, "epoch": 1.7274167987321711, "percentage": 24.68, "elapsed_time": "1:51:53", "remaining_time": "5:41:32"}
|
||||
{"current_steps": 1095, "total_steps": 4417, "loss": 0.4089, "lr": 3.740289619697561e-05, "epoch": 1.7353407290015848, "percentage": 24.79, "elapsed_time": "1:52:28", "remaining_time": "5:41:12"}
|
||||
{"current_steps": 1100, "total_steps": 4417, "loss": 0.4128, "lr": 3.736381287965943e-05, "epoch": 1.7432646592709984, "percentage": 24.9, "elapsed_time": "1:53:00", "remaining_time": "5:40:45"}
|
||||
{"current_steps": 1105, "total_steps": 4417, "loss": 0.3996, "lr": 3.7324458412086547e-05, "epoch": 1.751188589540412, "percentage": 25.02, "elapsed_time": "1:53:35", "remaining_time": "5:40:29"}
|
||||
{"current_steps": 1110, "total_steps": 4417, "loss": 0.3909, "lr": 3.728483340880939e-05, "epoch": 1.7591125198098256, "percentage": 25.13, "elapsed_time": "1:54:05", "remaining_time": "5:39:55"}
|
||||
{"current_steps": 1115, "total_steps": 4417, "loss": 0.3934, "lr": 3.7244938488605084e-05, "epoch": 1.7670364500792393, "percentage": 25.24, "elapsed_time": "1:54:35", "remaining_time": "5:39:21"}
|
||||
{"current_steps": 1120, "total_steps": 4417, "loss": 0.382, "lr": 3.7204774274465694e-05, "epoch": 1.7749603803486529, "percentage": 25.36, "elapsed_time": "1:55:06", "remaining_time": "5:38:51"}
|
||||
{"current_steps": 1125, "total_steps": 4417, "loss": 0.4197, "lr": 3.716434139358855e-05, "epoch": 1.7828843106180665, "percentage": 25.47, "elapsed_time": "1:55:33", "remaining_time": "5:38:10"}
|
||||
{"current_steps": 1130, "total_steps": 4417, "loss": 0.4063, "lr": 3.712364047736643e-05, "epoch": 1.7908082408874801, "percentage": 25.58, "elapsed_time": "1:56:04", "remaining_time": "5:37:37"}
|
||||
{"current_steps": 1135, "total_steps": 4417, "loss": 0.4018, "lr": 3.7082672161377706e-05, "epoch": 1.7987321711568938, "percentage": 25.7, "elapsed_time": "1:56:35", "remaining_time": "5:37:07"}
|
||||
{"current_steps": 1140, "total_steps": 4417, "loss": 0.3961, "lr": 3.70414370853764e-05, "epoch": 1.8066561014263076, "percentage": 25.81, "elapsed_time": "1:57:08", "remaining_time": "5:36:43"}
|
||||
{"current_steps": 1145, "total_steps": 4417, "loss": 0.3882, "lr": 3.6999935893282254e-05, "epoch": 1.814580031695721, "percentage": 25.92, "elapsed_time": "1:57:39", "remaining_time": "5:36:13"}
|
||||
{"current_steps": 1150, "total_steps": 4417, "loss": 0.4189, "lr": 3.695816923317058e-05, "epoch": 1.8225039619651349, "percentage": 26.04, "elapsed_time": "1:58:09", "remaining_time": "5:35:41"}
|
||||
{"current_steps": 1155, "total_steps": 4417, "loss": 0.3863, "lr": 3.691613775726223e-05, "epoch": 1.8304278922345483, "percentage": 26.15, "elapsed_time": "1:58:36", "remaining_time": "5:34:59"}
|
||||
{"current_steps": 1160, "total_steps": 4417, "loss": 0.3952, "lr": 3.687384212191336e-05, "epoch": 1.8383518225039621, "percentage": 26.26, "elapsed_time": "1:58:59", "remaining_time": "5:34:05"}
|
||||
{"current_steps": 1165, "total_steps": 4417, "loss": 0.4203, "lr": 3.6831282987605185e-05, "epoch": 1.8462757527733755, "percentage": 26.38, "elapsed_time": "1:59:38", "remaining_time": "5:33:58"}
|
||||
{"current_steps": 1170, "total_steps": 4417, "loss": 0.3953, "lr": 3.6788461018933695e-05, "epoch": 1.8541996830427894, "percentage": 26.49, "elapsed_time": "2:00:12", "remaining_time": "5:33:36"}
|
||||
{"current_steps": 1175, "total_steps": 4417, "loss": 0.3982, "lr": 3.674537688459924e-05, "epoch": 1.8621236133122028, "percentage": 26.6, "elapsed_time": "2:00:43", "remaining_time": "5:33:06"}
|
||||
{"current_steps": 1180, "total_steps": 4417, "loss": 0.4301, "lr": 3.67020312573961e-05, "epoch": 1.8700475435816166, "percentage": 26.71, "elapsed_time": "2:01:12", "remaining_time": "5:32:29"}
|
||||
{"current_steps": 1185, "total_steps": 4417, "loss": 0.4094, "lr": 3.665842481420199e-05, "epoch": 1.87797147385103, "percentage": 26.83, "elapsed_time": "2:01:42", "remaining_time": "5:31:57"}
|
||||
{"current_steps": 1190, "total_steps": 4417, "loss": 0.4235, "lr": 3.661455823596749e-05, "epoch": 1.8858954041204439, "percentage": 26.94, "elapsed_time": "2:02:18", "remaining_time": "5:31:41"}
|
||||
{"current_steps": 1195, "total_steps": 4417, "loss": 0.3828, "lr": 3.6570432207705366e-05, "epoch": 1.8938193343898573, "percentage": 27.05, "elapsed_time": "2:02:51", "remaining_time": "5:31:15"}
|
||||
{"current_steps": 1200, "total_steps": 4417, "loss": 0.3746, "lr": 3.652604741847996e-05, "epoch": 1.9017432646592711, "percentage": 27.17, "elapsed_time": "2:03:14", "remaining_time": "5:30:22"}
|
||||
{"current_steps": 1205, "total_steps": 4417, "loss": 0.3828, "lr": 3.648140456139635e-05, "epoch": 1.9096671949286845, "percentage": 27.28, "elapsed_time": "2:03:51", "remaining_time": "5:30:08"}
|
||||
{"current_steps": 1210, "total_steps": 4417, "loss": 0.3976, "lr": 3.643650433358956e-05, "epoch": 1.9175911251980984, "percentage": 27.39, "elapsed_time": "2:04:16", "remaining_time": "5:29:21"}
|
||||
{"current_steps": 1215, "total_steps": 4417, "loss": 0.4015, "lr": 3.639134743621368e-05, "epoch": 1.9255150554675118, "percentage": 27.51, "elapsed_time": "2:04:44", "remaining_time": "5:28:43"}
|
||||
{"current_steps": 1220, "total_steps": 4417, "loss": 0.3785, "lr": 3.63459345744309e-05, "epoch": 1.9334389857369256, "percentage": 27.62, "elapsed_time": "2:05:12", "remaining_time": "5:28:06"}
|
||||
{"current_steps": 1225, "total_steps": 4417, "loss": 0.4263, "lr": 3.630026645740049e-05, "epoch": 1.941362916006339, "percentage": 27.73, "elapsed_time": "2:05:41", "remaining_time": "5:27:31"}
|
||||
{"current_steps": 1230, "total_steps": 4417, "loss": 0.4054, "lr": 3.625434379826777e-05, "epoch": 1.9492868462757529, "percentage": 27.85, "elapsed_time": "2:06:14", "remaining_time": "5:27:04"}
|
||||
{"current_steps": 1235, "total_steps": 4417, "loss": 0.4009, "lr": 3.620816731415293e-05, "epoch": 1.9572107765451663, "percentage": 27.96, "elapsed_time": "2:06:42", "remaining_time": "5:26:28"}
|
||||
{"current_steps": 1240, "total_steps": 4417, "loss": 0.392, "lr": 3.616173772613985e-05, "epoch": 1.9651347068145801, "percentage": 28.07, "elapsed_time": "2:07:10", "remaining_time": "5:25:49"}
|
||||
{"current_steps": 1245, "total_steps": 4417, "loss": 0.4069, "lr": 3.611505575926482e-05, "epoch": 1.9730586370839935, "percentage": 28.19, "elapsed_time": "2:07:39", "remaining_time": "5:25:13"}
|
||||
{"current_steps": 1250, "total_steps": 4417, "loss": 0.3868, "lr": 3.6068122142505265e-05, "epoch": 1.9809825673534074, "percentage": 28.3, "elapsed_time": "2:08:11", "remaining_time": "5:24:48"}
|
||||
{"current_steps": 1255, "total_steps": 4417, "loss": 0.3925, "lr": 3.602093760876831e-05, "epoch": 1.9889064976228208, "percentage": 28.41, "elapsed_time": "2:08:41", "remaining_time": "5:24:14"}
|
||||
{"current_steps": 1260, "total_steps": 4417, "loss": 0.3847, "lr": 3.597350289487935e-05, "epoch": 1.9968304278922346, "percentage": 28.53, "elapsed_time": "2:09:14", "remaining_time": "5:23:49"}
|
||||
{"current_steps": 1265, "total_steps": 4417, "loss": 0.3712, "lr": 3.5925818741570566e-05, "epoch": 2.004754358161648, "percentage": 28.64, "elapsed_time": "2:09:50", "remaining_time": "5:23:31"}
|
||||
{"current_steps": 1270, "total_steps": 4417, "loss": 0.3875, "lr": 3.587788589346932e-05, "epoch": 2.012678288431062, "percentage": 28.75, "elapsed_time": "2:10:20", "remaining_time": "5:22:58"}
|
||||
{"current_steps": 1275, "total_steps": 4417, "loss": 0.355, "lr": 3.5829705099086586e-05, "epoch": 2.0206022187004753, "percentage": 28.87, "elapsed_time": "2:10:54", "remaining_time": "5:22:36"}
|
||||
{"current_steps": 1280, "total_steps": 4417, "loss": 0.3331, "lr": 3.578127711080517e-05, "epoch": 2.028526148969889, "percentage": 28.98, "elapsed_time": "2:11:26", "remaining_time": "5:22:09"}
|
||||
{"current_steps": 1285, "total_steps": 4417, "loss": 0.3337, "lr": 3.5732602684868065e-05, "epoch": 2.0364500792393025, "percentage": 29.09, "elapsed_time": "2:11:53", "remaining_time": "5:21:27"}
|
||||
{"current_steps": 1290, "total_steps": 4417, "loss": 0.3384, "lr": 3.568368258136657e-05, "epoch": 2.0443740095087164, "percentage": 29.21, "elapsed_time": "2:12:28", "remaining_time": "5:21:06"}
|
||||
{"current_steps": 1295, "total_steps": 4417, "loss": 0.3536, "lr": 3.563451756422843e-05, "epoch": 2.05229793977813, "percentage": 29.32, "elapsed_time": "2:13:00", "remaining_time": "5:20:38"}
|
||||
{"current_steps": 1300, "total_steps": 4417, "loss": 0.3825, "lr": 3.558510840120594e-05, "epoch": 2.0602218700475436, "percentage": 29.43, "elapsed_time": "2:13:27", "remaining_time": "5:19:58"}
|
||||
{"current_steps": 1305, "total_steps": 4417, "loss": 0.3744, "lr": 3.553545586386392e-05, "epoch": 2.068145800316957, "percentage": 29.54, "elapsed_time": "2:13:59", "remaining_time": "5:19:32"}
|
||||
{"current_steps": 1310, "total_steps": 4417, "loss": 0.365, "lr": 3.5485560727567686e-05, "epoch": 2.076069730586371, "percentage": 29.66, "elapsed_time": "2:14:30", "remaining_time": "5:19:00"}
|
||||
{"current_steps": 1315, "total_steps": 4417, "loss": 0.3727, "lr": 3.543542377147093e-05, "epoch": 2.0839936608557843, "percentage": 29.77, "elapsed_time": "2:14:58", "remaining_time": "5:18:22"}
|
||||
{"current_steps": 1320, "total_steps": 4417, "loss": 0.3685, "lr": 3.5385045778503574e-05, "epoch": 2.091917591125198, "percentage": 29.88, "elapsed_time": "2:15:23", "remaining_time": "5:17:39"}
|
||||
{"current_steps": 1325, "total_steps": 4417, "loss": 0.3756, "lr": 3.533442753535952e-05, "epoch": 2.0998415213946116, "percentage": 30.0, "elapsed_time": "2:15:58", "remaining_time": "5:17:19"}
|
||||
{"current_steps": 1330, "total_steps": 4417, "loss": 0.3599, "lr": 3.528356983248438e-05, "epoch": 2.1077654516640254, "percentage": 30.11, "elapsed_time": "2:16:28", "remaining_time": "5:16:45"}
|
||||
{"current_steps": 1335, "total_steps": 4417, "loss": 0.3605, "lr": 3.523247346406311e-05, "epoch": 2.115689381933439, "percentage": 30.22, "elapsed_time": "2:16:55", "remaining_time": "5:16:06"}
|
||||
{"current_steps": 1340, "total_steps": 4417, "loss": 0.3506, "lr": 3.518113922800765e-05, "epoch": 2.1236133122028527, "percentage": 30.34, "elapsed_time": "2:17:30", "remaining_time": "5:15:46"}
|
||||
{"current_steps": 1345, "total_steps": 4417, "loss": 0.3584, "lr": 3.512956792594442e-05, "epoch": 2.131537242472266, "percentage": 30.45, "elapsed_time": "2:17:53", "remaining_time": "5:14:57"}
|
||||
{"current_steps": 1350, "total_steps": 4417, "loss": 0.3464, "lr": 3.5077760363201835e-05, "epoch": 2.13946117274168, "percentage": 30.56, "elapsed_time": "2:18:21", "remaining_time": "5:14:20"}
|
||||
{"current_steps": 1355, "total_steps": 4417, "loss": 0.3566, "lr": 3.50257173487977e-05, "epoch": 2.1473851030110933, "percentage": 30.68, "elapsed_time": "2:18:49", "remaining_time": "5:13:42"}
|
||||
{"current_steps": 1360, "total_steps": 4417, "loss": 0.366, "lr": 3.4973439695426606e-05, "epoch": 2.155309033280507, "percentage": 30.79, "elapsed_time": "2:19:11", "remaining_time": "5:12:53"}
|
||||
{"current_steps": 1365, "total_steps": 4417, "loss": 0.3556, "lr": 3.492092821944722e-05, "epoch": 2.1632329635499206, "percentage": 30.9, "elapsed_time": "2:19:45", "remaining_time": "5:12:29"}
|
||||
{"current_steps": 1370, "total_steps": 4417, "loss": 0.3692, "lr": 3.4868183740869544e-05, "epoch": 2.1711568938193344, "percentage": 31.02, "elapsed_time": "2:20:13", "remaining_time": "5:11:51"}
|
||||
{"current_steps": 1375, "total_steps": 4417, "loss": 0.3793, "lr": 3.48152070833421e-05, "epoch": 2.179080824088748, "percentage": 31.13, "elapsed_time": "2:20:43", "remaining_time": "5:11:19"}
|
||||
{"current_steps": 1380, "total_steps": 4417, "loss": 0.3878, "lr": 3.476199907413909e-05, "epoch": 2.1870047543581617, "percentage": 31.24, "elapsed_time": "2:21:11", "remaining_time": "5:10:43"}
|
||||
{"current_steps": 1385, "total_steps": 4417, "loss": 0.3805, "lr": 3.4708560544147435e-05, "epoch": 2.194928684627575, "percentage": 31.36, "elapsed_time": "2:21:35", "remaining_time": "5:09:58"}
|
||||
{"current_steps": 1390, "total_steps": 4417, "loss": 0.3659, "lr": 3.465489232785388e-05, "epoch": 2.202852614896989, "percentage": 31.47, "elapsed_time": "2:22:05", "remaining_time": "5:09:27"}
|
||||
{"current_steps": 1395, "total_steps": 4417, "loss": 0.3801, "lr": 3.460099526333184e-05, "epoch": 2.2107765451664028, "percentage": 31.58, "elapsed_time": "2:22:29", "remaining_time": "5:08:41"}
|
||||
{"current_steps": 1400, "total_steps": 4417, "loss": 0.3528, "lr": 3.4546870192228446e-05, "epoch": 2.218700475435816, "percentage": 31.7, "elapsed_time": "2:23:00", "remaining_time": "5:08:10"}
|
||||
{"current_steps": 1405, "total_steps": 4417, "loss": 0.4117, "lr": 3.449251795975132e-05, "epoch": 2.2266244057052296, "percentage": 31.81, "elapsed_time": "2:23:28", "remaining_time": "5:07:34"}
|
||||
{"current_steps": 1410, "total_steps": 4417, "loss": 0.3457, "lr": 3.4437939414655375e-05, "epoch": 2.2345483359746434, "percentage": 31.92, "elapsed_time": "2:24:01", "remaining_time": "5:07:08"}
|
||||
{"current_steps": 1415, "total_steps": 4417, "loss": 0.3768, "lr": 3.438313540922961e-05, "epoch": 2.2424722662440573, "percentage": 32.04, "elapsed_time": "2:24:31", "remaining_time": "5:06:37"}
|
||||
{"current_steps": 1420, "total_steps": 4417, "loss": 0.3929, "lr": 3.432810679928376e-05, "epoch": 2.2503961965134707, "percentage": 32.15, "elapsed_time": "2:24:52", "remaining_time": "5:05:46"}
|
||||
{"current_steps": 1425, "total_steps": 4417, "loss": 0.3528, "lr": 3.427285444413495e-05, "epoch": 2.258320126782884, "percentage": 32.26, "elapsed_time": "2:25:23", "remaining_time": "5:05:16"}
|
||||
{"current_steps": 1430, "total_steps": 4417, "loss": 0.349, "lr": 3.421737920659427e-05, "epoch": 2.266244057052298, "percentage": 32.37, "elapsed_time": "2:25:51", "remaining_time": "5:04:41"}
|
||||
{"current_steps": 1435, "total_steps": 4417, "loss": 0.3591, "lr": 3.416168195295329e-05, "epoch": 2.2741679873217118, "percentage": 32.49, "elapsed_time": "2:26:26", "remaining_time": "5:04:17"}
|
||||
{"current_steps": 1440, "total_steps": 4417, "loss": 0.3556, "lr": 3.410576355297056e-05, "epoch": 2.282091917591125, "percentage": 32.6, "elapsed_time": "2:26:58", "remaining_time": "5:03:50"}
|
||||
{"current_steps": 1445, "total_steps": 4417, "loss": 0.3668, "lr": 3.404962487985801e-05, "epoch": 2.2900158478605386, "percentage": 32.71, "elapsed_time": "2:27:27", "remaining_time": "5:03:16"}
|
||||
{"current_steps": 1450, "total_steps": 4417, "loss": 0.3669, "lr": 3.399326681026731e-05, "epoch": 2.2979397781299524, "percentage": 32.83, "elapsed_time": "2:27:58", "remaining_time": "5:02:47"}
|
||||
{"current_steps": 1455, "total_steps": 4417, "loss": 0.3761, "lr": 3.3936690224276194e-05, "epoch": 2.3058637083993663, "percentage": 32.94, "elapsed_time": "2:28:20", "remaining_time": "5:01:58"}
|
||||
{"current_steps": 1460, "total_steps": 4417, "loss": 0.3772, "lr": 3.3879896005374705e-05, "epoch": 2.3137876386687797, "percentage": 33.05, "elapsed_time": "2:28:49", "remaining_time": "5:01:24"}
|
||||
{"current_steps": 1465, "total_steps": 4417, "loss": 0.355, "lr": 3.382288504045141e-05, "epoch": 2.3217115689381935, "percentage": 33.17, "elapsed_time": "2:29:25", "remaining_time": "5:01:05"}
|
||||
{"current_steps": 1470, "total_steps": 4417, "loss": 0.3683, "lr": 3.3765658219779537e-05, "epoch": 2.329635499207607, "percentage": 33.28, "elapsed_time": "2:29:55", "remaining_time": "5:00:32"}
|
||||
{"current_steps": 1475, "total_steps": 4417, "loss": 0.3559, "lr": 3.370821643700307e-05, "epoch": 2.337559429477021, "percentage": 33.39, "elapsed_time": "2:30:26", "remaining_time": "5:00:04"}
|
||||
{"current_steps": 1480, "total_steps": 4417, "loss": 0.383, "lr": 3.365056058912282e-05, "epoch": 2.345483359746434, "percentage": 33.51, "elapsed_time": "2:30:55", "remaining_time": "4:59:29"}
|
||||
{"current_steps": 1485, "total_steps": 4417, "loss": 0.3559, "lr": 3.3592691576482414e-05, "epoch": 2.353407290015848, "percentage": 33.62, "elapsed_time": "2:31:32", "remaining_time": "4:59:11"}
|
||||
{"current_steps": 1490, "total_steps": 4417, "loss": 0.3774, "lr": 3.353461030275418e-05, "epoch": 2.3613312202852614, "percentage": 33.73, "elapsed_time": "2:32:04", "remaining_time": "4:58:44"}
|
||||
{"current_steps": 1495, "total_steps": 4417, "loss": 0.3551, "lr": 3.3476317674925126e-05, "epoch": 2.3692551505546753, "percentage": 33.85, "elapsed_time": "2:32:40", "remaining_time": "4:58:24"}
|
||||
{"current_steps": 1500, "total_steps": 4417, "loss": 0.3705, "lr": 3.341781460328267e-05, "epoch": 2.3771790808240887, "percentage": 33.96, "elapsed_time": "2:33:11", "remaining_time": "4:57:55"}
|
||||
{"current_steps": 1505, "total_steps": 4417, "loss": 0.3741, "lr": 3.3359102001400555e-05, "epoch": 2.3851030110935025, "percentage": 34.07, "elapsed_time": "2:33:55", "remaining_time": "4:57:48"}
|
||||
{"current_steps": 1510, "total_steps": 4417, "loss": 0.3737, "lr": 3.330018078612446e-05, "epoch": 2.393026941362916, "percentage": 34.19, "elapsed_time": "2:34:33", "remaining_time": "4:57:32"}
|
||||
{"current_steps": 1515, "total_steps": 4417, "loss": 0.3676, "lr": 3.324105187755775e-05, "epoch": 2.40095087163233, "percentage": 34.3, "elapsed_time": "2:35:02", "remaining_time": "4:56:58"}
|
||||
{"current_steps": 1520, "total_steps": 4417, "loss": 0.3755, "lr": 3.318171619904709e-05, "epoch": 2.408874801901743, "percentage": 34.41, "elapsed_time": "2:35:38", "remaining_time": "4:56:37"}
|
||||
{"current_steps": 1525, "total_steps": 4417, "loss": 0.3627, "lr": 3.3122174677168027e-05, "epoch": 2.416798732171157, "percentage": 34.53, "elapsed_time": "2:36:07", "remaining_time": "4:56:04"}
|
||||
{"current_steps": 1530, "total_steps": 4417, "loss": 0.3824, "lr": 3.306242824171053e-05, "epoch": 2.4247226624405704, "percentage": 34.64, "elapsed_time": "2:36:28", "remaining_time": "4:55:14"}
|
||||
{"current_steps": 1535, "total_steps": 4417, "loss": 0.3678, "lr": 3.300247782566445e-05, "epoch": 2.4326465927099843, "percentage": 34.75, "elapsed_time": "2:36:57", "remaining_time": "4:54:42"}
|
||||
{"current_steps": 1540, "total_steps": 4417, "loss": 0.3463, "lr": 3.294232436520499e-05, "epoch": 2.4405705229793977, "percentage": 34.87, "elapsed_time": "2:37:28", "remaining_time": "4:54:10"}
|
||||
{"current_steps": 1545, "total_steps": 4417, "loss": 0.3684, "lr": 3.288196879967801e-05, "epoch": 2.4484944532488115, "percentage": 34.98, "elapsed_time": "2:38:02", "remaining_time": "4:53:46"}
|
||||
{"current_steps": 1550, "total_steps": 4417, "loss": 0.3683, "lr": 3.282141207158546e-05, "epoch": 2.456418383518225, "percentage": 35.09, "elapsed_time": "2:38:38", "remaining_time": "4:53:27"}
|
||||
{"current_steps": 1555, "total_steps": 4417, "loss": 0.3705, "lr": 3.276065512657058e-05, "epoch": 2.464342313787639, "percentage": 35.2, "elapsed_time": "2:39:12", "remaining_time": "4:53:02"}
|
||||
{"current_steps": 1560, "total_steps": 4417, "loss": 0.3577, "lr": 3.269969891340315e-05, "epoch": 2.472266244057052, "percentage": 35.32, "elapsed_time": "2:39:42", "remaining_time": "4:52:28"}
|
||||
{"current_steps": 1565, "total_steps": 4417, "loss": 0.3522, "lr": 3.263854438396473e-05, "epoch": 2.480190174326466, "percentage": 35.43, "elapsed_time": "2:40:08", "remaining_time": "4:51:49"}
|
||||
{"current_steps": 1570, "total_steps": 4417, "loss": 0.3602, "lr": 3.257719249323371e-05, "epoch": 2.4881141045958794, "percentage": 35.54, "elapsed_time": "2:40:37", "remaining_time": "4:51:16"}
|
||||
{"current_steps": 1575, "total_steps": 4417, "loss": 0.3602, "lr": 3.251564419927046e-05, "epoch": 2.4960380348652933, "percentage": 35.66, "elapsed_time": "2:41:08", "remaining_time": "4:50:46"}
|
||||
{"current_steps": 1580, "total_steps": 4417, "loss": 0.3558, "lr": 3.245390046320234e-05, "epoch": 2.5039619651347067, "percentage": 35.77, "elapsed_time": "2:41:41", "remaining_time": "4:50:20"}
|
||||
{"current_steps": 1585, "total_steps": 4417, "loss": 0.3481, "lr": 3.239196224920873e-05, "epoch": 2.5118858954041206, "percentage": 35.88, "elapsed_time": "2:42:13", "remaining_time": "4:49:52"}
|
||||
{"current_steps": 1590, "total_steps": 4417, "loss": 0.3649, "lr": 3.232983052450588e-05, "epoch": 2.519809825673534, "percentage": 36.0, "elapsed_time": "2:42:45", "remaining_time": "4:49:22"}
|
||||
{"current_steps": 1595, "total_steps": 4417, "loss": 0.398, "lr": 3.2267506259331936e-05, "epoch": 2.527733755942948, "percentage": 36.11, "elapsed_time": "2:43:13", "remaining_time": "4:48:47"}
|
||||
{"current_steps": 1600, "total_steps": 4417, "loss": 0.3724, "lr": 3.220499042693167e-05, "epoch": 2.535657686212361, "percentage": 36.22, "elapsed_time": "2:43:43", "remaining_time": "4:48:15"}
|
||||
{"current_steps": 1605, "total_steps": 4417, "loss": 0.3751, "lr": 3.214228400354137e-05, "epoch": 2.543581616481775, "percentage": 36.34, "elapsed_time": "2:44:10", "remaining_time": "4:47:38"}
|
||||
{"current_steps": 1610, "total_steps": 4417, "loss": 0.3904, "lr": 3.207938796837354e-05, "epoch": 2.5515055467511885, "percentage": 36.45, "elapsed_time": "2:44:36", "remaining_time": "4:46:59"}
|
||||
{"current_steps": 1615, "total_steps": 4417, "loss": 0.3588, "lr": 3.201630330360165e-05, "epoch": 2.5594294770206023, "percentage": 36.56, "elapsed_time": "2:45:04", "remaining_time": "4:46:24"}
|
||||
{"current_steps": 1620, "total_steps": 4417, "loss": 0.3699, "lr": 3.195303099434474e-05, "epoch": 2.5673534072900157, "percentage": 36.68, "elapsed_time": "2:45:32", "remaining_time": "4:45:49"}
|
||||
{"current_steps": 1625, "total_steps": 4417, "loss": 0.368, "lr": 3.1889572028652106e-05, "epoch": 2.5752773375594296, "percentage": 36.79, "elapsed_time": "2:46:02", "remaining_time": "4:45:16"}
|
||||
{"current_steps": 1630, "total_steps": 4417, "loss": 0.3534, "lr": 3.182592739748783e-05, "epoch": 2.583201267828843, "percentage": 36.9, "elapsed_time": "2:46:35", "remaining_time": "4:44:51"}
|
||||
{"current_steps": 1635, "total_steps": 4417, "loss": 0.3615, "lr": 3.17620980947153e-05, "epoch": 2.591125198098257, "percentage": 37.02, "elapsed_time": "2:47:01", "remaining_time": "4:44:12"}
|
||||
{"current_steps": 1640, "total_steps": 4417, "loss": 0.3741, "lr": 3.1698085117081725e-05, "epoch": 2.59904912836767, "percentage": 37.13, "elapsed_time": "2:47:31", "remaining_time": "4:43:40"}
|
||||
{"current_steps": 1645, "total_steps": 4417, "loss": 0.3697, "lr": 3.163388946420253e-05, "epoch": 2.606973058637084, "percentage": 37.24, "elapsed_time": "2:48:03", "remaining_time": "4:43:12"}
|
||||
{"current_steps": 1650, "total_steps": 4417, "loss": 0.3758, "lr": 3.156951213854578e-05, "epoch": 2.6148969889064975, "percentage": 37.36, "elapsed_time": "2:48:26", "remaining_time": "4:42:27"}
|
||||
{"current_steps": 1655, "total_steps": 4417, "loss": 0.3624, "lr": 3.1504954145416494e-05, "epoch": 2.6228209191759113, "percentage": 37.47, "elapsed_time": "2:48:56", "remaining_time": "4:41:57"}
|
||||
{"current_steps": 1660, "total_steps": 4417, "loss": 0.3741, "lr": 3.144021649294096e-05, "epoch": 2.6307448494453247, "percentage": 37.58, "elapsed_time": "2:49:26", "remaining_time": "4:41:24"}
|
||||
{"current_steps": 1665, "total_steps": 4417, "loss": 0.3402, "lr": 3.137530019205104e-05, "epoch": 2.6386687797147386, "percentage": 37.7, "elapsed_time": "2:49:52", "remaining_time": "4:40:46"}
|
||||
{"current_steps": 1670, "total_steps": 4417, "loss": 0.3533, "lr": 3.131020625646827e-05, "epoch": 2.6465927099841524, "percentage": 37.81, "elapsed_time": "2:50:26", "remaining_time": "4:40:20"}
|
||||
{"current_steps": 1675, "total_steps": 4417, "loss": 0.3697, "lr": 3.124493570268815e-05, "epoch": 2.654516640253566, "percentage": 37.92, "elapsed_time": "2:50:59", "remaining_time": "4:39:55"}
|
||||
{"current_steps": 1680, "total_steps": 4417, "loss": 0.3485, "lr": 3.117948954996419e-05, "epoch": 2.662440570522979, "percentage": 38.03, "elapsed_time": "2:51:27", "remaining_time": "4:39:19"}
|
||||
{"current_steps": 1685, "total_steps": 4417, "loss": 0.3557, "lr": 3.111386882029205e-05, "epoch": 2.670364500792393, "percentage": 38.15, "elapsed_time": "2:52:03", "remaining_time": "4:38:58"}
|
||||
{"current_steps": 1690, "total_steps": 4417, "loss": 0.3705, "lr": 3.1048074538393515e-05, "epoch": 2.678288431061807, "percentage": 38.26, "elapsed_time": "2:52:36", "remaining_time": "4:38:31"}
|
||||
{"current_steps": 1695, "total_steps": 4417, "loss": 0.3674, "lr": 3.098210773170057e-05, "epoch": 2.6862123613312203, "percentage": 38.37, "elapsed_time": "2:53:08", "remaining_time": "4:38:03"}
|
||||
{"current_steps": 1700, "total_steps": 4417, "loss": 0.3788, "lr": 3.0915969430339285e-05, "epoch": 2.6941362916006337, "percentage": 38.49, "elapsed_time": "2:53:42", "remaining_time": "4:37:37"}
|
||||
{"current_steps": 1705, "total_steps": 4417, "loss": 0.3806, "lr": 3.0849660667113785e-05, "epoch": 2.7020602218700476, "percentage": 38.6, "elapsed_time": "2:54:07", "remaining_time": "4:36:58"}
|
||||
{"current_steps": 1710, "total_steps": 4417, "loss": 0.3819, "lr": 3.078318247749009e-05, "epoch": 2.7099841521394614, "percentage": 38.71, "elapsed_time": "2:54:36", "remaining_time": "4:36:25"}
|
||||
{"current_steps": 1715, "total_steps": 4417, "loss": 0.3429, "lr": 3.0716535899579936e-05, "epoch": 2.717908082408875, "percentage": 38.83, "elapsed_time": "2:55:00", "remaining_time": "4:35:43"}
|
||||
{"current_steps": 1720, "total_steps": 4417, "loss": 0.3544, "lr": 3.0649721974124606e-05, "epoch": 2.7258320126782882, "percentage": 38.94, "elapsed_time": "2:55:34", "remaining_time": "4:35:18"}
|
||||
{"current_steps": 1725, "total_steps": 4417, "loss": 0.3744, "lr": 3.058274174447864e-05, "epoch": 2.733755942947702, "percentage": 39.05, "elapsed_time": "2:56:05", "remaining_time": "4:34:48"}
|
||||
{"current_steps": 1730, "total_steps": 4417, "loss": 0.335, "lr": 3.0515596256593566e-05, "epoch": 2.741679873217116, "percentage": 39.17, "elapsed_time": "2:56:40", "remaining_time": "4:34:24"}
|
||||
{"current_steps": 1735, "total_steps": 4417, "loss": 0.3834, "lr": 3.044828655900153e-05, "epoch": 2.7496038034865293, "percentage": 39.28, "elapsed_time": "2:57:08", "remaining_time": "4:33:49"}
|
||||
{"current_steps": 1740, "total_steps": 4417, "loss": 0.3765, "lr": 3.0380813702798975e-05, "epoch": 2.7575277337559427, "percentage": 39.39, "elapsed_time": "2:57:35", "remaining_time": "4:33:13"}
|
||||
{"current_steps": 1745, "total_steps": 4417, "loss": 0.3775, "lr": 3.0313178741630193e-05, "epoch": 2.7654516640253566, "percentage": 39.51, "elapsed_time": "2:57:59", "remaining_time": "4:32:32"}
|
||||
{"current_steps": 1750, "total_steps": 4417, "loss": 0.3603, "lr": 3.0245382731670872e-05, "epoch": 2.7733755942947704, "percentage": 39.62, "elapsed_time": "2:58:27", "remaining_time": "4:31:58"}
|
||||
{"current_steps": 1755, "total_steps": 4417, "loss": 0.3694, "lr": 3.017742673161161e-05, "epoch": 2.781299524564184, "percentage": 39.73, "elapsed_time": "2:58:56", "remaining_time": "4:31:25"}
|
||||
{"current_steps": 1760, "total_steps": 4417, "loss": 0.3745, "lr": 3.0109311802641394e-05, "epoch": 2.7892234548335972, "percentage": 39.85, "elapsed_time": "2:59:28", "remaining_time": "4:30:56"}
|
||||
{"current_steps": 1765, "total_steps": 4417, "loss": 0.3456, "lr": 3.0041039008431006e-05, "epoch": 2.797147385103011, "percentage": 39.96, "elapsed_time": "2:59:56", "remaining_time": "4:30:22"}
|
||||
{"current_steps": 1770, "total_steps": 4417, "loss": 0.3595, "lr": 2.9972609415116424e-05, "epoch": 2.805071315372425, "percentage": 40.07, "elapsed_time": "3:00:31", "remaining_time": "4:29:57"}
|
||||
{"current_steps": 1775, "total_steps": 4417, "loss": 0.3546, "lr": 2.990402409128218e-05, "epoch": 2.8129952456418383, "percentage": 40.19, "elapsed_time": "3:00:52", "remaining_time": "4:29:12"}
|
||||
{"current_steps": 1780, "total_steps": 4417, "loss": 0.3695, "lr": 2.983528410794466e-05, "epoch": 2.8209191759112517, "percentage": 40.3, "elapsed_time": "3:01:22", "remaining_time": "4:28:42"}
|
||||
{"current_steps": 1785, "total_steps": 4417, "loss": 0.3649, "lr": 2.9766390538535382e-05, "epoch": 2.8288431061806656, "percentage": 40.41, "elapsed_time": "3:01:42", "remaining_time": "4:27:55"}
|
||||
{"current_steps": 1790, "total_steps": 4417, "loss": 0.3545, "lr": 2.9697344458884265e-05, "epoch": 2.8367670364500794, "percentage": 40.53, "elapsed_time": "3:02:11", "remaining_time": "4:27:22"}
|
||||
{"current_steps": 1795, "total_steps": 4417, "loss": 0.365, "lr": 2.9628146947202756e-05, "epoch": 2.844690966719493, "percentage": 40.64, "elapsed_time": "3:02:46", "remaining_time": "4:26:58"}
|
||||
{"current_steps": 1800, "total_steps": 4417, "loss": 0.3718, "lr": 2.9558799084067074e-05, "epoch": 2.8526148969889062, "percentage": 40.75, "elapsed_time": "3:03:08", "remaining_time": "4:26:16"}
|
||||
{"current_steps": 1805, "total_steps": 4417, "loss": 0.3522, "lr": 2.9489301952401286e-05, "epoch": 2.86053882725832, "percentage": 40.86, "elapsed_time": "3:03:43", "remaining_time": "4:25:51"}
|
||||
{"current_steps": 1810, "total_steps": 4417, "loss": 0.3624, "lr": 2.9419656637460413e-05, "epoch": 2.868462757527734, "percentage": 40.98, "elapsed_time": "3:04:12", "remaining_time": "4:25:19"}
|
||||
{"current_steps": 1815, "total_steps": 4417, "loss": 0.3402, "lr": 2.9349864226813475e-05, "epoch": 2.8763866877971473, "percentage": 41.09, "elapsed_time": "3:04:44", "remaining_time": "4:24:50"}
|
||||
{"current_steps": 1820, "total_steps": 4417, "loss": 0.343, "lr": 2.9279925810326516e-05, "epoch": 2.8843106180665607, "percentage": 41.2, "elapsed_time": "3:05:14", "remaining_time": "4:24:19"}
|
||||
{"current_steps": 1825, "total_steps": 4417, "loss": 0.359, "lr": 2.9209842480145587e-05, "epoch": 2.8922345483359746, "percentage": 41.32, "elapsed_time": "3:05:49", "remaining_time": "4:23:55"}
|
||||
{"current_steps": 1830, "total_steps": 4417, "loss": 0.362, "lr": 2.913961533067968e-05, "epoch": 2.9001584786053884, "percentage": 41.43, "elapsed_time": "3:06:21", "remaining_time": "4:23:27"}
|
||||
{"current_steps": 1835, "total_steps": 4417, "loss": 0.371, "lr": 2.906924545858364e-05, "epoch": 2.908082408874802, "percentage": 41.54, "elapsed_time": "3:06:51", "remaining_time": "4:22:55"}
|
||||
{"current_steps": 1840, "total_steps": 4417, "loss": 0.3613, "lr": 2.8998733962741057e-05, "epoch": 2.9160063391442153, "percentage": 41.66, "elapsed_time": "3:07:26", "remaining_time": "4:22:31"}
|
||||
{"current_steps": 1845, "total_steps": 4417, "loss": 0.3627, "lr": 2.8928081944247088e-05, "epoch": 2.923930269413629, "percentage": 41.77, "elapsed_time": "3:08:03", "remaining_time": "4:22:08"}
|
||||
{"current_steps": 1850, "total_steps": 4417, "loss": 0.3724, "lr": 2.8857290506391243e-05, "epoch": 2.931854199683043, "percentage": 41.88, "elapsed_time": "3:08:33", "remaining_time": "4:21:37"}
|
||||
{"current_steps": 1855, "total_steps": 4417, "loss": 0.3374, "lr": 2.8786360754640213e-05, "epoch": 2.9397781299524564, "percentage": 42.0, "elapsed_time": "3:08:59", "remaining_time": "4:21:01"}
|
||||
{"current_steps": 1860, "total_steps": 4417, "loss": 0.3623, "lr": 2.8715293796620565e-05, "epoch": 2.94770206022187, "percentage": 42.11, "elapsed_time": "3:09:29", "remaining_time": "4:20:30"}
|
||||
{"current_steps": 1865, "total_steps": 4417, "loss": 0.3495, "lr": 2.8644090742101444e-05, "epoch": 2.9556259904912836, "percentage": 42.22, "elapsed_time": "3:10:02", "remaining_time": "4:20:02"}
|
||||
{"current_steps": 1870, "total_steps": 4417, "loss": 0.3692, "lr": 2.8572752702977265e-05, "epoch": 2.9635499207606975, "percentage": 42.34, "elapsed_time": "3:10:39", "remaining_time": "4:19:41"}
|
||||
{"current_steps": 1875, "total_steps": 4417, "loss": 0.3851, "lr": 2.8501280793250343e-05, "epoch": 2.971473851030111, "percentage": 42.45, "elapsed_time": "3:11:05", "remaining_time": "4:19:04"}
|
||||
{"current_steps": 1880, "total_steps": 4417, "loss": 0.3629, "lr": 2.8429676129013476e-05, "epoch": 2.9793977812995247, "percentage": 42.56, "elapsed_time": "3:11:39", "remaining_time": "4:18:37"}
|
||||
{"current_steps": 1885, "total_steps": 4417, "loss": 0.3818, "lr": 2.835793982843255e-05, "epoch": 2.987321711568938, "percentage": 42.68, "elapsed_time": "3:12:10", "remaining_time": "4:18:07"}
|
||||
{"current_steps": 1890, "total_steps": 4417, "loss": 0.3654, "lr": 2.8286073011729044e-05, "epoch": 2.995245641838352, "percentage": 42.79, "elapsed_time": "3:12:44", "remaining_time": "4:17:42"}
|
||||
{"current_steps": 1895, "total_steps": 4417, "loss": 0.3712, "lr": 2.8214076801162575e-05, "epoch": 3.0031695721077654, "percentage": 42.9, "elapsed_time": "3:13:12", "remaining_time": "4:17:07"}
|
||||
{"current_steps": 1900, "total_steps": 4417, "loss": 0.3455, "lr": 2.814195232101334e-05, "epoch": 3.011093502377179, "percentage": 43.02, "elapsed_time": "3:13:47", "remaining_time": "4:16:43"}
|
||||
{"current_steps": 1905, "total_steps": 4417, "loss": 0.3123, "lr": 2.806970069756456e-05, "epoch": 3.0190174326465926, "percentage": 43.13, "elapsed_time": "3:14:25", "remaining_time": "4:16:22"}
|
||||
{"current_steps": 1910, "total_steps": 4417, "loss": 0.331, "lr": 2.7997323059084926e-05, "epoch": 3.0269413629160065, "percentage": 43.24, "elapsed_time": "3:14:53", "remaining_time": "4:15:48"}
|
||||
{"current_steps": 1915, "total_steps": 4417, "loss": 0.3444, "lr": 2.7924820535810947e-05, "epoch": 3.03486529318542, "percentage": 43.36, "elapsed_time": "3:15:23", "remaining_time": "4:15:17"}
|
||||
{"current_steps": 1920, "total_steps": 4417, "loss": 0.3104, "lr": 2.7852194259929306e-05, "epoch": 3.0427892234548337, "percentage": 43.47, "elapsed_time": "3:15:53", "remaining_time": "4:14:45"}
|
||||
{"current_steps": 1925, "total_steps": 4417, "loss": 0.3243, "lr": 2.7779445365559196e-05, "epoch": 3.050713153724247, "percentage": 43.58, "elapsed_time": "3:16:24", "remaining_time": "4:14:15"}
|
||||
{"current_steps": 1930, "total_steps": 4417, "loss": 0.3458, "lr": 2.7706574988734596e-05, "epoch": 3.058637083993661, "percentage": 43.69, "elapsed_time": "3:16:58", "remaining_time": "4:13:49"}
|
||||
{"current_steps": 1935, "total_steps": 4417, "loss": 0.3272, "lr": 2.7633584267386518e-05, "epoch": 3.0665610142630744, "percentage": 43.81, "elapsed_time": "3:17:29", "remaining_time": "4:13:18"}
|
||||
{"current_steps": 1940, "total_steps": 4417, "loss": 0.3374, "lr": 2.7560474341325276e-05, "epoch": 3.074484944532488, "percentage": 43.92, "elapsed_time": "3:18:03", "remaining_time": "4:12:52"}
|
||||
{"current_steps": 1945, "total_steps": 4417, "loss": 0.3264, "lr": 2.7487246352222648e-05, "epoch": 3.0824088748019016, "percentage": 44.03, "elapsed_time": "3:18:28", "remaining_time": "4:12:14"}
|
||||
{"current_steps": 1950, "total_steps": 4417, "loss": 0.3155, "lr": 2.7413901443594073e-05, "epoch": 3.0903328050713155, "percentage": 44.15, "elapsed_time": "3:18:51", "remaining_time": "4:11:34"}
|
||||
{"current_steps": 1955, "total_steps": 4417, "loss": 0.3474, "lr": 2.7340440760780778e-05, "epoch": 3.098256735340729, "percentage": 44.26, "elapsed_time": "3:19:23", "remaining_time": "4:11:06"}
|
||||
{"current_steps": 1960, "total_steps": 4417, "loss": 0.3263, "lr": 2.7266865450931907e-05, "epoch": 3.1061806656101427, "percentage": 44.37, "elapsed_time": "3:19:49", "remaining_time": "4:10:30"}
|
||||
{"current_steps": 1965, "total_steps": 4417, "loss": 0.333, "lr": 2.7193176662986593e-05, "epoch": 3.114104595879556, "percentage": 44.49, "elapsed_time": "3:20:13", "remaining_time": "4:09:50"}
|
||||
{"current_steps": 1970, "total_steps": 4417, "loss": 0.3367, "lr": 2.7119375547656025e-05, "epoch": 3.12202852614897, "percentage": 44.6, "elapsed_time": "3:20:43", "remaining_time": "4:09:20"}
|
||||
{"current_steps": 1975, "total_steps": 4417, "loss": 0.3357, "lr": 2.704546325740548e-05, "epoch": 3.1299524564183834, "percentage": 44.71, "elapsed_time": "3:21:10", "remaining_time": "4:08:44"}
|
||||
{"current_steps": 1980, "total_steps": 4417, "loss": 0.333, "lr": 2.6971440946436306e-05, "epoch": 3.1378763866877972, "percentage": 44.83, "elapsed_time": "3:21:45", "remaining_time": "4:08:19"}
|
||||
{"current_steps": 1985, "total_steps": 4417, "loss": 0.3226, "lr": 2.689730977066795e-05, "epoch": 3.1458003169572106, "percentage": 44.94, "elapsed_time": "3:22:16", "remaining_time": "4:07:49"}
|
||||
{"current_steps": 1990, "total_steps": 4417, "loss": 0.3259, "lr": 2.682307088771984e-05, "epoch": 3.1537242472266245, "percentage": 45.05, "elapsed_time": "3:22:50", "remaining_time": "4:07:23"}
|
||||
{"current_steps": 1995, "total_steps": 4417, "loss": 0.3307, "lr": 2.6748725456893355e-05, "epoch": 3.161648177496038, "percentage": 45.17, "elapsed_time": "3:23:16", "remaining_time": "4:06:46"}
|
||||
{"current_steps": 2000, "total_steps": 4417, "loss": 0.3513, "lr": 2.66742746391537e-05, "epoch": 3.1695721077654517, "percentage": 45.28, "elapsed_time": "3:23:42", "remaining_time": "4:06:10"}
|
||||
{"current_steps": 2005, "total_steps": 4417, "loss": 0.3545, "lr": 2.6599719597111794e-05, "epoch": 3.177496038034865, "percentage": 45.39, "elapsed_time": "3:24:07", "remaining_time": "4:05:34"}
|
||||
{"current_steps": 2010, "total_steps": 4417, "loss": 0.3181, "lr": 2.6525061495006103e-05, "epoch": 3.185419968304279, "percentage": 45.51, "elapsed_time": "3:24:43", "remaining_time": "4:05:09"}
|
||||
{"current_steps": 2015, "total_steps": 4417, "loss": 0.3291, "lr": 2.6450301498684443e-05, "epoch": 3.1933438985736924, "percentage": 45.62, "elapsed_time": "3:25:15", "remaining_time": "4:04:41"}
|
||||
{"current_steps": 2020, "total_steps": 4417, "loss": 0.332, "lr": 2.637544077558581e-05, "epoch": 3.2012678288431062, "percentage": 45.73, "elapsed_time": "3:25:40", "remaining_time": "4:04:03"}
|
||||
{"current_steps": 2025, "total_steps": 4417, "loss": 0.3377, "lr": 2.630048049472213e-05, "epoch": 3.2091917591125196, "percentage": 45.85, "elapsed_time": "3:26:05", "remaining_time": "4:03:26"}
|
||||
{"current_steps": 2030, "total_steps": 4417, "loss": 0.3213, "lr": 2.622542182666e-05, "epoch": 3.2171156893819335, "percentage": 45.96, "elapsed_time": "3:26:36", "remaining_time": "4:02:56"}
|
||||
{"current_steps": 2035, "total_steps": 4417, "loss": 0.3364, "lr": 2.6150265943502424e-05, "epoch": 3.225039619651347, "percentage": 46.07, "elapsed_time": "3:27:07", "remaining_time": "4:02:26"}
|
||||
{"current_steps": 2040, "total_steps": 4417, "loss": 0.3167, "lr": 2.6075014018870483e-05, "epoch": 3.2329635499207607, "percentage": 46.19, "elapsed_time": "3:27:35", "remaining_time": "4:01:52"}
|
||||
{"current_steps": 2045, "total_steps": 4417, "loss": 0.3069, "lr": 2.599966722788504e-05, "epoch": 3.240887480190174, "percentage": 46.3, "elapsed_time": "3:28:05", "remaining_time": "4:01:22"}
|
||||
{"current_steps": 2050, "total_steps": 4417, "loss": 0.3402, "lr": 2.5924226747148372e-05, "epoch": 3.248811410459588, "percentage": 46.41, "elapsed_time": "3:28:40", "remaining_time": "4:00:56"}
|
||||
{"current_steps": 2055, "total_steps": 4417, "loss": 0.3183, "lr": 2.584869375472579e-05, "epoch": 3.2567353407290014, "percentage": 46.52, "elapsed_time": "3:29:16", "remaining_time": "4:00:32"}
|
||||
{"current_steps": 2060, "total_steps": 4417, "loss": 0.3181, "lr": 2.577306943012725e-05, "epoch": 3.2646592709984152, "percentage": 46.64, "elapsed_time": "3:29:43", "remaining_time": "3:59:57"}
|
||||
{"current_steps": 2065, "total_steps": 4417, "loss": 0.3259, "lr": 2.569735495428896e-05, "epoch": 3.272583201267829, "percentage": 46.75, "elapsed_time": "3:30:12", "remaining_time": "3:59:25"}
|
||||
{"current_steps": 2070, "total_steps": 4417, "loss": 0.3161, "lr": 2.562155150955488e-05, "epoch": 3.2805071315372425, "percentage": 46.86, "elapsed_time": "3:30:45", "remaining_time": "3:58:57"}
|
||||
{"current_steps": 2075, "total_steps": 4417, "loss": 0.3336, "lr": 2.554566027965832e-05, "epoch": 3.288431061806656, "percentage": 46.98, "elapsed_time": "3:31:21", "remaining_time": "3:58:32"}
|
||||
{"current_steps": 2080, "total_steps": 4417, "loss": 0.3378, "lr": 2.5469682449703414e-05, "epoch": 3.2963549920760697, "percentage": 47.09, "elapsed_time": "3:31:52", "remaining_time": "3:58:03"}
|
||||
{"current_steps": 2085, "total_steps": 4417, "loss": 0.3471, "lr": 2.539361920614662e-05, "epoch": 3.3042789223454836, "percentage": 47.2, "elapsed_time": "3:32:24", "remaining_time": "3:57:33"}
|
||||
{"current_steps": 2090, "total_steps": 4417, "loss": 0.3348, "lr": 2.5317471736778226e-05, "epoch": 3.312202852614897, "percentage": 47.32, "elapsed_time": "3:32:49", "remaining_time": "3:56:57"}
|
||||
{"current_steps": 2095, "total_steps": 4417, "loss": 0.318, "lr": 2.524124123070375e-05, "epoch": 3.3201267828843104, "percentage": 47.43, "elapsed_time": "3:33:27", "remaining_time": "3:56:35"}
|
||||
{"current_steps": 2100, "total_steps": 4417, "loss": 0.3251, "lr": 2.5164928878325404e-05, "epoch": 3.3280507131537242, "percentage": 47.54, "elapsed_time": "3:33:52", "remaining_time": "3:55:58"}
|
||||
{"current_steps": 2105, "total_steps": 4417, "loss": 0.3245, "lr": 2.5088535871323502e-05, "epoch": 3.335974643423138, "percentage": 47.66, "elapsed_time": "3:34:21", "remaining_time": "3:55:26"}
|
||||
{"current_steps": 2110, "total_steps": 4417, "loss": 0.3421, "lr": 2.5012063402637843e-05, "epoch": 3.3438985736925515, "percentage": 47.77, "elapsed_time": "3:34:53", "remaining_time": "3:54:57"}
|
||||
{"current_steps": 2115, "total_steps": 4417, "loss": 0.3188, "lr": 2.4935512666449085e-05, "epoch": 3.351822503961965, "percentage": 47.88, "elapsed_time": "3:35:25", "remaining_time": "3:54:28"}
|
||||
{"current_steps": 2120, "total_steps": 4417, "loss": 0.3242, "lr": 2.4858884858160107e-05, "epoch": 3.3597464342313788, "percentage": 48.0, "elapsed_time": "3:35:55", "remaining_time": "3:53:57"}
|
||||
{"current_steps": 2125, "total_steps": 4417, "loss": 0.3354, "lr": 2.4782181174377314e-05, "epoch": 3.3676703645007926, "percentage": 48.11, "elapsed_time": "3:36:24", "remaining_time": "3:53:24"}
|
||||
{"current_steps": 2130, "total_steps": 4417, "loss": 0.3422, "lr": 2.470540281289199e-05, "epoch": 3.375594294770206, "percentage": 48.22, "elapsed_time": "3:36:54", "remaining_time": "3:52:53"}
|
||||
{"current_steps": 2135, "total_steps": 4417, "loss": 0.324, "lr": 2.4628550972661557e-05, "epoch": 3.3835182250396194, "percentage": 48.34, "elapsed_time": "3:37:27", "remaining_time": "3:52:26"}
|
||||
{"current_steps": 2140, "total_steps": 4417, "loss": 0.3212, "lr": 2.455162685379087e-05, "epoch": 3.3914421553090333, "percentage": 48.45, "elapsed_time": "3:38:02", "remaining_time": "3:51:59"}
|
||||
{"current_steps": 2145, "total_steps": 4417, "loss": 0.3236, "lr": 2.447463165751349e-05, "epoch": 3.399366085578447, "percentage": 48.56, "elapsed_time": "3:38:32", "remaining_time": "3:51:28"}
|
||||
{"current_steps": 2150, "total_steps": 4417, "loss": 0.3479, "lr": 2.4397566586172883e-05, "epoch": 3.4072900158478605, "percentage": 48.68, "elapsed_time": "3:38:57", "remaining_time": "3:50:52"}
|
||||
{"current_steps": 2155, "total_steps": 4417, "loss": 0.3337, "lr": 2.4320432843203704e-05, "epoch": 3.4152139461172744, "percentage": 48.79, "elapsed_time": "3:39:33", "remaining_time": "3:50:27"}
|
||||
{"current_steps": 2160, "total_steps": 4417, "loss": 0.3461, "lr": 2.4243231633112943e-05, "epoch": 3.4231378763866878, "percentage": 48.9, "elapsed_time": "3:40:04", "remaining_time": "3:49:57"}
|
||||
{"current_steps": 2165, "total_steps": 4417, "loss": 0.3177, "lr": 2.4165964161461167e-05, "epoch": 3.4310618066561016, "percentage": 49.02, "elapsed_time": "3:40:38", "remaining_time": "3:49:30"}
|
||||
{"current_steps": 2170, "total_steps": 4417, "loss": 0.332, "lr": 2.408863163484366e-05, "epoch": 3.438985736925515, "percentage": 49.13, "elapsed_time": "3:41:08", "remaining_time": "3:48:59"}
|
||||
{"current_steps": 2175, "total_steps": 4417, "loss": 0.319, "lr": 2.401123526087159e-05, "epoch": 3.446909667194929, "percentage": 49.24, "elapsed_time": "3:41:37", "remaining_time": "3:48:27"}
|
||||
{"current_steps": 2180, "total_steps": 4417, "loss": 0.3332, "lr": 2.3933776248153168e-05, "epoch": 3.4548335974643423, "percentage": 49.35, "elapsed_time": "3:42:11", "remaining_time": "3:47:59"}
|
||||
{"current_steps": 2185, "total_steps": 4417, "loss": 0.3282, "lr": 2.385625580627474e-05, "epoch": 3.462757527733756, "percentage": 49.47, "elapsed_time": "3:42:44", "remaining_time": "3:47:32"}
|
||||
{"current_steps": 2190, "total_steps": 4417, "loss": 0.3361, "lr": 2.377867514578194e-05, "epoch": 3.4706814580031695, "percentage": 49.58, "elapsed_time": "3:43:10", "remaining_time": "3:46:56"}
|
||||
{"current_steps": 2195, "total_steps": 4417, "loss": 0.349, "lr": 2.3701035478160763e-05, "epoch": 3.4786053882725834, "percentage": 49.69, "elapsed_time": "3:43:42", "remaining_time": "3:46:27"}
|
||||
{"current_steps": 2200, "total_steps": 4417, "loss": 0.3357, "lr": 2.3623338015818623e-05, "epoch": 3.4865293185419968, "percentage": 49.81, "elapsed_time": "3:44:15", "remaining_time": "3:45:58"}
|
||||
{"current_steps": 2205, "total_steps": 4417, "loss": 0.3342, "lr": 2.3545583972065484e-05, "epoch": 3.4944532488114106, "percentage": 49.92, "elapsed_time": "3:44:49", "remaining_time": "3:45:32"}
|
||||
{"current_steps": 2210, "total_steps": 4417, "loss": 0.3277, "lr": 2.346777456109485e-05, "epoch": 3.502377179080824, "percentage": 50.03, "elapsed_time": "3:45:19", "remaining_time": "3:45:00"}
|
||||
{"current_steps": 2215, "total_steps": 4417, "loss": 0.3335, "lr": 2.3389910997964832e-05, "epoch": 3.510301109350238, "percentage": 50.15, "elapsed_time": "3:45:48", "remaining_time": "3:44:29"}
|
||||
{"current_steps": 2220, "total_steps": 4417, "loss": 0.331, "lr": 2.3311994498579185e-05, "epoch": 3.5182250396196513, "percentage": 50.26, "elapsed_time": "3:46:16", "remaining_time": "3:43:55"}
|
||||
{"current_steps": 2225, "total_steps": 4417, "loss": 0.3326, "lr": 2.3234026279668296e-05, "epoch": 3.526148969889065, "percentage": 50.37, "elapsed_time": "3:46:43", "remaining_time": "3:43:22"}
|
||||
{"current_steps": 2230, "total_steps": 4417, "loss": 0.3254, "lr": 2.3156007558770192e-05, "epoch": 3.5340729001584785, "percentage": 50.49, "elapsed_time": "3:47:14", "remaining_time": "3:42:51"}
|
||||
{"current_steps": 2235, "total_steps": 4417, "loss": 0.3331, "lr": 2.307793955421154e-05, "epoch": 3.5419968304278924, "percentage": 50.6, "elapsed_time": "3:47:53", "remaining_time": "3:42:28"}
|
||||
{"current_steps": 2240, "total_steps": 4417, "loss": 0.3536, "lr": 2.299982348508861e-05, "epoch": 3.5499207606973058, "percentage": 50.71, "elapsed_time": "3:48:25", "remaining_time": "3:42:00"}
|
||||
{"current_steps": 2245, "total_steps": 4417, "loss": 0.3353, "lr": 2.2921660571248237e-05, "epoch": 3.5578446909667196, "percentage": 50.83, "elapsed_time": "3:48:50", "remaining_time": "3:41:24"}
|
||||
{"current_steps": 2250, "total_steps": 4417, "loss": 0.3364, "lr": 2.2843452033268775e-05, "epoch": 3.565768621236133, "percentage": 50.94, "elapsed_time": "3:49:23", "remaining_time": "3:40:56"}
|
||||
{"current_steps": 2255, "total_steps": 4417, "loss": 0.3335, "lr": 2.2765199092441033e-05, "epoch": 3.573692551505547, "percentage": 51.05, "elapsed_time": "3:49:56", "remaining_time": "3:40:27"}
|
||||
{"current_steps": 2260, "total_steps": 4417, "loss": 0.3326, "lr": 2.2686902970749218e-05, "epoch": 3.5816164817749603, "percentage": 51.17, "elapsed_time": "3:50:30", "remaining_time": "3:40:00"}
|
||||
{"current_steps": 2265, "total_steps": 4417, "loss": 0.3363, "lr": 2.260856489085183e-05, "epoch": 3.589540412044374, "percentage": 51.28, "elapsed_time": "3:51:05", "remaining_time": "3:39:33"}
|
||||
{"current_steps": 2270, "total_steps": 4417, "loss": 0.3408, "lr": 2.2530186076062588e-05, "epoch": 3.5974643423137875, "percentage": 51.39, "elapsed_time": "3:51:32", "remaining_time": "3:38:59"}
|
||||
{"current_steps": 2275, "total_steps": 4417, "loss": 0.3105, "lr": 2.2451767750331316e-05, "epoch": 3.6053882725832014, "percentage": 51.51, "elapsed_time": "3:51:58", "remaining_time": "3:38:25"}
|
||||
{"current_steps": 2280, "total_steps": 4417, "loss": 0.3089, "lr": 2.2373311138224838e-05, "epoch": 3.613312202852615, "percentage": 51.62, "elapsed_time": "3:52:27", "remaining_time": "3:37:52"}
|
||||
{"current_steps": 2285, "total_steps": 4417, "loss": 0.3274, "lr": 2.2294817464907852e-05, "epoch": 3.6212361331220286, "percentage": 51.73, "elapsed_time": "3:52:53", "remaining_time": "3:37:17"}
|
||||
{"current_steps": 2290, "total_steps": 4417, "loss": 0.3313, "lr": 2.221628795612379e-05, "epoch": 3.629160063391442, "percentage": 51.85, "elapsed_time": "3:53:17", "remaining_time": "3:36:41"}
|
||||
{"current_steps": 2295, "total_steps": 4417, "loss": 0.319, "lr": 2.2137723838175693e-05, "epoch": 3.637083993660856, "percentage": 51.96, "elapsed_time": "3:53:44", "remaining_time": "3:36:07"}
|
||||
{"current_steps": 2300, "total_steps": 4417, "loss": 0.3461, "lr": 2.205912633790704e-05, "epoch": 3.6450079239302693, "percentage": 52.07, "elapsed_time": "3:54:22", "remaining_time": "3:35:43"}
|
||||
{"current_steps": 2305, "total_steps": 4417, "loss": 0.3284, "lr": 2.198049668268262e-05, "epoch": 3.652931854199683, "percentage": 52.18, "elapsed_time": "3:54:51", "remaining_time": "3:35:12"}
|
||||
{"current_steps": 2310, "total_steps": 4417, "loss": 0.3317, "lr": 2.1901836100369335e-05, "epoch": 3.6608557844690965, "percentage": 52.3, "elapsed_time": "3:55:18", "remaining_time": "3:34:37"}
|
||||
{"current_steps": 2315, "total_steps": 4417, "loss": 0.3365, "lr": 2.1823145819317048e-05, "epoch": 3.6687797147385104, "percentage": 52.41, "elapsed_time": "3:55:48", "remaining_time": "3:34:06"}
|
||||
{"current_steps": 2320, "total_steps": 4417, "loss": 0.3361, "lr": 2.174442706833938e-05, "epoch": 3.676703645007924, "percentage": 52.52, "elapsed_time": "3:56:15", "remaining_time": "3:33:32"}
|
||||
{"current_steps": 2325, "total_steps": 4417, "loss": 0.3247, "lr": 2.1665681076694544e-05, "epoch": 3.6846275752773376, "percentage": 52.64, "elapsed_time": "3:56:45", "remaining_time": "3:33:02"}
|
||||
{"current_steps": 2330, "total_steps": 4417, "loss": 0.3462, "lr": 2.1586909074066136e-05, "epoch": 3.692551505546751, "percentage": 52.75, "elapsed_time": "3:57:13", "remaining_time": "3:32:28"}
|
||||
{"current_steps": 2335, "total_steps": 4417, "loss": 0.3447, "lr": 2.1508112290543933e-05, "epoch": 3.700475435816165, "percentage": 52.86, "elapsed_time": "3:57:41", "remaining_time": "3:31:56"}
|
||||
{"current_steps": 2340, "total_steps": 4417, "loss": 0.3344, "lr": 2.142929195660469e-05, "epoch": 3.7083993660855783, "percentage": 52.98, "elapsed_time": "3:58:09", "remaining_time": "3:31:23"}
|
||||
{"current_steps": 2345, "total_steps": 4417, "loss": 0.3187, "lr": 2.1350449303092917e-05, "epoch": 3.716323296354992, "percentage": 53.09, "elapsed_time": "3:58:41", "remaining_time": "3:30:54"}
|
||||
{"current_steps": 2350, "total_steps": 4417, "loss": 0.3281, "lr": 2.1271585561201666e-05, "epoch": 3.7242472266244055, "percentage": 53.2, "elapsed_time": "3:59:17", "remaining_time": "3:30:28"}
|
||||
{"current_steps": 2355, "total_steps": 4417, "loss": 0.3324, "lr": 2.1192701962453296e-05, "epoch": 3.7321711568938194, "percentage": 53.32, "elapsed_time": "3:59:49", "remaining_time": "3:29:59"}
|
||||
{"current_steps": 2360, "total_steps": 4417, "loss": 0.3162, "lr": 2.1113799738680254e-05, "epoch": 3.740095087163233, "percentage": 53.43, "elapsed_time": "4:00:23", "remaining_time": "3:29:32"}
|
||||
{"current_steps": 2365, "total_steps": 4417, "loss": 0.3333, "lr": 2.103488012200583e-05, "epoch": 3.7480190174326466, "percentage": 53.54, "elapsed_time": "4:00:47", "remaining_time": "3:28:55"}
|
||||
{"current_steps": 2370, "total_steps": 4417, "loss": 0.3194, "lr": 2.095594434482491e-05, "epoch": 3.75594294770206, "percentage": 53.66, "elapsed_time": "4:01:19", "remaining_time": "3:28:26"}
|
||||
{"current_steps": 2375, "total_steps": 4417, "loss": 0.3578, "lr": 2.0876993639784752e-05, "epoch": 3.763866877971474, "percentage": 53.77, "elapsed_time": "4:01:55", "remaining_time": "3:28:00"}
|
||||
{"current_steps": 2380, "total_steps": 4417, "loss": 0.3409, "lr": 2.0798029239765722e-05, "epoch": 3.7717908082408877, "percentage": 53.88, "elapsed_time": "4:02:22", "remaining_time": "3:27:26"}
|
||||
{"current_steps": 2385, "total_steps": 4417, "loss": 0.3278, "lr": 2.0719052377862033e-05, "epoch": 3.779714738510301, "percentage": 54.0, "elapsed_time": "4:02:54", "remaining_time": "3:26:57"}
|
||||
{"current_steps": 2390, "total_steps": 4417, "loss": 0.326, "lr": 2.0640064287362525e-05, "epoch": 3.7876386687797146, "percentage": 54.11, "elapsed_time": "4:03:26", "remaining_time": "3:26:28"}
|
||||
{"current_steps": 2395, "total_steps": 4417, "loss": 0.3336, "lr": 2.056106620173135e-05, "epoch": 3.7955625990491284, "percentage": 54.22, "elapsed_time": "4:03:55", "remaining_time": "3:25:56"}
|
||||
{"current_steps": 2400, "total_steps": 4417, "loss": 0.326, "lr": 2.048205935458877e-05, "epoch": 3.8034865293185423, "percentage": 54.34, "elapsed_time": "4:04:35", "remaining_time": "3:25:33"}
|
||||
{"current_steps": 2405, "total_steps": 4417, "loss": 0.3082, "lr": 2.0403044979691854e-05, "epoch": 3.8114104595879557, "percentage": 54.45, "elapsed_time": "4:05:07", "remaining_time": "3:25:03"}
|
||||
{"current_steps": 2410, "total_steps": 4417, "loss": 0.3451, "lr": 2.032402431091522e-05, "epoch": 3.819334389857369, "percentage": 54.56, "elapsed_time": "4:05:34", "remaining_time": "3:24:30"}
|
||||
{"current_steps": 2415, "total_steps": 4417, "loss": 0.3408, "lr": 2.0244998582231767e-05, "epoch": 3.827258320126783, "percentage": 54.68, "elapsed_time": "4:06:01", "remaining_time": "3:23:56"}
|
||||
{"current_steps": 2420, "total_steps": 4417, "loss": 0.3353, "lr": 2.0165969027693425e-05, "epoch": 3.8351822503961968, "percentage": 54.79, "elapsed_time": "4:06:35", "remaining_time": "3:23:29"}
|
||||
{"current_steps": 2425, "total_steps": 4417, "loss": 0.3313, "lr": 2.008693688141185e-05, "epoch": 3.84310618066561, "percentage": 54.9, "elapsed_time": "4:07:05", "remaining_time": "3:22:58"}
|
||||
{"current_steps": 2430, "total_steps": 4417, "loss": 0.3171, "lr": 2.0007903377539187e-05, "epoch": 3.8510301109350236, "percentage": 55.01, "elapsed_time": "4:07:37", "remaining_time": "3:22:28"}
|
||||
{"current_steps": 2435, "total_steps": 4417, "loss": 0.337, "lr": 1.992886975024876e-05, "epoch": 3.8589540412044374, "percentage": 55.13, "elapsed_time": "4:08:05", "remaining_time": "3:21:56"}
|
||||
{"current_steps": 2440, "total_steps": 4417, "loss": 0.3328, "lr": 1.984983723371584e-05, "epoch": 3.8668779714738513, "percentage": 55.24, "elapsed_time": "4:08:41", "remaining_time": "3:21:29"}
|
||||
{"current_steps": 2445, "total_steps": 4417, "loss": 0.3311, "lr": 1.977080706209835e-05, "epoch": 3.8748019017432647, "percentage": 55.35, "elapsed_time": "4:09:08", "remaining_time": "3:20:56"}
|
||||
{"current_steps": 2450, "total_steps": 4417, "loss": 0.3263, "lr": 1.9691780469517578e-05, "epoch": 3.882725832012678, "percentage": 55.47, "elapsed_time": "4:09:40", "remaining_time": "3:20:27"}
|
||||
{"current_steps": 2455, "total_steps": 4417, "loss": 0.3189, "lr": 1.961275869003894e-05, "epoch": 3.890649762282092, "percentage": 55.58, "elapsed_time": "4:10:07", "remaining_time": "3:19:53"}
|
||||
{"current_steps": 2460, "total_steps": 4417, "loss": 0.3253, "lr": 1.9533742957652683e-05, "epoch": 3.8985736925515058, "percentage": 55.69, "elapsed_time": "4:10:36", "remaining_time": "3:19:22"}
|
||||
{"current_steps": 2465, "total_steps": 4417, "loss": 0.3296, "lr": 1.945473450625463e-05, "epoch": 3.906497622820919, "percentage": 55.81, "elapsed_time": "4:11:09", "remaining_time": "3:18:53"}
|
||||
{"current_steps": 2470, "total_steps": 4417, "loss": 0.3243, "lr": 1.9375734569626904e-05, "epoch": 3.9144215530903326, "percentage": 55.92, "elapsed_time": "4:11:42", "remaining_time": "3:18:24"}
|
||||
{"current_steps": 2475, "total_steps": 4417, "loss": 0.3309, "lr": 1.9296744381418653e-05, "epoch": 3.9223454833597464, "percentage": 56.03, "elapsed_time": "4:12:09", "remaining_time": "3:17:51"}
|
||||
{"current_steps": 2480, "total_steps": 4417, "loss": 0.3336, "lr": 1.9217765175126808e-05, "epoch": 3.9302694136291603, "percentage": 56.15, "elapsed_time": "4:12:35", "remaining_time": "3:17:17"}
|
||||
{"current_steps": 2485, "total_steps": 4417, "loss": 0.3389, "lr": 1.9138798184076805e-05, "epoch": 3.9381933438985737, "percentage": 56.26, "elapsed_time": "4:13:05", "remaining_time": "3:16:46"}
|
||||
{"current_steps": 2490, "total_steps": 4417, "loss": 0.3386, "lr": 1.9059844641403323e-05, "epoch": 3.946117274167987, "percentage": 56.37, "elapsed_time": "4:13:38", "remaining_time": "3:16:17"}
|
||||
{"current_steps": 2495, "total_steps": 4417, "loss": 0.3316, "lr": 1.898090578003104e-05, "epoch": 3.954041204437401, "percentage": 56.49, "elapsed_time": "4:14:10", "remaining_time": "3:15:48"}
|
||||
{"current_steps": 2500, "total_steps": 4417, "loss": 0.3479, "lr": 1.890198283265537e-05, "epoch": 3.9619651347068148, "percentage": 56.6, "elapsed_time": "4:14:33", "remaining_time": "3:15:11"}
|
||||
{"current_steps": 2505, "total_steps": 4417, "loss": 0.328, "lr": 1.8823077031723217e-05, "epoch": 3.969889064976228, "percentage": 56.71, "elapsed_time": "4:15:03", "remaining_time": "3:14:40"}
|
||||
{"current_steps": 2510, "total_steps": 4417, "loss": 0.3363, "lr": 1.8744189609413733e-05, "epoch": 3.9778129952456416, "percentage": 56.83, "elapsed_time": "4:15:34", "remaining_time": "3:14:10"}
|
||||
{"current_steps": 2515, "total_steps": 4417, "loss": 0.3601, "lr": 1.8665321797619068e-05, "epoch": 3.9857369255150554, "percentage": 56.94, "elapsed_time": "4:16:12", "remaining_time": "3:13:45"}
|
||||
{"current_steps": 2520, "total_steps": 4417, "loss": 0.3399, "lr": 1.8586474827925142e-05, "epoch": 3.9936608557844693, "percentage": 57.05, "elapsed_time": "4:16:39", "remaining_time": "3:13:12"}
|
||||
{"current_steps": 2525, "total_steps": 4417, "loss": 0.3189, "lr": 1.8507649931592405e-05, "epoch": 4.001584786053883, "percentage": 57.17, "elapsed_time": "4:17:13", "remaining_time": "3:12:44"}
|
||||
{"current_steps": 2530, "total_steps": 4417, "loss": 0.301, "lr": 1.8428848339536613e-05, "epoch": 4.009508716323296, "percentage": 57.28, "elapsed_time": "4:17:42", "remaining_time": "3:12:12"}
|
||||
{"current_steps": 2535, "total_steps": 4417, "loss": 0.306, "lr": 1.8350071282309606e-05, "epoch": 4.01743264659271, "percentage": 57.39, "elapsed_time": "4:18:16", "remaining_time": "3:11:44"}
|
||||
{"current_steps": 2540, "total_steps": 4417, "loss": 0.318, "lr": 1.8271319990080093e-05, "epoch": 4.025356576862124, "percentage": 57.51, "elapsed_time": "4:18:41", "remaining_time": "3:11:10"}
|
||||
{"current_steps": 2545, "total_steps": 4417, "loss": 0.3013, "lr": 1.8192595692614445e-05, "epoch": 4.033280507131537, "percentage": 57.62, "elapsed_time": "4:19:07", "remaining_time": "3:10:36"}
|
||||
{"current_steps": 2550, "total_steps": 4417, "loss": 0.2955, "lr": 1.8113899619257487e-05, "epoch": 4.041204437400951, "percentage": 57.73, "elapsed_time": "4:19:43", "remaining_time": "3:10:09"}
|
||||
{"current_steps": 2555, "total_steps": 4417, "loss": 0.316, "lr": 1.8035232998913294e-05, "epoch": 4.049128367670365, "percentage": 57.84, "elapsed_time": "4:20:10", "remaining_time": "3:09:36"}
|
||||
{"current_steps": 2560, "total_steps": 4417, "loss": 0.2825, "lr": 1.795659706002602e-05, "epoch": 4.057052297939778, "percentage": 57.96, "elapsed_time": "4:20:43", "remaining_time": "3:09:07"}
|
||||
{"current_steps": 2565, "total_steps": 4417, "loss": 0.3171, "lr": 1.7877993030560693e-05, "epoch": 4.064976228209192, "percentage": 58.07, "elapsed_time": "4:21:16", "remaining_time": "3:08:39"}
|
||||
{"current_steps": 2570, "total_steps": 4417, "loss": 0.3017, "lr": 1.7799422137984047e-05, "epoch": 4.072900158478605, "percentage": 58.18, "elapsed_time": "4:21:46", "remaining_time": "3:08:07"}
|
||||
{"current_steps": 2575, "total_steps": 4417, "loss": 0.3075, "lr": 1.772088560924536e-05, "epoch": 4.080824088748019, "percentage": 58.3, "elapsed_time": "4:22:09", "remaining_time": "3:07:32"}
|
||||
{"current_steps": 2580, "total_steps": 4417, "loss": 0.2984, "lr": 1.76423846707573e-05, "epoch": 4.088748019017433, "percentage": 58.41, "elapsed_time": "4:22:41", "remaining_time": "3:07:02"}
|
||||
{"current_steps": 2585, "total_steps": 4417, "loss": 0.3321, "lr": 1.756392054837675e-05, "epoch": 4.096671949286846, "percentage": 58.52, "elapsed_time": "4:23:04", "remaining_time": "3:06:26"}
|
||||
{"current_steps": 2590, "total_steps": 4417, "loss": 0.296, "lr": 1.7485494467385686e-05, "epoch": 4.10459587955626, "percentage": 58.64, "elapsed_time": "4:23:29", "remaining_time": "3:05:52"}
|
||||
{"current_steps": 2595, "total_steps": 4417, "loss": 0.2949, "lr": 1.7407107652472034e-05, "epoch": 4.112519809825674, "percentage": 58.75, "elapsed_time": "4:23:57", "remaining_time": "3:05:19"}
|
||||
{"current_steps": 2600, "total_steps": 4417, "loss": 0.3044, "lr": 1.7328761327710545e-05, "epoch": 4.120443740095087, "percentage": 58.86, "elapsed_time": "4:24:28", "remaining_time": "3:04:49"}
|
||||
{"current_steps": 2605, "total_steps": 4417, "loss": 0.3267, "lr": 1.7250456716543693e-05, "epoch": 4.128367670364501, "percentage": 58.98, "elapsed_time": "4:24:55", "remaining_time": "3:04:16"}
|
||||
{"current_steps": 2610, "total_steps": 4417, "loss": 0.3178, "lr": 1.717219504176255e-05, "epoch": 4.136291600633914, "percentage": 59.09, "elapsed_time": "4:25:16", "remaining_time": "3:03:39"}
|
||||
{"current_steps": 2615, "total_steps": 4417, "loss": 0.3187, "lr": 1.7093977525487704e-05, "epoch": 4.144215530903328, "percentage": 59.2, "elapsed_time": "4:25:46", "remaining_time": "3:03:09"}
|
||||
{"current_steps": 2620, "total_steps": 4417, "loss": 0.3076, "lr": 1.701580538915017e-05, "epoch": 4.152139461172742, "percentage": 59.32, "elapsed_time": "4:26:19", "remaining_time": "3:02:40"}
|
||||
{"current_steps": 2625, "total_steps": 4417, "loss": 0.3016, "lr": 1.693767985347232e-05, "epoch": 4.160063391442155, "percentage": 59.43, "elapsed_time": "4:26:51", "remaining_time": "3:02:10"}
|
||||
{"current_steps": 2630, "total_steps": 4417, "loss": 0.3103, "lr": 1.6859602138448822e-05, "epoch": 4.167987321711569, "percentage": 59.54, "elapsed_time": "4:27:20", "remaining_time": "3:01:39"}
|
||||
{"current_steps": 2635, "total_steps": 4417, "loss": 0.297, "lr": 1.6781573463327573e-05, "epoch": 4.175911251980983, "percentage": 59.66, "elapsed_time": "4:27:58", "remaining_time": "3:01:13"}
|
||||
{"current_steps": 2640, "total_steps": 4417, "loss": 0.3006, "lr": 1.6703595046590685e-05, "epoch": 4.183835182250396, "percentage": 59.77, "elapsed_time": "4:28:30", "remaining_time": "3:00:44"}
|
||||
{"current_steps": 2645, "total_steps": 4417, "loss": 0.3119, "lr": 1.662566810593544e-05, "epoch": 4.19175911251981, "percentage": 59.88, "elapsed_time": "4:29:05", "remaining_time": "3:00:16"}
|
||||
{"current_steps": 2650, "total_steps": 4417, "loss": 0.2901, "lr": 1.6547793858255275e-05, "epoch": 4.199683042789223, "percentage": 60.0, "elapsed_time": "4:29:38", "remaining_time": "2:59:47"}
|
||||
{"current_steps": 2655, "total_steps": 4417, "loss": 0.3056, "lr": 1.646997351962078e-05, "epoch": 4.207606973058637, "percentage": 60.11, "elapsed_time": "4:30:05", "remaining_time": "2:59:14"}
|
||||
{"current_steps": 2660, "total_steps": 4417, "loss": 0.3064, "lr": 1.639220830526072e-05, "epoch": 4.215530903328051, "percentage": 60.22, "elapsed_time": "4:30:35", "remaining_time": "2:58:44"}
|
||||
{"current_steps": 2665, "total_steps": 4417, "loss": 0.2877, "lr": 1.631449942954304e-05, "epoch": 4.223454833597464, "percentage": 60.34, "elapsed_time": "4:31:08", "remaining_time": "2:58:14"}
|
||||
{"current_steps": 2670, "total_steps": 4417, "loss": 0.2897, "lr": 1.623684810595592e-05, "epoch": 4.231378763866878, "percentage": 60.45, "elapsed_time": "4:31:44", "remaining_time": "2:57:48"}
|
||||
{"current_steps": 2675, "total_steps": 4417, "loss": 0.2988, "lr": 1.6159255547088806e-05, "epoch": 4.239302694136292, "percentage": 60.56, "elapsed_time": "4:32:17", "remaining_time": "2:57:19"}
|
||||
{"current_steps": 2680, "total_steps": 4417, "loss": 0.3128, "lr": 1.608172296461349e-05, "epoch": 4.247226624405705, "percentage": 60.67, "elapsed_time": "4:32:48", "remaining_time": "2:56:48"}
|
||||
{"current_steps": 2685, "total_steps": 4417, "loss": 0.3077, "lr": 1.6004251569265187e-05, "epoch": 4.255150554675119, "percentage": 60.79, "elapsed_time": "4:33:20", "remaining_time": "2:56:19"}
|
||||
{"current_steps": 2690, "total_steps": 4417, "loss": 0.2935, "lr": 1.5926842570823624e-05, "epoch": 4.263074484944532, "percentage": 60.9, "elapsed_time": "4:33:51", "remaining_time": "2:55:49"}
|
||||
{"current_steps": 2695, "total_steps": 4417, "loss": 0.3179, "lr": 1.5849497178094132e-05, "epoch": 4.270998415213946, "percentage": 61.01, "elapsed_time": "4:34:24", "remaining_time": "2:55:20"}
|
||||
{"current_steps": 2700, "total_steps": 4417, "loss": 0.3086, "lr": 1.5772216598888802e-05, "epoch": 4.27892234548336, "percentage": 61.13, "elapsed_time": "4:34:53", "remaining_time": "2:54:48"}
|
||||
{"current_steps": 2705, "total_steps": 4417, "loss": 0.3147, "lr": 1.5695002040007613e-05, "epoch": 4.286846275752773, "percentage": 61.24, "elapsed_time": "4:35:29", "remaining_time": "2:54:21"}
|
||||
{"current_steps": 2710, "total_steps": 4417, "loss": 0.2983, "lr": 1.5617854707219574e-05, "epoch": 4.294770206022187, "percentage": 61.35, "elapsed_time": "4:36:00", "remaining_time": "2:53:51"}
|
||||
{"current_steps": 2715, "total_steps": 4417, "loss": 0.3072, "lr": 1.5540775805243896e-05, "epoch": 4.302694136291601, "percentage": 61.47, "elapsed_time": "4:36:31", "remaining_time": "2:53:21"}
|
||||
{"current_steps": 2720, "total_steps": 4417, "loss": 0.3073, "lr": 1.5463766537731202e-05, "epoch": 4.310618066561014, "percentage": 61.58, "elapsed_time": "4:37:05", "remaining_time": "2:52:52"}
|
||||
{"current_steps": 2725, "total_steps": 4417, "loss": 0.3089, "lr": 1.5386828107244705e-05, "epoch": 4.318541996830428, "percentage": 61.69, "elapsed_time": "4:37:37", "remaining_time": "2:52:23"}
|
||||
{"current_steps": 2730, "total_steps": 4417, "loss": 0.2982, "lr": 1.530996171524145e-05, "epoch": 4.326465927099841, "percentage": 61.81, "elapsed_time": "4:38:00", "remaining_time": "2:51:47"}
|
||||
{"current_steps": 2735, "total_steps": 4417, "loss": 0.2959, "lr": 1.5233168562053517e-05, "epoch": 4.334389857369255, "percentage": 61.92, "elapsed_time": "4:38:36", "remaining_time": "2:51:20"}
|
||||
{"current_steps": 2740, "total_steps": 4417, "loss": 0.3012, "lr": 1.515644984686933e-05, "epoch": 4.342313787638669, "percentage": 62.03, "elapsed_time": "4:39:06", "remaining_time": "2:50:49"}
|
||||
{"current_steps": 2745, "total_steps": 4417, "loss": 0.2995, "lr": 1.507980676771489e-05, "epoch": 4.350237717908082, "percentage": 62.15, "elapsed_time": "4:39:37", "remaining_time": "2:50:19"}
|
||||
{"current_steps": 2750, "total_steps": 4417, "loss": 0.3068, "lr": 1.5003240521435076e-05, "epoch": 4.358161648177496, "percentage": 62.26, "elapsed_time": "4:40:10", "remaining_time": "2:49:50"}
|
||||
{"current_steps": 2755, "total_steps": 4417, "loss": 0.3323, "lr": 1.4926752303674967e-05, "epoch": 4.36608557844691, "percentage": 62.37, "elapsed_time": "4:40:34", "remaining_time": "2:49:15"}
|
||||
{"current_steps": 2760, "total_steps": 4417, "loss": 0.3097, "lr": 1.4850343308861152e-05, "epoch": 4.374009508716323, "percentage": 62.49, "elapsed_time": "4:41:07", "remaining_time": "2:48:46"}
|
||||
{"current_steps": 2765, "total_steps": 4417, "loss": 0.3174, "lr": 1.47740147301831e-05, "epoch": 4.381933438985737, "percentage": 62.6, "elapsed_time": "4:41:31", "remaining_time": "2:48:12"}
|
||||
{"current_steps": 2770, "total_steps": 4417, "loss": 0.289, "lr": 1.4697767759574508e-05, "epoch": 4.38985736925515, "percentage": 62.71, "elapsed_time": "4:42:05", "remaining_time": "2:47:43"}
|
||||
{"current_steps": 2775, "total_steps": 4417, "loss": 0.3059, "lr": 1.4621603587694688e-05, "epoch": 4.397781299524564, "percentage": 62.83, "elapsed_time": "4:42:41", "remaining_time": "2:47:16"}
|
||||
{"current_steps": 2780, "total_steps": 4417, "loss": 0.2988, "lr": 1.454552340391e-05, "epoch": 4.405705229793978, "percentage": 62.94, "elapsed_time": "4:43:10", "remaining_time": "2:46:44"}
|
||||
{"current_steps": 2785, "total_steps": 4417, "loss": 0.3172, "lr": 1.446952839627525e-05, "epoch": 4.413629160063391, "percentage": 63.05, "elapsed_time": "4:43:37", "remaining_time": "2:46:12"}
|
||||
{"current_steps": 2790, "total_steps": 4417, "loss": 0.3113, "lr": 1.4393619751515147e-05, "epoch": 4.4215530903328055, "percentage": 63.17, "elapsed_time": "4:44:07", "remaining_time": "2:45:41"}
|
||||
{"current_steps": 2795, "total_steps": 4417, "loss": 0.3035, "lr": 1.4317798655005777e-05, "epoch": 4.429477020602219, "percentage": 63.28, "elapsed_time": "4:44:38", "remaining_time": "2:45:10"}
|
||||
{"current_steps": 2800, "total_steps": 4417, "loss": 0.3336, "lr": 1.4242066290756084e-05, "epoch": 4.437400950871632, "percentage": 63.39, "elapsed_time": "4:45:08", "remaining_time": "2:44:39"}
|
||||
{"current_steps": 2805, "total_steps": 4417, "loss": 0.3054, "lr": 1.4166423841389387e-05, "epoch": 4.445324881141046, "percentage": 63.5, "elapsed_time": "4:45:33", "remaining_time": "2:44:06"}
|
||||
{"current_steps": 2810, "total_steps": 4417, "loss": 0.3136, "lr": 1.4090872488124916e-05, "epoch": 4.453248811410459, "percentage": 63.62, "elapsed_time": "4:46:03", "remaining_time": "2:43:35"}
|
||||
{"current_steps": 2815, "total_steps": 4417, "loss": 0.3249, "lr": 1.4015413410759343e-05, "epoch": 4.461172741679873, "percentage": 63.73, "elapsed_time": "4:46:36", "remaining_time": "2:43:06"}
|
||||
{"current_steps": 2820, "total_steps": 4417, "loss": 0.294, "lr": 1.3940047787648388e-05, "epoch": 4.469096671949287, "percentage": 63.84, "elapsed_time": "4:47:07", "remaining_time": "2:42:36"}
|
||||
{"current_steps": 2825, "total_steps": 4417, "loss": 0.3309, "lr": 1.3864776795688408e-05, "epoch": 4.4770206022187, "percentage": 63.96, "elapsed_time": "4:47:34", "remaining_time": "2:42:03"}
|
||||
{"current_steps": 2830, "total_steps": 4417, "loss": 0.3051, "lr": 1.3789601610298013e-05, "epoch": 4.4849445324881145, "percentage": 64.07, "elapsed_time": "4:48:10", "remaining_time": "2:41:36"}
|
||||
{"current_steps": 2835, "total_steps": 4417, "loss": 0.3021, "lr": 1.3714523405399711e-05, "epoch": 4.492868462757528, "percentage": 64.18, "elapsed_time": "4:48:41", "remaining_time": "2:41:05"}
|
||||
{"current_steps": 2840, "total_steps": 4417, "loss": 0.3278, "lr": 1.3639543353401588e-05, "epoch": 4.500792393026941, "percentage": 64.3, "elapsed_time": "4:49:14", "remaining_time": "2:40:36"}
|
||||
{"current_steps": 2845, "total_steps": 4417, "loss": 0.3059, "lr": 1.3564662625178979e-05, "epoch": 4.508716323296355, "percentage": 64.41, "elapsed_time": "4:49:45", "remaining_time": "2:40:06"}
|
||||
{"current_steps": 2850, "total_steps": 4417, "loss": 0.3075, "lr": 1.3489882390056214e-05, "epoch": 4.516640253565768, "percentage": 64.52, "elapsed_time": "4:50:12", "remaining_time": "2:39:34"}
|
||||
{"current_steps": 2855, "total_steps": 4417, "loss": 0.3022, "lr": 1.3415203815788317e-05, "epoch": 4.524564183835182, "percentage": 64.64, "elapsed_time": "4:50:40", "remaining_time": "2:39:02"}
|
||||
{"current_steps": 2860, "total_steps": 4417, "loss": 0.3162, "lr": 1.3340628068542816e-05, "epoch": 4.532488114104596, "percentage": 64.75, "elapsed_time": "4:51:12", "remaining_time": "2:38:32"}
|
||||
{"current_steps": 2865, "total_steps": 4417, "loss": 0.3041, "lr": 1.3266156312881497e-05, "epoch": 4.540412044374009, "percentage": 64.86, "elapsed_time": "4:51:43", "remaining_time": "2:38:01"}
|
||||
{"current_steps": 2870, "total_steps": 4417, "loss": 0.3105, "lr": 1.3191789711742237e-05, "epoch": 4.5483359746434235, "percentage": 64.98, "elapsed_time": "4:52:09", "remaining_time": "2:37:28"}
|
||||
{"current_steps": 2875, "total_steps": 4417, "loss": 0.2968, "lr": 1.3117529426420838e-05, "epoch": 4.556259904912837, "percentage": 65.09, "elapsed_time": "4:52:38", "remaining_time": "2:36:57"}
|
||||
{"current_steps": 2880, "total_steps": 4417, "loss": 0.3152, "lr": 1.3043376616552889e-05, "epoch": 4.56418383518225, "percentage": 65.2, "elapsed_time": "4:53:08", "remaining_time": "2:36:26"}
|
||||
{"current_steps": 2885, "total_steps": 4417, "loss": 0.2968, "lr": 1.296933244009567e-05, "epoch": 4.572107765451664, "percentage": 65.32, "elapsed_time": "4:53:40", "remaining_time": "2:35:56"}
|
||||
{"current_steps": 2890, "total_steps": 4417, "loss": 0.2862, "lr": 1.2895398053310053e-05, "epoch": 4.580031695721077, "percentage": 65.43, "elapsed_time": "4:54:11", "remaining_time": "2:35:26"}
|
||||
{"current_steps": 2895, "total_steps": 4417, "loss": 0.2937, "lr": 1.2821574610742453e-05, "epoch": 4.5879556259904914, "percentage": 65.54, "elapsed_time": "4:54:38", "remaining_time": "2:34:54"}
|
||||
{"current_steps": 2900, "total_steps": 4417, "loss": 0.3126, "lr": 1.2747863265206801e-05, "epoch": 4.595879556259905, "percentage": 65.66, "elapsed_time": "4:55:06", "remaining_time": "2:34:22"}
|
||||
{"current_steps": 2905, "total_steps": 4417, "loss": 0.3101, "lr": 1.2674265167766545e-05, "epoch": 4.603803486529318, "percentage": 65.77, "elapsed_time": "4:55:36", "remaining_time": "2:33:51"}
|
||||
{"current_steps": 2910, "total_steps": 4417, "loss": 0.3193, "lr": 1.2600781467716665e-05, "epoch": 4.6117274167987325, "percentage": 65.88, "elapsed_time": "4:56:04", "remaining_time": "2:33:19"}
|
||||
{"current_steps": 2915, "total_steps": 4417, "loss": 0.3169, "lr": 1.2527413312565737e-05, "epoch": 4.619651347068146, "percentage": 66.0, "elapsed_time": "4:56:32", "remaining_time": "2:32:47"}
|
||||
{"current_steps": 2920, "total_steps": 4417, "loss": 0.2974, "lr": 1.2454161848018003e-05, "epoch": 4.627575277337559, "percentage": 66.11, "elapsed_time": "4:57:05", "remaining_time": "2:32:18"}
|
||||
{"current_steps": 2925, "total_steps": 4417, "loss": 0.3239, "lr": 1.2381028217955486e-05, "epoch": 4.635499207606973, "percentage": 66.22, "elapsed_time": "4:57:34", "remaining_time": "2:31:47"}
|
||||
{"current_steps": 2930, "total_steps": 4417, "loss": 0.299, "lr": 1.2308013564420132e-05, "epoch": 4.643423137876387, "percentage": 66.33, "elapsed_time": "4:58:12", "remaining_time": "2:31:20"}
|
||||
{"current_steps": 2935, "total_steps": 4417, "loss": 0.3069, "lr": 1.2235119027595957e-05, "epoch": 4.6513470681458005, "percentage": 66.45, "elapsed_time": "4:58:39", "remaining_time": "2:30:48"}
|
||||
{"current_steps": 2940, "total_steps": 4417, "loss": 0.3033, "lr": 1.2162345745791268e-05, "epoch": 4.659270998415214, "percentage": 66.56, "elapsed_time": "4:59:13", "remaining_time": "2:30:19"}
|
||||
{"current_steps": 2945, "total_steps": 4417, "loss": 0.2991, "lr": 1.2089694855420873e-05, "epoch": 4.667194928684627, "percentage": 66.67, "elapsed_time": "4:59:46", "remaining_time": "2:29:49"}
|
||||
{"current_steps": 2950, "total_steps": 4417, "loss": 0.2926, "lr": 1.201716749098833e-05, "epoch": 4.675118858954042, "percentage": 66.79, "elapsed_time": "5:00:15", "remaining_time": "2:29:18"}
|
||||
{"current_steps": 2955, "total_steps": 4417, "loss": 0.3303, "lr": 1.1944764785068248e-05, "epoch": 4.683042789223455, "percentage": 66.9, "elapsed_time": "5:00:45", "remaining_time": "2:28:48"}
|
||||
{"current_steps": 2960, "total_steps": 4417, "loss": 0.2963, "lr": 1.187248786828858e-05, "epoch": 4.690966719492868, "percentage": 67.01, "elapsed_time": "5:01:19", "remaining_time": "2:28:19"}
|
||||
{"current_steps": 2965, "total_steps": 4417, "loss": 0.305, "lr": 1.1800337869312988e-05, "epoch": 4.698890649762282, "percentage": 67.13, "elapsed_time": "5:01:50", "remaining_time": "2:27:49"}
|
||||
{"current_steps": 2970, "total_steps": 4417, "loss": 0.3156, "lr": 1.17283159148232e-05, "epoch": 4.706814580031696, "percentage": 67.24, "elapsed_time": "5:02:15", "remaining_time": "2:27:15"}
|
||||
{"current_steps": 2975, "total_steps": 4417, "loss": 0.2903, "lr": 1.165642312950142e-05, "epoch": 4.7147385103011095, "percentage": 67.35, "elapsed_time": "5:02:51", "remaining_time": "2:26:47"}
|
||||
{"current_steps": 2980, "total_steps": 4417, "loss": 0.3051, "lr": 1.1584660636012788e-05, "epoch": 4.722662440570523, "percentage": 67.47, "elapsed_time": "5:03:20", "remaining_time": "2:26:16"}
|
||||
{"current_steps": 2985, "total_steps": 4417, "loss": 0.2888, "lr": 1.1513029554987804e-05, "epoch": 4.730586370839936, "percentage": 67.58, "elapsed_time": "5:03:51", "remaining_time": "2:25:46"}
|
||||
{"current_steps": 2990, "total_steps": 4417, "loss": 0.3123, "lr": 1.144153100500488e-05, "epoch": 4.738510301109351, "percentage": 67.69, "elapsed_time": "5:04:22", "remaining_time": "2:25:15"}
|
||||
{"current_steps": 2995, "total_steps": 4417, "loss": 0.3057, "lr": 1.1370166102572826e-05, "epoch": 4.746434231378764, "percentage": 67.81, "elapsed_time": "5:04:51", "remaining_time": "2:24:44"}
|
||||
{"current_steps": 3000, "total_steps": 4417, "loss": 0.2829, "lr": 1.1298935962113436e-05, "epoch": 4.754358161648177, "percentage": 67.92, "elapsed_time": "5:05:26", "remaining_time": "2:24:16"}
|
||||
{"current_steps": 3005, "total_steps": 4417, "loss": 0.2957, "lr": 1.1227841695944104e-05, "epoch": 4.762282091917591, "percentage": 68.03, "elapsed_time": "5:06:04", "remaining_time": "2:23:49"}
|
||||
{"current_steps": 3010, "total_steps": 4417, "loss": 0.3123, "lr": 1.1156884414260412e-05, "epoch": 4.770206022187005, "percentage": 68.15, "elapsed_time": "5:06:34", "remaining_time": "2:23:18"}
|
||||
{"current_steps": 3015, "total_steps": 4417, "loss": 0.299, "lr": 1.1086065225118836e-05, "epoch": 4.7781299524564185, "percentage": 68.26, "elapsed_time": "5:07:00", "remaining_time": "2:22:45"}
|
||||
{"current_steps": 3020, "total_steps": 4417, "loss": 0.3033, "lr": 1.1015385234419402e-05, "epoch": 4.786053882725832, "percentage": 68.37, "elapsed_time": "5:07:38", "remaining_time": "2:22:18"}
|
||||
{"current_steps": 3025, "total_steps": 4417, "loss": 0.3019, "lr": 1.0944845545888462e-05, "epoch": 4.793977812995245, "percentage": 68.49, "elapsed_time": "5:08:06", "remaining_time": "2:21:46"}
|
||||
{"current_steps": 3030, "total_steps": 4417, "loss": 0.3058, "lr": 1.0874447261061406e-05, "epoch": 4.80190174326466, "percentage": 68.6, "elapsed_time": "5:08:43", "remaining_time": "2:21:19"}
|
||||
{"current_steps": 3035, "total_steps": 4417, "loss": 0.3127, "lr": 1.0804191479265525e-05, "epoch": 4.809825673534073, "percentage": 68.71, "elapsed_time": "5:09:17", "remaining_time": "2:20:50"}
|
||||
{"current_steps": 3040, "total_steps": 4417, "loss": 0.3342, "lr": 1.0734079297602772e-05, "epoch": 4.817749603803486, "percentage": 68.82, "elapsed_time": "5:09:48", "remaining_time": "2:20:19"}
|
||||
{"current_steps": 3045, "total_steps": 4417, "loss": 0.2946, "lr": 1.0664111810932671e-05, "epoch": 4.8256735340729, "percentage": 68.94, "elapsed_time": "5:10:22", "remaining_time": "2:19:50"}
|
||||
{"current_steps": 3050, "total_steps": 4417, "loss": 0.305, "lr": 1.0594290111855245e-05, "epoch": 4.833597464342314, "percentage": 69.05, "elapsed_time": "5:10:49", "remaining_time": "2:19:18"}
|
||||
{"current_steps": 3055, "total_steps": 4417, "loss": 0.3086, "lr": 1.0524615290693881e-05, "epoch": 4.8415213946117275, "percentage": 69.16, "elapsed_time": "5:11:17", "remaining_time": "2:18:46"}
|
||||
{"current_steps": 3060, "total_steps": 4417, "loss": 0.29, "lr": 1.0455088435478384e-05, "epoch": 4.849445324881141, "percentage": 69.28, "elapsed_time": "5:11:49", "remaining_time": "2:18:17"}
|
||||
{"current_steps": 3065, "total_steps": 4417, "loss": 0.2906, "lr": 1.0385710631927917e-05, "epoch": 4.857369255150554, "percentage": 69.39, "elapsed_time": "5:12:22", "remaining_time": "2:17:47"}
|
||||
{"current_steps": 3070, "total_steps": 4417, "loss": 0.3175, "lr": 1.031648296343411e-05, "epoch": 4.865293185419969, "percentage": 69.5, "elapsed_time": "5:12:51", "remaining_time": "2:17:16"}
|
||||
{"current_steps": 3075, "total_steps": 4417, "loss": 0.2896, "lr": 1.0247406511044079e-05, "epoch": 4.873217115689382, "percentage": 69.62, "elapsed_time": "5:13:20", "remaining_time": "2:16:44"}
|
||||
{"current_steps": 3080, "total_steps": 4417, "loss": 0.3078, "lr": 1.0178482353443588e-05, "epoch": 4.881141045958795, "percentage": 69.73, "elapsed_time": "5:13:49", "remaining_time": "2:16:13"}
|
||||
{"current_steps": 3085, "total_steps": 4417, "loss": 0.3011, "lr": 1.010971156694021e-05, "epoch": 4.889064976228209, "percentage": 69.84, "elapsed_time": "5:14:21", "remaining_time": "2:15:43"}
|
||||
{"current_steps": 3090, "total_steps": 4417, "loss": 0.3023, "lr": 1.004109522544647e-05, "epoch": 4.896988906497623, "percentage": 69.96, "elapsed_time": "5:14:53", "remaining_time": "2:15:14"}
|
||||
{"current_steps": 3095, "total_steps": 4417, "loss": 0.3108, "lr": 9.972634400463135e-06, "epoch": 4.9049128367670365, "percentage": 70.07, "elapsed_time": "5:15:21", "remaining_time": "2:14:42"}
|
||||
{"current_steps": 3100, "total_steps": 4417, "loss": 0.3156, "lr": 9.90433016106243e-06, "epoch": 4.91283676703645, "percentage": 70.18, "elapsed_time": "5:15:53", "remaining_time": "2:14:12"}
|
||||
{"current_steps": 3105, "total_steps": 4417, "loss": 0.2904, "lr": 9.836183573871394e-06, "epoch": 4.920760697305864, "percentage": 70.3, "elapsed_time": "5:16:23", "remaining_time": "2:13:41"}
|
||||
{"current_steps": 3110, "total_steps": 4417, "loss": 0.3035, "lr": 9.768195703055163e-06, "epoch": 4.928684627575278, "percentage": 70.41, "elapsed_time": "5:16:54", "remaining_time": "2:13:11"}
|
||||
{"current_steps": 3115, "total_steps": 4417, "loss": 0.307, "lr": 9.700367610300423e-06, "epoch": 4.936608557844691, "percentage": 70.52, "elapsed_time": "5:17:22", "remaining_time": "2:12:39"}
|
||||
{"current_steps": 3120, "total_steps": 4417, "loss": 0.3006, "lr": 9.632700354798766e-06, "epoch": 4.944532488114104, "percentage": 70.64, "elapsed_time": "5:17:44", "remaining_time": "2:12:05"}
|
||||
{"current_steps": 3125, "total_steps": 4417, "loss": 0.3132, "lr": 9.565194993230176e-06, "epoch": 4.952456418383518, "percentage": 70.75, "elapsed_time": "5:18:18", "remaining_time": "2:11:35"}
|
||||
{"current_steps": 3130, "total_steps": 4417, "loss": 0.2822, "lr": 9.49785257974656e-06, "epoch": 4.960380348652932, "percentage": 70.86, "elapsed_time": "5:18:52", "remaining_time": "2:11:06"}
|
||||
{"current_steps": 3135, "total_steps": 4417, "loss": 0.3046, "lr": 9.430674165955218e-06, "epoch": 4.9683042789223455, "percentage": 70.98, "elapsed_time": "5:19:23", "remaining_time": "2:10:36"}
|
||||
{"current_steps": 3140, "total_steps": 4417, "loss": 0.3157, "lr": 9.363660800902498e-06, "epoch": 4.976228209191759, "percentage": 71.09, "elapsed_time": "5:20:00", "remaining_time": "2:10:08"}
|
||||
{"current_steps": 3145, "total_steps": 4417, "loss": 0.2933, "lr": 9.296813531057336e-06, "epoch": 4.984152139461173, "percentage": 71.2, "elapsed_time": "5:20:25", "remaining_time": "2:09:35"}
|
||||
{"current_steps": 3150, "total_steps": 4417, "loss": 0.3112, "lr": 9.230133400294994e-06, "epoch": 4.992076069730587, "percentage": 71.32, "elapsed_time": "5:20:54", "remaining_time": "2:09:04"}
|
||||
{"current_steps": 3155, "total_steps": 4417, "loss": 0.3065, "lr": 9.163621449880686e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "5:21:24", "remaining_time": "2:08:33"}
|
||||
{"current_steps": 3160, "total_steps": 4417, "loss": 0.2917, "lr": 9.097278718453366e-06, "epoch": 5.007923930269413, "percentage": 71.54, "elapsed_time": "5:21:50", "remaining_time": "2:08:01"}
|
||||
{"current_steps": 3165, "total_steps": 4417, "loss": 0.29, "lr": 9.0311062420095e-06, "epoch": 5.015847860538828, "percentage": 71.65, "elapsed_time": "5:22:19", "remaining_time": "2:07:30"}
|
||||
{"current_steps": 3170, "total_steps": 4417, "loss": 0.2956, "lr": 8.965105053886864e-06, "epoch": 5.023771790808241, "percentage": 71.77, "elapsed_time": "5:22:49", "remaining_time": "2:06:59"}
|
||||
{"current_steps": 3175, "total_steps": 4417, "loss": 0.2826, "lr": 8.899276184748454e-06, "epoch": 5.0316957210776545, "percentage": 71.88, "elapsed_time": "5:23:21", "remaining_time": "2:06:29"}
|
||||
{"current_steps": 3180, "total_steps": 4417, "loss": 0.287, "lr": 8.833620662566325e-06, "epoch": 5.039619651347068, "percentage": 71.99, "elapsed_time": "5:23:52", "remaining_time": "2:05:59"}
|
||||
{"current_steps": 3185, "total_steps": 4417, "loss": 0.2739, "lr": 8.768139512605615e-06, "epoch": 5.047543581616482, "percentage": 72.11, "elapsed_time": "5:24:20", "remaining_time": "2:05:27"}
|
||||
{"current_steps": 3190, "total_steps": 4417, "loss": 0.2806, "lr": 8.702833757408465e-06, "epoch": 5.055467511885896, "percentage": 72.22, "elapsed_time": "5:24:53", "remaining_time": "2:04:57"}
|
||||
{"current_steps": 3195, "total_steps": 4417, "loss": 0.2789, "lr": 8.637704416778106e-06, "epoch": 5.063391442155309, "percentage": 72.33, "elapsed_time": "5:25:29", "remaining_time": "2:04:29"}
|
||||
{"current_steps": 3200, "total_steps": 4417, "loss": 0.2984, "lr": 8.572752507762893e-06, "epoch": 5.071315372424722, "percentage": 72.45, "elapsed_time": "5:25:57", "remaining_time": "2:03:58"}
|
||||
{"current_steps": 3205, "total_steps": 4417, "loss": 0.2747, "lr": 8.50797904464044e-06, "epoch": 5.079239302694137, "percentage": 72.56, "elapsed_time": "5:26:27", "remaining_time": "2:03:27"}
|
||||
{"current_steps": 3210, "total_steps": 4417, "loss": 0.2951, "lr": 8.443385038901801e-06, "epoch": 5.08716323296355, "percentage": 72.67, "elapsed_time": "5:26:59", "remaining_time": "2:02:57"}
|
||||
{"current_steps": 3215, "total_steps": 4417, "loss": 0.2899, "lr": 8.378971499235627e-06, "epoch": 5.0950871632329635, "percentage": 72.79, "elapsed_time": "5:27:21", "remaining_time": "2:02:23"}
|
||||
{"current_steps": 3220, "total_steps": 4417, "loss": 0.2977, "lr": 8.31473943151247e-06, "epoch": 5.103011093502377, "percentage": 72.9, "elapsed_time": "5:27:44", "remaining_time": "2:01:49"}
|
||||
{"current_steps": 3225, "total_steps": 4417, "loss": 0.3003, "lr": 8.250689838769021e-06, "epoch": 5.110935023771791, "percentage": 73.01, "elapsed_time": "5:28:08", "remaining_time": "2:01:16"}
|
||||
{"current_steps": 3230, "total_steps": 4417, "loss": 0.2932, "lr": 8.186823721192505e-06, "epoch": 5.118858954041205, "percentage": 73.13, "elapsed_time": "5:28:36", "remaining_time": "2:00:45"}
|
||||
{"current_steps": 3235, "total_steps": 4417, "loss": 0.3097, "lr": 8.123142076105004e-06, "epoch": 5.126782884310618, "percentage": 73.24, "elapsed_time": "5:29:07", "remaining_time": "2:00:15"}
|
||||
{"current_steps": 3240, "total_steps": 4417, "loss": 0.2876, "lr": 8.05964589794791e-06, "epoch": 5.134706814580031, "percentage": 73.35, "elapsed_time": "5:29:42", "remaining_time": "1:59:46"}
|
||||
{"current_steps": 3245, "total_steps": 4417, "loss": 0.2941, "lr": 7.996336178266422e-06, "epoch": 5.142630744849446, "percentage": 73.47, "elapsed_time": "5:30:12", "remaining_time": "1:59:15"}
|
||||
{"current_steps": 3250, "total_steps": 4417, "loss": 0.3021, "lr": 7.933213905694003e-06, "epoch": 5.150554675118859, "percentage": 73.58, "elapsed_time": "5:30:45", "remaining_time": "1:58:46"}
|
||||
{"current_steps": 3255, "total_steps": 4417, "loss": 0.2986, "lr": 7.87028006593701e-06, "epoch": 5.1584786053882725, "percentage": 73.69, "elapsed_time": "5:31:15", "remaining_time": "1:58:15"}
|
||||
{"current_steps": 3260, "total_steps": 4417, "loss": 0.2742, "lr": 7.807535641759229e-06, "epoch": 5.166402535657686, "percentage": 73.81, "elapsed_time": "5:31:45", "remaining_time": "1:57:44"}
|
||||
{"current_steps": 3265, "total_steps": 4417, "loss": 0.2694, "lr": 7.744981612966603e-06, "epoch": 5.1743264659271, "percentage": 73.92, "elapsed_time": "5:32:16", "remaining_time": "1:57:14"}
|
||||
{"current_steps": 3270, "total_steps": 4417, "loss": 0.2967, "lr": 7.682618956391854e-06, "epoch": 5.182250396196514, "percentage": 74.03, "elapsed_time": "5:32:43", "remaining_time": "1:56:42"}
|
||||
{"current_steps": 3275, "total_steps": 4417, "loss": 0.2928, "lr": 7.62044864587931e-06, "epoch": 5.190174326465927, "percentage": 74.15, "elapsed_time": "5:33:21", "remaining_time": "1:56:14"}
|
||||
{"current_steps": 3280, "total_steps": 4417, "loss": 0.2794, "lr": 7.558471652269628e-06, "epoch": 5.19809825673534, "percentage": 74.26, "elapsed_time": "5:33:54", "remaining_time": "1:55:45"}
|
||||
{"current_steps": 3285, "total_steps": 4417, "loss": 0.2782, "lr": 7.496688943384665e-06, "epoch": 5.206022187004755, "percentage": 74.37, "elapsed_time": "5:34:28", "remaining_time": "1:55:15"}
|
||||
{"current_steps": 3290, "total_steps": 4417, "loss": 0.2966, "lr": 7.435101484012377e-06, "epoch": 5.213946117274168, "percentage": 74.48, "elapsed_time": "5:34:56", "remaining_time": "1:54:44"}
|
||||
{"current_steps": 3295, "total_steps": 4417, "loss": 0.2905, "lr": 7.37371023589172e-06, "epoch": 5.2218700475435815, "percentage": 74.6, "elapsed_time": "5:35:26", "remaining_time": "1:54:13"}
|
||||
{"current_steps": 3300, "total_steps": 4417, "loss": 0.2811, "lr": 7.312516157697671e-06, "epoch": 5.229793977812995, "percentage": 74.71, "elapsed_time": "5:35:56", "remaining_time": "1:53:42"}
|
||||
{"current_steps": 3305, "total_steps": 4417, "loss": 0.279, "lr": 7.251520205026206e-06, "epoch": 5.237717908082409, "percentage": 74.82, "elapsed_time": "5:36:28", "remaining_time": "1:53:12"}
|
||||
{"current_steps": 3310, "total_steps": 4417, "loss": 0.2834, "lr": 7.190723330379441e-06, "epoch": 5.245641838351823, "percentage": 74.94, "elapsed_time": "5:36:58", "remaining_time": "1:52:41"}
|
||||
{"current_steps": 3315, "total_steps": 4417, "loss": 0.2879, "lr": 7.1301264831507034e-06, "epoch": 5.253565768621236, "percentage": 75.05, "elapsed_time": "5:37:25", "remaining_time": "1:52:10"}
|
||||
{"current_steps": 3320, "total_steps": 4417, "loss": 0.2764, "lr": 7.0697306096097195e-06, "epoch": 5.261489698890649, "percentage": 75.16, "elapsed_time": "5:37:59", "remaining_time": "1:51:40"}
|
||||
{"current_steps": 3325, "total_steps": 4417, "loss": 0.2854, "lr": 7.0095366528878785e-06, "epoch": 5.269413629160064, "percentage": 75.28, "elapsed_time": "5:38:25", "remaining_time": "1:51:08"}
|
||||
{"current_steps": 3330, "total_steps": 4417, "loss": 0.2839, "lr": 6.949545552963432e-06, "epoch": 5.277337559429477, "percentage": 75.39, "elapsed_time": "5:38:54", "remaining_time": "1:50:37"}
|
||||
{"current_steps": 3335, "total_steps": 4417, "loss": 0.2788, "lr": 6.8897582466468935e-06, "epoch": 5.2852614896988905, "percentage": 75.5, "elapsed_time": "5:39:29", "remaining_time": "1:50:08"}
|
||||
{"current_steps": 3340, "total_steps": 4417, "loss": 0.2881, "lr": 6.830175667566341e-06, "epoch": 5.293185419968304, "percentage": 75.62, "elapsed_time": "5:39:57", "remaining_time": "1:49:37"}
|
||||
{"current_steps": 3345, "total_steps": 4417, "loss": 0.2987, "lr": 6.770798746152889e-06, "epoch": 5.301109350237718, "percentage": 75.73, "elapsed_time": "5:40:33", "remaining_time": "1:49:08"}
|
||||
{"current_steps": 3350, "total_steps": 4417, "loss": 0.3043, "lr": 6.711628409626116e-06, "epoch": 5.309033280507132, "percentage": 75.84, "elapsed_time": "5:40:59", "remaining_time": "1:48:36"}
|
||||
{"current_steps": 3355, "total_steps": 4417, "loss": 0.2615, "lr": 6.652665581979638e-06, "epoch": 5.316957210776545, "percentage": 75.96, "elapsed_time": "5:41:25", "remaining_time": "1:48:04"}
|
||||
{"current_steps": 3360, "total_steps": 4417, "loss": 0.3023, "lr": 6.593911183966619e-06, "epoch": 5.324881141045958, "percentage": 76.07, "elapsed_time": "5:41:58", "remaining_time": "1:47:34"}
|
||||
{"current_steps": 3365, "total_steps": 4417, "loss": 0.2941, "lr": 6.53536613308543e-06, "epoch": 5.332805071315373, "percentage": 76.18, "elapsed_time": "5:42:30", "remaining_time": "1:47:04"}
|
||||
{"current_steps": 3370, "total_steps": 4417, "loss": 0.2849, "lr": 6.477031343565332e-06, "epoch": 5.340729001584786, "percentage": 76.3, "elapsed_time": "5:43:05", "remaining_time": "1:46:35"}
|
||||
{"current_steps": 3375, "total_steps": 4417, "loss": 0.2888, "lr": 6.418907726352153e-06, "epoch": 5.3486529318541995, "percentage": 76.41, "elapsed_time": "5:43:43", "remaining_time": "1:46:07"}
|
||||
{"current_steps": 3380, "total_steps": 4417, "loss": 0.2946, "lr": 6.36099618909412e-06, "epoch": 5.356576862123613, "percentage": 76.52, "elapsed_time": "5:44:11", "remaining_time": "1:45:36"}
|
||||
{"current_steps": 3385, "total_steps": 4417, "loss": 0.2896, "lr": 6.303297636127627e-06, "epoch": 5.364500792393027, "percentage": 76.64, "elapsed_time": "5:44:41", "remaining_time": "1:45:05"}
|
||||
{"current_steps": 3390, "total_steps": 4417, "loss": 0.2816, "lr": 6.245812968463176e-06, "epoch": 5.372424722662441, "percentage": 76.75, "elapsed_time": "5:45:12", "remaining_time": "1:44:34"}
|
||||
{"current_steps": 3395, "total_steps": 4417, "loss": 0.2981, "lr": 6.188543083771237e-06, "epoch": 5.380348652931854, "percentage": 76.86, "elapsed_time": "5:45:47", "remaining_time": "1:44:05"}
|
||||
{"current_steps": 3400, "total_steps": 4417, "loss": 0.2874, "lr": 6.131488876368303e-06, "epoch": 5.3882725832012675, "percentage": 76.98, "elapsed_time": "5:46:13", "remaining_time": "1:43:33"}
|
||||
{"current_steps": 3405, "total_steps": 4417, "loss": 0.2832, "lr": 6.074651237202862e-06, "epoch": 5.396196513470682, "percentage": 77.09, "elapsed_time": "5:46:43", "remaining_time": "1:43:03"}
|
||||
{"current_steps": 3410, "total_steps": 4417, "loss": 0.2897, "lr": 6.018031053841515e-06, "epoch": 5.404120443740095, "percentage": 77.2, "elapsed_time": "5:47:11", "remaining_time": "1:42:31"}
|
||||
{"current_steps": 3415, "total_steps": 4417, "loss": 0.2887, "lr": 5.9616292104551266e-06, "epoch": 5.4120443740095086, "percentage": 77.31, "elapsed_time": "5:47:35", "remaining_time": "1:41:59"}
|
||||
{"current_steps": 3420, "total_steps": 4417, "loss": 0.2841, "lr": 5.905446587804977e-06, "epoch": 5.419968304278922, "percentage": 77.43, "elapsed_time": "5:48:05", "remaining_time": "1:41:28"}
|
||||
{"current_steps": 3425, "total_steps": 4417, "loss": 0.284, "lr": 5.849484063229065e-06, "epoch": 5.427892234548336, "percentage": 77.54, "elapsed_time": "5:48:42", "remaining_time": "1:40:59"}
|
||||
{"current_steps": 3430, "total_steps": 4417, "loss": 0.2934, "lr": 5.793742510628344e-06, "epoch": 5.43581616481775, "percentage": 77.65, "elapsed_time": "5:49:11", "remaining_time": "1:40:28"}
|
||||
{"current_steps": 3435, "total_steps": 4417, "loss": 0.2905, "lr": 5.738222800453139e-06, "epoch": 5.443740095087163, "percentage": 77.77, "elapsed_time": "5:49:40", "remaining_time": "1:39:58"}
|
||||
{"current_steps": 3440, "total_steps": 4417, "loss": 0.2773, "lr": 5.682925799689498e-06, "epoch": 5.4516640253565765, "percentage": 77.88, "elapsed_time": "5:50:07", "remaining_time": "1:39:26"}
|
||||
{"current_steps": 3445, "total_steps": 4417, "loss": 0.2746, "lr": 5.627852371845684e-06, "epoch": 5.459587955625991, "percentage": 77.99, "elapsed_time": "5:50:34", "remaining_time": "1:38:54"}
|
||||
{"current_steps": 3450, "total_steps": 4417, "loss": 0.2922, "lr": 5.573003376938697e-06, "epoch": 5.467511885895404, "percentage": 78.11, "elapsed_time": "5:51:04", "remaining_time": "1:38:24"}
|
||||
{"current_steps": 3455, "total_steps": 4417, "loss": 0.2783, "lr": 5.518379671480813e-06, "epoch": 5.475435816164818, "percentage": 78.22, "elapsed_time": "5:51:35", "remaining_time": "1:37:53"}
|
||||
{"current_steps": 3460, "total_steps": 4417, "loss": 0.2916, "lr": 5.4639821084662455e-06, "epoch": 5.483359746434231, "percentage": 78.33, "elapsed_time": "5:52:09", "remaining_time": "1:37:24"}
|
||||
{"current_steps": 3465, "total_steps": 4417, "loss": 0.276, "lr": 5.409811537357789e-06, "epoch": 5.491283676703645, "percentage": 78.45, "elapsed_time": "5:52:41", "remaining_time": "1:36:54"}
|
||||
{"current_steps": 3470, "total_steps": 4417, "loss": 0.2812, "lr": 5.355868804073594e-06, "epoch": 5.499207606973059, "percentage": 78.56, "elapsed_time": "5:53:04", "remaining_time": "1:36:21"}
|
||||
{"current_steps": 3475, "total_steps": 4417, "loss": 0.2706, "lr": 5.302154750973909e-06, "epoch": 5.507131537242472, "percentage": 78.67, "elapsed_time": "5:53:26", "remaining_time": "1:35:48"}
|
||||
{"current_steps": 3480, "total_steps": 4417, "loss": 0.2811, "lr": 5.24867021684798e-06, "epoch": 5.5150554675118855, "percentage": 78.79, "elapsed_time": "5:53:54", "remaining_time": "1:35:17"}
|
||||
{"current_steps": 3485, "total_steps": 4417, "loss": 0.2744, "lr": 5.195416036900906e-06, "epoch": 5.5229793977813, "percentage": 78.9, "elapsed_time": "5:54:25", "remaining_time": "1:34:47"}
|
||||
{"current_steps": 3490, "total_steps": 4417, "loss": 0.319, "lr": 5.142393042740617e-06, "epoch": 5.530903328050713, "percentage": 79.01, "elapsed_time": "5:54:47", "remaining_time": "1:34:14"}
|
||||
{"current_steps": 3495, "total_steps": 4417, "loss": 0.28, "lr": 5.089602062364898e-06, "epoch": 5.538827258320127, "percentage": 79.13, "elapsed_time": "5:55:18", "remaining_time": "1:33:43"}
|
||||
{"current_steps": 3500, "total_steps": 4417, "loss": 0.2863, "lr": 5.037043920148434e-06, "epoch": 5.546751188589541, "percentage": 79.24, "elapsed_time": "5:55:42", "remaining_time": "1:33:11"}
|
||||
{"current_steps": 3505, "total_steps": 4417, "loss": 0.2824, "lr": 4.984719436829966e-06, "epoch": 5.554675118858954, "percentage": 79.35, "elapsed_time": "5:56:08", "remaining_time": "1:32:40"}
|
||||
{"current_steps": 3510, "total_steps": 4417, "loss": 0.2937, "lr": 4.932629429499436e-06, "epoch": 5.562599049128368, "percentage": 79.47, "elapsed_time": "5:56:38", "remaining_time": "1:32:09"}
|
||||
{"current_steps": 3515, "total_steps": 4417, "loss": 0.3079, "lr": 4.880774711585276e-06, "epoch": 5.570522979397781, "percentage": 79.58, "elapsed_time": "5:57:08", "remaining_time": "1:31:38"}
|
||||
{"current_steps": 3520, "total_steps": 4417, "loss": 0.2844, "lr": 4.829156092841663e-06, "epoch": 5.5784469096671945, "percentage": 79.69, "elapsed_time": "5:57:32", "remaining_time": "1:31:06"}
|
||||
{"current_steps": 3525, "total_steps": 4417, "loss": 0.285, "lr": 4.777774379335885e-06, "epoch": 5.586370839936609, "percentage": 79.81, "elapsed_time": "5:58:06", "remaining_time": "1:30:37"}
|
||||
{"current_steps": 3530, "total_steps": 4417, "loss": 0.2938, "lr": 4.726630373435783e-06, "epoch": 5.594294770206022, "percentage": 79.92, "elapsed_time": "5:58:39", "remaining_time": "1:30:07"}
|
||||
{"current_steps": 3535, "total_steps": 4417, "loss": 0.2926, "lr": 4.675724873797171e-06, "epoch": 5.602218700475436, "percentage": 80.03, "elapsed_time": "5:59:09", "remaining_time": "1:29:36"}
|
||||
{"current_steps": 3540, "total_steps": 4417, "loss": 0.2973, "lr": 4.625058675351415e-06, "epoch": 5.61014263074485, "percentage": 80.14, "elapsed_time": "5:59:39", "remaining_time": "1:29:06"}
|
||||
{"current_steps": 3545, "total_steps": 4417, "loss": 0.283, "lr": 4.5746325692929735e-06, "epoch": 5.618066561014263, "percentage": 80.26, "elapsed_time": "6:00:10", "remaining_time": "1:28:35"}
|
||||
{"current_steps": 3550, "total_steps": 4417, "loss": 0.2785, "lr": 4.524447343067089e-06, "epoch": 5.625990491283677, "percentage": 80.37, "elapsed_time": "6:00:46", "remaining_time": "1:28:06"}
|
||||
{"current_steps": 3555, "total_steps": 4417, "loss": 0.2766, "lr": 4.474503780357435e-06, "epoch": 5.63391442155309, "percentage": 80.48, "elapsed_time": "6:01:15", "remaining_time": "1:27:35"}
|
||||
{"current_steps": 3560, "total_steps": 4417, "loss": 0.2828, "lr": 4.424802661073946e-06, "epoch": 5.6418383518225035, "percentage": 80.6, "elapsed_time": "6:01:42", "remaining_time": "1:27:04"}
|
||||
{"current_steps": 3565, "total_steps": 4417, "loss": 0.2829, "lr": 4.375344761340576e-06, "epoch": 5.649762282091918, "percentage": 80.71, "elapsed_time": "6:02:07", "remaining_time": "1:26:32"}
|
||||
{"current_steps": 3570, "total_steps": 4417, "loss": 0.2937, "lr": 4.326130853483206e-06, "epoch": 5.657686212361331, "percentage": 80.82, "elapsed_time": "6:02:33", "remaining_time": "1:26:01"}
|
||||
{"current_steps": 3575, "total_steps": 4417, "loss": 0.2936, "lr": 4.277161706017605e-06, "epoch": 5.665610142630745, "percentage": 80.94, "elapsed_time": "6:03:01", "remaining_time": "1:25:30"}
|
||||
{"current_steps": 3580, "total_steps": 4417, "loss": 0.2948, "lr": 4.228438083637376e-06, "epoch": 5.673534072900159, "percentage": 81.05, "elapsed_time": "6:03:24", "remaining_time": "1:24:57"}
|
||||
{"current_steps": 3585, "total_steps": 4417, "loss": 0.2872, "lr": 4.179960747202079e-06, "epoch": 5.681458003169572, "percentage": 81.16, "elapsed_time": "6:03:57", "remaining_time": "1:24:27"}
|
||||
{"current_steps": 3590, "total_steps": 4417, "loss": 0.2702, "lr": 4.1317304537252804e-06, "epoch": 5.689381933438986, "percentage": 81.28, "elapsed_time": "6:04:30", "remaining_time": "1:23:58"}
|
||||
{"current_steps": 3595, "total_steps": 4417, "loss": 0.2758, "lr": 4.0837479563627955e-06, "epoch": 5.697305863708399, "percentage": 81.39, "elapsed_time": "6:04:56", "remaining_time": "1:23:26"}
|
||||
{"current_steps": 3600, "total_steps": 4417, "loss": 0.2821, "lr": 4.03601400440089e-06, "epoch": 5.705229793977813, "percentage": 81.5, "elapsed_time": "6:05:26", "remaining_time": "1:22:56"}
|
||||
{"current_steps": 3605, "total_steps": 4417, "loss": 0.2817, "lr": 3.988529343244573e-06, "epoch": 5.713153724247227, "percentage": 81.62, "elapsed_time": "6:05:56", "remaining_time": "1:22:25"}
|
||||
{"current_steps": 3610, "total_steps": 4417, "loss": 0.2943, "lr": 3.941294714406001e-06, "epoch": 5.72107765451664, "percentage": 81.73, "elapsed_time": "6:06:26", "remaining_time": "1:21:55"}
|
||||
{"current_steps": 3615, "total_steps": 4417, "loss": 0.2917, "lr": 3.8943108554928396e-06, "epoch": 5.729001584786054, "percentage": 81.84, "elapsed_time": "6:06:57", "remaining_time": "1:21:24"}
|
||||
{"current_steps": 3620, "total_steps": 4417, "loss": 0.2905, "lr": 3.8475785001968045e-06, "epoch": 5.736925515055468, "percentage": 81.96, "elapsed_time": "6:07:23", "remaining_time": "1:20:53"}
|
||||
{"current_steps": 3625, "total_steps": 4417, "loss": 0.2659, "lr": 3.8010983782821464e-06, "epoch": 5.744849445324881, "percentage": 82.07, "elapsed_time": "6:07:52", "remaining_time": "1:20:22"}
|
||||
{"current_steps": 3630, "total_steps": 4417, "loss": 0.2844, "lr": 3.7548712155743096e-06, "epoch": 5.752773375594295, "percentage": 82.18, "elapsed_time": "6:08:27", "remaining_time": "1:19:53"}
|
||||
{"current_steps": 3635, "total_steps": 4417, "loss": 0.2897, "lr": 3.708897733948551e-06, "epoch": 5.760697305863708, "percentage": 82.3, "elapsed_time": "6:08:47", "remaining_time": "1:19:20"}
|
||||
{"current_steps": 3640, "total_steps": 4417, "loss": 0.2857, "lr": 3.6631786513187017e-06, "epoch": 5.768621236133122, "percentage": 82.41, "elapsed_time": "6:09:19", "remaining_time": "1:18:50"}
|
||||
{"current_steps": 3645, "total_steps": 4417, "loss": 0.2857, "lr": 3.617714681625939e-06, "epoch": 5.776545166402536, "percentage": 82.52, "elapsed_time": "6:09:45", "remaining_time": "1:18:18"}
|
||||
{"current_steps": 3650, "total_steps": 4417, "loss": 0.2964, "lr": 3.5725065348276332e-06, "epoch": 5.784469096671949, "percentage": 82.64, "elapsed_time": "6:10:11", "remaining_time": "1:17:47"}
|
||||
{"current_steps": 3655, "total_steps": 4417, "loss": 0.2971, "lr": 3.5275549168862843e-06, "epoch": 5.792393026941363, "percentage": 82.75, "elapsed_time": "6:10:40", "remaining_time": "1:17:16"}
|
||||
{"current_steps": 3660, "total_steps": 4417, "loss": 0.2861, "lr": 3.482860529758467e-06, "epoch": 5.800316957210777, "percentage": 82.86, "elapsed_time": "6:11:15", "remaining_time": "1:16:47"}
|
||||
{"current_steps": 3665, "total_steps": 4417, "loss": 0.3084, "lr": 3.4384240713839034e-06, "epoch": 5.80824088748019, "percentage": 82.97, "elapsed_time": "6:11:47", "remaining_time": "1:16:17"}
|
||||
{"current_steps": 3670, "total_steps": 4417, "loss": 0.2793, "lr": 3.3942462356745188e-06, "epoch": 5.816164817749604, "percentage": 83.09, "elapsed_time": "6:12:17", "remaining_time": "1:15:46"}
|
||||
{"current_steps": 3675, "total_steps": 4417, "loss": 0.296, "lr": 3.350327712503665e-06, "epoch": 5.824088748019017, "percentage": 83.2, "elapsed_time": "6:12:49", "remaining_time": "1:15:16"}
|
||||
{"current_steps": 3680, "total_steps": 4417, "loss": 0.2761, "lr": 3.306669187695286e-06, "epoch": 5.832012678288431, "percentage": 83.31, "elapsed_time": "6:13:20", "remaining_time": "1:14:46"}
|
||||
{"current_steps": 3685, "total_steps": 4417, "loss": 0.2669, "lr": 3.2632713430132502e-06, "epoch": 5.839936608557845, "percentage": 83.43, "elapsed_time": "6:13:47", "remaining_time": "1:14:15"}
|
||||
{"current_steps": 3690, "total_steps": 4417, "loss": 0.2894, "lr": 3.220134856150692e-06, "epoch": 5.847860538827258, "percentage": 83.54, "elapsed_time": "6:14:19", "remaining_time": "1:13:44"}
|
||||
{"current_steps": 3695, "total_steps": 4417, "loss": 0.295, "lr": 3.177260400719422e-06, "epoch": 5.855784469096672, "percentage": 83.65, "elapsed_time": "6:14:52", "remaining_time": "1:13:15"}
|
||||
{"current_steps": 3700, "total_steps": 4417, "loss": 0.2904, "lr": 3.134648646239422e-06, "epoch": 5.863708399366086, "percentage": 83.77, "elapsed_time": "6:15:19", "remaining_time": "1:12:43"}
|
||||
{"current_steps": 3705, "total_steps": 4417, "loss": 0.28, "lr": 3.092300258128369e-06, "epoch": 5.871632329635499, "percentage": 83.88, "elapsed_time": "6:15:53", "remaining_time": "1:12:14"}
|
||||
{"current_steps": 3710, "total_steps": 4417, "loss": 0.2839, "lr": 3.0502158976912733e-06, "epoch": 5.879556259904913, "percentage": 83.99, "elapsed_time": "6:16:15", "remaining_time": "1:11:42"}
|
||||
{"current_steps": 3715, "total_steps": 4417, "loss": 0.2822, "lr": 3.0083962221101193e-06, "epoch": 5.887480190174326, "percentage": 84.11, "elapsed_time": "6:16:51", "remaining_time": "1:11:12"}
|
||||
{"current_steps": 3720, "total_steps": 4417, "loss": 0.2959, "lr": 2.9668418844336356e-06, "epoch": 5.89540412044374, "percentage": 84.22, "elapsed_time": "6:17:25", "remaining_time": "1:10:42"}
|
||||
{"current_steps": 3725, "total_steps": 4417, "loss": 0.3036, "lr": 2.9255535335670673e-06, "epoch": 5.903328050713154, "percentage": 84.33, "elapsed_time": "6:17:57", "remaining_time": "1:10:12"}
|
||||
{"current_steps": 3730, "total_steps": 4417, "loss": 0.3075, "lr": 2.8845318142620526e-06, "epoch": 5.911251980982567, "percentage": 84.45, "elapsed_time": "6:18:26", "remaining_time": "1:09:42"}
|
||||
{"current_steps": 3735, "total_steps": 4417, "loss": 0.2783, "lr": 2.8437773671065796e-06, "epoch": 5.919175911251981, "percentage": 84.56, "elapsed_time": "6:18:59", "remaining_time": "1:09:12"}
|
||||
{"current_steps": 3740, "total_steps": 4417, "loss": 0.3036, "lr": 2.803290828514935e-06, "epoch": 5.927099841521395, "percentage": 84.67, "elapsed_time": "6:19:30", "remaining_time": "1:08:41"}
|
||||
{"current_steps": 3745, "total_steps": 4417, "loss": 0.2945, "lr": 2.7630728307178166e-06, "epoch": 5.935023771790808, "percentage": 84.79, "elapsed_time": "6:19:57", "remaining_time": "1:08:10"}
|
||||
{"current_steps": 3750, "total_steps": 4417, "loss": 0.3019, "lr": 2.723124001752415e-06, "epoch": 5.942947702060222, "percentage": 84.9, "elapsed_time": "6:20:27", "remaining_time": "1:07:40"}
|
||||
{"current_steps": 3755, "total_steps": 4417, "loss": 0.2946, "lr": 2.6834449654526463e-06, "epoch": 5.950871632329635, "percentage": 85.01, "elapsed_time": "6:20:53", "remaining_time": "1:07:09"}
|
||||
{"current_steps": 3760, "total_steps": 4417, "loss": 0.2809, "lr": 2.6440363414393776e-06, "epoch": 5.958795562599049, "percentage": 85.13, "elapsed_time": "6:21:18", "remaining_time": "1:06:37"}
|
||||
{"current_steps": 3765, "total_steps": 4417, "loss": 0.305, "lr": 2.6048987451107667e-06, "epoch": 5.966719492868463, "percentage": 85.24, "elapsed_time": "6:21:46", "remaining_time": "1:06:06"}
|
||||
{"current_steps": 3770, "total_steps": 4417, "loss": 0.2923, "lr": 2.5660327876326596e-06, "epoch": 5.974643423137876, "percentage": 85.35, "elapsed_time": "6:22:11", "remaining_time": "1:05:35"}
|
||||
{"current_steps": 3775, "total_steps": 4417, "loss": 0.2773, "lr": 2.527439075929028e-06, "epoch": 5.9825673534072905, "percentage": 85.47, "elapsed_time": "6:22:36", "remaining_time": "1:05:04"}
|
||||
{"current_steps": 3780, "total_steps": 4417, "loss": 0.2854, "lr": 2.489118212672512e-06, "epoch": 5.990491283676704, "percentage": 85.58, "elapsed_time": "6:23:11", "remaining_time": "1:04:34"}
|
||||
{"current_steps": 3785, "total_steps": 4417, "loss": 0.2731, "lr": 2.4510707962749813e-06, "epoch": 5.998415213946117, "percentage": 85.69, "elapsed_time": "6:23:41", "remaining_time": "1:04:03"}
|
||||
{"current_steps": 3790, "total_steps": 4417, "loss": 0.2736, "lr": 2.413297420878231e-06, "epoch": 6.006339144215531, "percentage": 85.8, "elapsed_time": "6:24:12", "remaining_time": "1:03:33"}
|
||||
{"current_steps": 3795, "total_steps": 4417, "loss": 0.2809, "lr": 2.375798676344652e-06, "epoch": 6.014263074484944, "percentage": 85.92, "elapsed_time": "6:24:39", "remaining_time": "1:03:02"}
|
||||
{"current_steps": 3800, "total_steps": 4417, "loss": 0.2651, "lr": 2.338575148248077e-06, "epoch": 6.022187004754358, "percentage": 86.03, "elapsed_time": "6:25:09", "remaining_time": "1:02:32"}
|
||||
{"current_steps": 3805, "total_steps": 4417, "loss": 0.2702, "lr": 2.3016274178645826e-06, "epoch": 6.030110935023772, "percentage": 86.14, "elapsed_time": "6:25:34", "remaining_time": "1:02:00"}
|
||||
{"current_steps": 3810, "total_steps": 4417, "loss": 0.282, "lr": 2.264956062163448e-06, "epoch": 6.038034865293185, "percentage": 86.26, "elapsed_time": "6:26:06", "remaining_time": "1:01:30"}
|
||||
{"current_steps": 3815, "total_steps": 4417, "loss": 0.2923, "lr": 2.2285616537981382e-06, "epoch": 6.045958795562599, "percentage": 86.37, "elapsed_time": "6:26:41", "remaining_time": "1:01:01"}
|
||||
{"current_steps": 3820, "total_steps": 4417, "loss": 0.2904, "lr": 2.192444761097341e-06, "epoch": 6.053882725832013, "percentage": 86.48, "elapsed_time": "6:27:09", "remaining_time": "1:00:30"}
|
||||
{"current_steps": 3825, "total_steps": 4417, "loss": 0.285, "lr": 2.1566059480561276e-06, "epoch": 6.061806656101426, "percentage": 86.6, "elapsed_time": "6:27:39", "remaining_time": "0:59:59"}
|
||||
{"current_steps": 3830, "total_steps": 4417, "loss": 0.2865, "lr": 2.1210457743271173e-06, "epoch": 6.06973058637084, "percentage": 86.71, "elapsed_time": "6:28:15", "remaining_time": "0:59:30"}
|
||||
{"current_steps": 3835, "total_steps": 4417, "loss": 0.2707, "lr": 2.085764795211742e-06, "epoch": 6.077654516640253, "percentage": 86.82, "elapsed_time": "6:28:41", "remaining_time": "0:58:59"}
|
||||
{"current_steps": 3840, "total_steps": 4417, "loss": 0.2785, "lr": 2.0507635616515896e-06, "epoch": 6.085578446909667, "percentage": 86.94, "elapsed_time": "6:29:18", "remaining_time": "0:58:29"}
|
||||
{"current_steps": 3845, "total_steps": 4417, "loss": 0.2692, "lr": 2.0160426202197782e-06, "epoch": 6.093502377179081, "percentage": 87.05, "elapsed_time": "6:29:48", "remaining_time": "0:57:59"}
|
||||
{"current_steps": 3850, "total_steps": 4417, "loss": 0.2793, "lr": 1.981602513112446e-06, "epoch": 6.101426307448494, "percentage": 87.16, "elapsed_time": "6:30:16", "remaining_time": "0:57:28"}
|
||||
{"current_steps": 3855, "total_steps": 4417, "loss": 0.2831, "lr": 1.947443778140259e-06, "epoch": 6.1093502377179085, "percentage": 87.28, "elapsed_time": "6:30:49", "remaining_time": "0:56:58"}
|
||||
{"current_steps": 3860, "total_steps": 4417, "loss": 0.2814, "lr": 1.9135669487200404e-06, "epoch": 6.117274167987322, "percentage": 87.39, "elapsed_time": "6:31:13", "remaining_time": "0:56:27"}
|
||||
{"current_steps": 3865, "total_steps": 4417, "loss": 0.2773, "lr": 1.8799725538664138e-06, "epoch": 6.125198098256735, "percentage": 87.5, "elapsed_time": "6:31:41", "remaining_time": "0:55:56"}
|
||||
{"current_steps": 3870, "total_steps": 4417, "loss": 0.2884, "lr": 1.8466611181835504e-06, "epoch": 6.133122028526149, "percentage": 87.62, "elapsed_time": "6:32:08", "remaining_time": "0:55:25"}
|
||||
{"current_steps": 3875, "total_steps": 4417, "loss": 0.2685, "lr": 1.813633161856998e-06, "epoch": 6.141045958795562, "percentage": 87.73, "elapsed_time": "6:32:40", "remaining_time": "0:54:55"}
|
||||
{"current_steps": 3880, "total_steps": 4417, "loss": 0.2696, "lr": 1.7808892006455236e-06, "epoch": 6.148969889064976, "percentage": 87.84, "elapsed_time": "6:33:07", "remaining_time": "0:54:24"}
|
||||
{"current_steps": 3885, "total_steps": 4417, "loss": 0.2679, "lr": 1.7484297458730903e-06, "epoch": 6.15689381933439, "percentage": 87.96, "elapsed_time": "6:33:36", "remaining_time": "0:53:53"}
|
||||
{"current_steps": 3890, "total_steps": 4417, "loss": 0.2765, "lr": 1.7162553044208397e-06, "epoch": 6.164817749603803, "percentage": 88.07, "elapsed_time": "6:34:09", "remaining_time": "0:53:23"}
|
||||
{"current_steps": 3895, "total_steps": 4417, "loss": 0.2566, "lr": 1.6843663787192198e-06, "epoch": 6.1727416798732175, "percentage": 88.18, "elapsed_time": "6:34:42", "remaining_time": "0:52:53"}
|
||||
{"current_steps": 3900, "total_steps": 4417, "loss": 0.2794, "lr": 1.6527634667400905e-06, "epoch": 6.180665610142631, "percentage": 88.3, "elapsed_time": "6:35:13", "remaining_time": "0:52:23"}
|
||||
{"current_steps": 3905, "total_steps": 4417, "loss": 0.2734, "lr": 1.621447061988992e-06, "epoch": 6.188589540412044, "percentage": 88.41, "elapsed_time": "6:35:44", "remaining_time": "0:51:53"}
|
||||
{"current_steps": 3910, "total_steps": 4417, "loss": 0.2785, "lr": 1.5904176534974026e-06, "epoch": 6.196513470681458, "percentage": 88.52, "elapsed_time": "6:36:09", "remaining_time": "0:51:22"}
|
||||
{"current_steps": 3915, "total_steps": 4417, "loss": 0.2644, "lr": 1.559675725815124e-06, "epoch": 6.204437400950872, "percentage": 88.63, "elapsed_time": "6:36:39", "remaining_time": "0:50:51"}
|
||||
{"current_steps": 3920, "total_steps": 4417, "loss": 0.275, "lr": 1.5292217590027126e-06, "epoch": 6.212361331220285, "percentage": 88.75, "elapsed_time": "6:37:18", "remaining_time": "0:50:22"}
|
||||
{"current_steps": 3925, "total_steps": 4417, "loss": 0.2916, "lr": 1.4990562286239651e-06, "epoch": 6.220285261489699, "percentage": 88.86, "elapsed_time": "6:37:48", "remaining_time": "0:49:51"}
|
||||
{"current_steps": 3930, "total_steps": 4417, "loss": 0.2906, "lr": 1.4691796057385222e-06, "epoch": 6.228209191759112, "percentage": 88.97, "elapsed_time": "6:38:15", "remaining_time": "0:49:21"}
|
||||
{"current_steps": 3935, "total_steps": 4417, "loss": 0.2773, "lr": 1.4395923568944792e-06, "epoch": 6.2361331220285265, "percentage": 89.09, "elapsed_time": "6:38:46", "remaining_time": "0:48:50"}
|
||||
{"current_steps": 3940, "total_steps": 4417, "loss": 0.2628, "lr": 1.4102949441211333e-06, "epoch": 6.24405705229794, "percentage": 89.2, "elapsed_time": "6:39:20", "remaining_time": "0:48:20"}
|
||||
{"current_steps": 3945, "total_steps": 4417, "loss": 0.2719, "lr": 1.3812878249217375e-06, "epoch": 6.251980982567353, "percentage": 89.31, "elapsed_time": "6:39:55", "remaining_time": "0:47:50"}
|
||||
{"current_steps": 3950, "total_steps": 4417, "loss": 0.2886, "lr": 1.3525714522663802e-06, "epoch": 6.259904912836767, "percentage": 89.43, "elapsed_time": "6:40:27", "remaining_time": "0:47:20"}
|
||||
{"current_steps": 3955, "total_steps": 4417, "loss": 0.2755, "lr": 1.3241462745849032e-06, "epoch": 6.267828843106181, "percentage": 89.54, "elapsed_time": "6:40:52", "remaining_time": "0:46:49"}
|
||||
{"current_steps": 3960, "total_steps": 4417, "loss": 0.2741, "lr": 1.2960127357598907e-06, "epoch": 6.2757527733755945, "percentage": 89.65, "elapsed_time": "6:41:25", "remaining_time": "0:46:19"}
|
||||
{"current_steps": 3965, "total_steps": 4417, "loss": 0.2656, "lr": 1.2681712751197561e-06, "epoch": 6.283676703645008, "percentage": 89.77, "elapsed_time": "6:41:57", "remaining_time": "0:45:49"}
|
||||
{"current_steps": 3970, "total_steps": 4417, "loss": 0.2889, "lr": 1.2406223274318619e-06, "epoch": 6.291600633914421, "percentage": 89.88, "elapsed_time": "6:42:27", "remaining_time": "0:45:18"}
|
||||
{"current_steps": 3975, "total_steps": 4417, "loss": 0.2672, "lr": 1.2133663228957483e-06, "epoch": 6.2995245641838356, "percentage": 89.99, "elapsed_time": "6:43:00", "remaining_time": "0:44:48"}
|
||||
{"current_steps": 3980, "total_steps": 4417, "loss": 0.2822, "lr": 1.186403687136397e-06, "epoch": 6.307448494453249, "percentage": 90.11, "elapsed_time": "6:43:28", "remaining_time": "0:44:18"}
|
||||
{"current_steps": 3985, "total_steps": 4417, "loss": 0.2835, "lr": 1.1597348411976038e-06, "epoch": 6.315372424722662, "percentage": 90.22, "elapsed_time": "6:43:57", "remaining_time": "0:43:47"}
|
||||
{"current_steps": 3990, "total_steps": 4417, "loss": 0.2593, "lr": 1.1333602015353896e-06, "epoch": 6.323296354992076, "percentage": 90.33, "elapsed_time": "6:44:32", "remaining_time": "0:43:17"}
|
||||
{"current_steps": 3995, "total_steps": 4417, "loss": 0.2783, "lr": 1.1072801800114985e-06, "epoch": 6.33122028526149, "percentage": 90.45, "elapsed_time": "6:45:01", "remaining_time": "0:42:46"}
|
||||
{"current_steps": 4000, "total_steps": 4417, "loss": 0.2937, "lr": 1.0814951838869758e-06, "epoch": 6.3391442155309035, "percentage": 90.56, "elapsed_time": "6:45:38", "remaining_time": "0:42:17"}
|
||||
{"current_steps": 4005, "total_steps": 4417, "loss": 0.258, "lr": 1.0560056158157938e-06, "epoch": 6.347068145800317, "percentage": 90.67, "elapsed_time": "6:46:14", "remaining_time": "0:41:47"}
|
||||
{"current_steps": 4010, "total_steps": 4417, "loss": 0.2742, "lr": 1.0308118738385865e-06, "epoch": 6.35499207606973, "percentage": 90.79, "elapsed_time": "6:46:44", "remaining_time": "0:41:16"}
|
||||
{"current_steps": 4015, "total_steps": 4417, "loss": 0.2864, "lr": 1.0059143513764003e-06, "epoch": 6.362916006339145, "percentage": 90.9, "elapsed_time": "6:47:15", "remaining_time": "0:40:46"}
|
||||
{"current_steps": 4020, "total_steps": 4417, "loss": 0.2712, "lr": 9.81313437224587e-07, "epoch": 6.370839936608558, "percentage": 91.01, "elapsed_time": "6:47:44", "remaining_time": "0:40:16"}
|
||||
{"current_steps": 4025, "total_steps": 4417, "loss": 0.2802, "lr": 9.57009515546703e-07, "epoch": 6.378763866877971, "percentage": 91.13, "elapsed_time": "6:48:17", "remaining_time": "0:39:45"}
|
||||
{"current_steps": 4030, "total_steps": 4417, "loss": 0.2564, "lr": 9.330029658685302e-07, "epoch": 6.386687797147385, "percentage": 91.24, "elapsed_time": "6:48:45", "remaining_time": "0:39:15"}
|
||||
{"current_steps": 4035, "total_steps": 4417, "loss": 0.2943, "lr": 9.092941630721363e-07, "epoch": 6.394611727416799, "percentage": 91.35, "elapsed_time": "6:49:15", "remaining_time": "0:38:44"}
|
||||
{"current_steps": 4040, "total_steps": 4417, "loss": 0.2447, "lr": 8.85883477390026e-07, "epoch": 6.4025356576862125, "percentage": 91.46, "elapsed_time": "6:49:50", "remaining_time": "0:38:14"}
|
||||
{"current_steps": 4045, "total_steps": 4417, "loss": 0.2701, "lr": 8.627712743993655e-07, "epoch": 6.410459587955626, "percentage": 91.58, "elapsed_time": "6:50:22", "remaining_time": "0:37:44"}
|
||||
{"current_steps": 4050, "total_steps": 4417, "loss": 0.273, "lr": 8.399579150162585e-07, "epoch": 6.418383518225039, "percentage": 91.69, "elapsed_time": "6:50:56", "remaining_time": "0:37:14"}
|
||||
{"current_steps": 4055, "total_steps": 4417, "loss": 0.2989, "lr": 8.174437554901304e-07, "epoch": 6.426307448494454, "percentage": 91.8, "elapsed_time": "6:51:31", "remaining_time": "0:36:44"}
|
||||
{"current_steps": 4060, "total_steps": 4417, "loss": 0.2872, "lr": 7.9522914739814e-07, "epoch": 6.434231378763867, "percentage": 91.92, "elapsed_time": "6:52:02", "remaining_time": "0:36:13"}
|
||||
{"current_steps": 4065, "total_steps": 4417, "loss": 0.2785, "lr": 7.733144376397184e-07, "epoch": 6.44215530903328, "percentage": 92.03, "elapsed_time": "6:52:39", "remaining_time": "0:35:44"}
|
||||
{"current_steps": 4070, "total_steps": 4417, "loss": 0.2675, "lr": 7.516999684311277e-07, "epoch": 6.450079239302694, "percentage": 92.14, "elapsed_time": "6:53:05", "remaining_time": "0:35:13"}
|
||||
{"current_steps": 4075, "total_steps": 4417, "loss": 0.2703, "lr": 7.303860773001248e-07, "epoch": 6.458003169572108, "percentage": 92.26, "elapsed_time": "6:53:38", "remaining_time": "0:34:42"}
|
||||
{"current_steps": 4080, "total_steps": 4417, "loss": 0.2966, "lr": 7.093730970807034e-07, "epoch": 6.4659270998415215, "percentage": 92.37, "elapsed_time": "6:54:11", "remaining_time": "0:34:12"}
|
||||
{"current_steps": 4085, "total_steps": 4417, "loss": 0.28, "lr": 6.88661355907867e-07, "epoch": 6.473851030110935, "percentage": 92.48, "elapsed_time": "6:54:41", "remaining_time": "0:33:42"}
|
||||
{"current_steps": 4090, "total_steps": 4417, "loss": 0.2936, "lr": 6.682511772125422e-07, "epoch": 6.481774960380348, "percentage": 92.6, "elapsed_time": "6:55:09", "remaining_time": "0:33:11"}
|
||||
{"current_steps": 4095, "total_steps": 4417, "loss": 0.277, "lr": 6.481428797164957e-07, "epoch": 6.489698890649763, "percentage": 92.71, "elapsed_time": "6:55:41", "remaining_time": "0:32:41"}
|
||||
{"current_steps": 4100, "total_steps": 4417, "loss": 0.2678, "lr": 6.283367774273785e-07, "epoch": 6.497622820919176, "percentage": 92.82, "elapsed_time": "6:56:14", "remaining_time": "0:32:10"}
|
||||
{"current_steps": 4105, "total_steps": 4417, "loss": 0.2762, "lr": 6.088331796338032e-07, "epoch": 6.505546751188589, "percentage": 92.94, "elapsed_time": "6:56:43", "remaining_time": "0:31:40"}
|
||||
{"current_steps": 4110, "total_steps": 4417, "loss": 0.2658, "lr": 5.896323909005408e-07, "epoch": 6.513470681458003, "percentage": 93.05, "elapsed_time": "6:57:11", "remaining_time": "0:31:09"}
|
||||
{"current_steps": 4115, "total_steps": 4417, "loss": 0.2877, "lr": 5.707347110637363e-07, "epoch": 6.521394611727417, "percentage": 93.16, "elapsed_time": "6:57:43", "remaining_time": "0:30:39"}
|
||||
{"current_steps": 4120, "total_steps": 4417, "loss": 0.2755, "lr": 5.521404352262427e-07, "epoch": 6.5293185419968305, "percentage": 93.28, "elapsed_time": "6:58:14", "remaining_time": "0:30:09"}
|
||||
{"current_steps": 4125, "total_steps": 4417, "loss": 0.2764, "lr": 5.338498537530146e-07, "epoch": 6.537242472266244, "percentage": 93.39, "elapsed_time": "6:58:42", "remaining_time": "0:29:38"}
|
||||
{"current_steps": 4130, "total_steps": 4417, "loss": 0.306, "lr": 5.158632522665619e-07, "epoch": 6.545166402535658, "percentage": 93.5, "elapsed_time": "6:59:10", "remaining_time": "0:29:07"}
|
||||
{"current_steps": 4135, "total_steps": 4417, "loss": 0.2783, "lr": 4.981809116424985e-07, "epoch": 6.553090332805072, "percentage": 93.62, "elapsed_time": "6:59:39", "remaining_time": "0:28:37"}
|
||||
{"current_steps": 4140, "total_steps": 4417, "loss": 0.2749, "lr": 4.808031080051545e-07, "epoch": 6.561014263074485, "percentage": 93.73, "elapsed_time": "7:00:11", "remaining_time": "0:28:06"}
|
||||
{"current_steps": 4145, "total_steps": 4417, "loss": 0.2693, "lr": 4.637301127232663e-07, "epoch": 6.568938193343898, "percentage": 93.84, "elapsed_time": "7:00:40", "remaining_time": "0:27:36"}
|
||||
{"current_steps": 4150, "total_steps": 4417, "loss": 0.2796, "lr": 4.4696219240573327e-07, "epoch": 6.576862123613312, "percentage": 93.96, "elapsed_time": "7:01:06", "remaining_time": "0:27:05"}
|
||||
{"current_steps": 4155, "total_steps": 4417, "loss": 0.2898, "lr": 4.3049960889745666e-07, "epoch": 6.584786053882726, "percentage": 94.07, "elapsed_time": "7:01:39", "remaining_time": "0:26:35"}
|
||||
{"current_steps": 4160, "total_steps": 4417, "loss": 0.2723, "lr": 4.143426192752542e-07, "epoch": 6.5927099841521395, "percentage": 94.18, "elapsed_time": "7:02:11", "remaining_time": "0:26:04"}
|
||||
{"current_steps": 4165, "total_steps": 4417, "loss": 0.2995, "lr": 3.984914758438407e-07, "epoch": 6.600633914421553, "percentage": 94.29, "elapsed_time": "7:02:41", "remaining_time": "0:25:34"}
|
||||
{"current_steps": 4170, "total_steps": 4417, "loss": 0.2902, "lr": 3.8294642613189603e-07, "epoch": 6.608557844690967, "percentage": 94.41, "elapsed_time": "7:03:11", "remaining_time": "0:25:04"}
|
||||
{"current_steps": 4175, "total_steps": 4417, "loss": 0.2814, "lr": 3.6770771288818563e-07, "epoch": 6.616481774960381, "percentage": 94.52, "elapsed_time": "7:03:38", "remaining_time": "0:24:33"}
|
||||
{"current_steps": 4180, "total_steps": 4417, "loss": 0.2727, "lr": 3.5277557407778605e-07, "epoch": 6.624405705229794, "percentage": 94.63, "elapsed_time": "7:04:11", "remaining_time": "0:24:03"}
|
||||
{"current_steps": 4185, "total_steps": 4417, "loss": 0.2931, "lr": 3.3815024287835895e-07, "epoch": 6.632329635499207, "percentage": 94.75, "elapsed_time": "7:04:33", "remaining_time": "0:23:32"}
|
||||
{"current_steps": 4190, "total_steps": 4417, "loss": 0.2763, "lr": 3.2383194767650947e-07, "epoch": 6.640253565768621, "percentage": 94.86, "elapsed_time": "7:04:59", "remaining_time": "0:23:01"}
|
||||
{"current_steps": 4195, "total_steps": 4417, "loss": 0.2866, "lr": 3.098209120642248e-07, "epoch": 6.648177496038035, "percentage": 94.97, "elapsed_time": "7:05:27", "remaining_time": "0:22:30"}
|
||||
{"current_steps": 4200, "total_steps": 4417, "loss": 0.2793, "lr": 2.9611735483537684e-07, "epoch": 6.6561014263074485, "percentage": 95.09, "elapsed_time": "7:05:59", "remaining_time": "0:22:00"}
|
||||
{"current_steps": 4205, "total_steps": 4417, "loss": 0.2888, "lr": 2.827214899823183e-07, "epoch": 6.664025356576862, "percentage": 95.2, "elapsed_time": "7:06:28", "remaining_time": "0:21:30"}
|
||||
{"current_steps": 4210, "total_steps": 4417, "loss": 0.2831, "lr": 2.6963352669252095e-07, "epoch": 6.671949286846276, "percentage": 95.31, "elapsed_time": "7:06:54", "remaining_time": "0:20:59"}
|
||||
{"current_steps": 4215, "total_steps": 4417, "loss": 0.2754, "lr": 2.568536693453272e-07, "epoch": 6.67987321711569, "percentage": 95.43, "elapsed_time": "7:07:26", "remaining_time": "0:20:29"}
|
||||
{"current_steps": 4220, "total_steps": 4417, "loss": 0.2713, "lr": 2.443821175087435e-07, "epoch": 6.687797147385103, "percentage": 95.54, "elapsed_time": "7:07:59", "remaining_time": "0:19:58"}
|
||||
{"current_steps": 4225, "total_steps": 4417, "loss": 0.2712, "lr": 2.3221906593634102e-07, "epoch": 6.695721077654516, "percentage": 95.65, "elapsed_time": "7:08:35", "remaining_time": "0:19:28"}
|
||||
{"current_steps": 4230, "total_steps": 4417, "loss": 0.2889, "lr": 2.2036470456419767e-07, "epoch": 6.70364500792393, "percentage": 95.77, "elapsed_time": "7:09:06", "remaining_time": "0:18:58"}
|
||||
{"current_steps": 4235, "total_steps": 4417, "loss": 0.2789, "lr": 2.088192185079385e-07, "epoch": 6.711568938193344, "percentage": 95.88, "elapsed_time": "7:09:40", "remaining_time": "0:18:27"}
|
||||
{"current_steps": 4240, "total_steps": 4417, "loss": 0.2838, "lr": 1.9758278805985574e-07, "epoch": 6.7194928684627575, "percentage": 95.99, "elapsed_time": "7:10:17", "remaining_time": "0:17:57"}
|
||||
{"current_steps": 4245, "total_steps": 4417, "loss": 0.2734, "lr": 1.8665558868607325e-07, "epoch": 6.727416798732171, "percentage": 96.11, "elapsed_time": "7:10:47", "remaining_time": "0:17:27"}
|
||||
{"current_steps": 4250, "total_steps": 4417, "loss": 0.2862, "lr": 1.760377910238198e-07, "epoch": 6.735340729001585, "percentage": 96.22, "elapsed_time": "7:11:21", "remaining_time": "0:16:56"}
|
||||
{"current_steps": 4255, "total_steps": 4417, "loss": 0.2709, "lr": 1.6572956087876014e-07, "epoch": 6.743264659270999, "percentage": 96.33, "elapsed_time": "7:11:55", "remaining_time": "0:16:26"}
|
||||
{"current_steps": 4260, "total_steps": 4417, "loss": 0.2922, "lr": 1.5573105922241037e-07, "epoch": 6.751188589540412, "percentage": 96.45, "elapsed_time": "7:12:21", "remaining_time": "0:15:56"}
|
||||
{"current_steps": 4265, "total_steps": 4417, "loss": 0.2755, "lr": 1.4604244218962005e-07, "epoch": 6.759112519809825, "percentage": 96.56, "elapsed_time": "7:12:53", "remaining_time": "0:15:25"}
|
||||
{"current_steps": 4270, "total_steps": 4417, "loss": 0.2675, "lr": 1.3666386107613173e-07, "epoch": 6.767036450079239, "percentage": 96.67, "elapsed_time": "7:13:27", "remaining_time": "0:14:55"}
|
||||
{"current_steps": 4275, "total_steps": 4417, "loss": 0.3002, "lr": 1.2759546233622299e-07, "epoch": 6.774960380348653, "percentage": 96.79, "elapsed_time": "7:13:56", "remaining_time": "0:14:24"}
|
||||
{"current_steps": 4280, "total_steps": 4417, "loss": 0.2893, "lr": 1.1883738758041941e-07, "epoch": 6.7828843106180665, "percentage": 96.9, "elapsed_time": "7:14:26", "remaining_time": "0:13:54"}
|
||||
{"current_steps": 4285, "total_steps": 4417, "loss": 0.2871, "lr": 1.1038977357328063e-07, "epoch": 6.79080824088748, "percentage": 97.01, "elapsed_time": "7:15:02", "remaining_time": "0:13:24"}
|
||||
{"current_steps": 4290, "total_steps": 4417, "loss": 0.2731, "lr": 1.0225275223126219e-07, "epoch": 6.798732171156894, "percentage": 97.12, "elapsed_time": "7:15:32", "remaining_time": "0:12:53"}
|
||||
{"current_steps": 4295, "total_steps": 4417, "loss": 0.2726, "lr": 9.442645062066602e-08, "epoch": 6.806656101426308, "percentage": 97.24, "elapsed_time": "7:15:59", "remaining_time": "0:12:23"}
|
||||
{"current_steps": 4300, "total_steps": 4417, "loss": 0.2849, "lr": 8.691099095564426e-08, "epoch": 6.814580031695721, "percentage": 97.35, "elapsed_time": "7:16:30", "remaining_time": "0:11:52"}
|
||||
{"current_steps": 4305, "total_steps": 4417, "loss": 0.3006, "lr": 7.970649059629853e-08, "epoch": 6.822503961965134, "percentage": 97.46, "elapsed_time": "7:16:59", "remaining_time": "0:11:22"}
|
||||
{"current_steps": 4310, "total_steps": 4417, "loss": 0.2888, "lr": 7.281306204684147e-08, "epoch": 6.830427892234549, "percentage": 97.58, "elapsed_time": "7:17:24", "remaining_time": "0:10:51"}
|
||||
{"current_steps": 4315, "total_steps": 4417, "loss": 0.292, "lr": 6.623081295384248e-08, "epoch": 6.838351822503962, "percentage": 97.69, "elapsed_time": "7:17:53", "remaining_time": "0:10:21"}
|
||||
{"current_steps": 4320, "total_steps": 4417, "loss": 0.2763, "lr": 5.995984610455141e-08, "epoch": 6.8462757527733755, "percentage": 97.8, "elapsed_time": "7:18:28", "remaining_time": "0:09:50"}
|
||||
{"current_steps": 4325, "total_steps": 4417, "loss": 0.2584, "lr": 5.4000259425286415e-08, "epoch": 6.854199683042789, "percentage": 97.92, "elapsed_time": "7:18:52", "remaining_time": "0:09:20"}
|
||||
{"current_steps": 4330, "total_steps": 4417, "loss": 0.2671, "lr": 4.835214597990856e-08, "epoch": 6.862123613312203, "percentage": 98.03, "elapsed_time": "7:19:26", "remaining_time": "0:08:49"}
|
||||
{"current_steps": 4335, "total_steps": 4417, "loss": 0.2752, "lr": 4.301559396836519e-08, "epoch": 6.870047543581617, "percentage": 98.14, "elapsed_time": "7:19:47", "remaining_time": "0:08:19"}
|
||||
{"current_steps": 4340, "total_steps": 4417, "loss": 0.2875, "lr": 3.799068672531769e-08, "epoch": 6.87797147385103, "percentage": 98.26, "elapsed_time": "7:20:13", "remaining_time": "0:07:48"}
|
||||
{"current_steps": 4345, "total_steps": 4417, "loss": 0.2829, "lr": 3.3277502718835896e-08, "epoch": 6.885895404120443, "percentage": 98.37, "elapsed_time": "7:20:41", "remaining_time": "0:07:18"}
|
||||
{"current_steps": 4350, "total_steps": 4417, "loss": 0.2724, "lr": 2.8876115549176796e-08, "epoch": 6.893819334389858, "percentage": 98.48, "elapsed_time": "7:21:10", "remaining_time": "0:06:47"}
|
||||
{"current_steps": 4355, "total_steps": 4417, "loss": 0.2854, "lr": 2.4786593947625503e-08, "epoch": 6.901743264659271, "percentage": 98.6, "elapsed_time": "7:21:39", "remaining_time": "0:06:17"}
|
||||
{"current_steps": 4360, "total_steps": 4417, "loss": 0.2749, "lr": 2.1009001775440518e-08, "epoch": 6.9096671949286845, "percentage": 98.71, "elapsed_time": "7:22:06", "remaining_time": "0:05:46"}
|
||||
{"current_steps": 4365, "total_steps": 4417, "loss": 0.2632, "lr": 1.7543398022832337e-08, "epoch": 6.917591125198098, "percentage": 98.82, "elapsed_time": "7:22:41", "remaining_time": "0:05:16"}
|
||||
{"current_steps": 4370, "total_steps": 4417, "loss": 0.2851, "lr": 1.4389836808066382e-08, "epoch": 6.925515055467512, "percentage": 98.94, "elapsed_time": "7:23:17", "remaining_time": "0:04:46"}
|
||||
{"current_steps": 4375, "total_steps": 4417, "loss": 0.291, "lr": 1.1548367376599257e-08, "epoch": 6.933438985736926, "percentage": 99.05, "elapsed_time": "7:23:49", "remaining_time": "0:04:15"}
|
||||
{"current_steps": 4380, "total_steps": 4417, "loss": 0.2756, "lr": 9.019034100319347e-09, "epoch": 6.941362916006339, "percentage": 99.16, "elapsed_time": "7:24:14", "remaining_time": "0:03:45"}
|
||||
{"current_steps": 4385, "total_steps": 4417, "loss": 0.2754, "lr": 6.801876476854041e-09, "epoch": 6.949286846275752, "percentage": 99.28, "elapsed_time": "7:24:40", "remaining_time": "0:03:14"}
|
||||
{"current_steps": 4390, "total_steps": 4417, "loss": 0.2924, "lr": 4.896929128943573e-09, "epoch": 6.957210776545167, "percentage": 99.39, "elapsed_time": "7:25:01", "remaining_time": "0:02:44"}
|
||||
{"current_steps": 4395, "total_steps": 4417, "loss": 0.284, "lr": 3.3042218039147643e-09, "epoch": 6.96513470681458, "percentage": 99.5, "elapsed_time": "7:25:32", "remaining_time": "0:02:13"}
|
||||
{"current_steps": 4400, "total_steps": 4417, "loss": 0.2665, "lr": 2.0237793732036384e-09, "epoch": 6.9730586370839935, "percentage": 99.62, "elapsed_time": "7:26:01", "remaining_time": "0:01:43"}
|
||||
{"current_steps": 4405, "total_steps": 4417, "loss": 0.2711, "lr": 1.0556218319757172e-09, "epoch": 6.980982567353407, "percentage": 99.73, "elapsed_time": "7:26:27", "remaining_time": "0:01:12"}
|
||||
{"current_steps": 4410, "total_steps": 4417, "loss": 0.2734, "lr": 3.997642988107231e-10, "epoch": 6.988906497622821, "percentage": 99.84, "elapsed_time": "7:26:58", "remaining_time": "0:00:42"}
|
||||
{"current_steps": 4415, "total_steps": 4417, "loss": 0.2769, "lr": 5.62170154649877e-11, "epoch": 6.996830427892235, "percentage": 99.95, "elapsed_time": "7:27:31", "remaining_time": "0:00:12"}
|
||||
{"current_steps": 4417, "total_steps": 4417, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "7:27:46", "remaining_time": "0:00:00"}
|
||||
9760
trainer_state.json
Normal file
9760
trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8224339c3cfd2c96331fdb68431ce90fc4d1dd160374a59a9ad6842b4a94463a
|
||||
size 8593
|
||||
BIN
training_loss.png
Normal file
BIN
training_loss.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 43 KiB |
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user