初始化项目,由ModelHub XC社区提供模型
Model: DCAgent/a1-nemotron_bash_withtests Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
60
README.md
Normal file
60
README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
---
|
||||
library_name: transformers
|
||||
license: other
|
||||
base_model: Qwen/Qwen3-8B
|
||||
tags:
|
||||
- llama-factory
|
||||
- full
|
||||
- generated_from_trainer
|
||||
model-index:
|
||||
- name: sft_a1_nemotron_bash_withtests__Qwen3-8B
|
||||
results: []
|
||||
---
|
||||
|
||||
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||
should probably proofread and complete it, then remove this comment. -->
|
||||
|
||||
# sft_a1_nemotron_bash_withtests__Qwen3-8B
|
||||
|
||||
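This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the `DCAgent/exp_rpt_nemotron-bash-withtests_glm_4.7_traces_jupiter` dataset (snapshot `715628be9b8527ffb9e5318c14acf0fbd3077e50`, `_thinking_preprocessed` variant), which was read during training from the local path `/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--exp_rpt_nemotron-bash-withtests_glm_4.7_traces_jupiter/snapshots/715628be9b8527ffb9e5318c14acf0fbd3077e50_thinking_preprocessed`.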
|
||||
|
||||
## Model description
|
||||
|
||||
More information needed
|
||||
|
||||
## Intended uses & limitations
|
||||
|
||||
More information needed
|
||||
|
||||
## Training and evaluation data
|
||||
|
||||
More information needed
|
||||
|
||||
## Training procedure
|
||||
|
||||
### Training hyperparameters
|
||||
|
||||
The following hyperparameters were used during training:
|
||||
- learning_rate: 4e-05
|
||||
- train_batch_size: 1
|
||||
- eval_batch_size: 8
|
||||
- seed: 42
|
||||
- distributed_type: multi-GPU
|
||||
- num_devices: 16
|
||||
- total_train_batch_size: 16
|
||||
- total_eval_batch_size: 128
|
||||
- optimizer: AdamW, fused PyTorch implementation (`OptimizerNames.ADAMW_TORCH_FUSED`), with betas=(0.9, 0.98), epsilon=1e-08, and no additional optimizer arguments
|
||||
- lr_scheduler_type: cosine
|
||||
- lr_scheduler_warmup_ratio: 0.1
|
||||
- num_epochs: 7.0
|
||||
|
||||
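### Training results

The run completed 7.0 epochs in about 20,466 seconds (~5.7 hours) with a reported `train_loss` of 0.2489 (values taken from `all_results.json`).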
|
||||
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- Transformers 4.57.6
|
||||
- Pytorch 2.9.1+cu130
|
||||
- Datasets 4.7.0
|
||||
- Tokenizers 0.22.2
|
||||
28
added_tokens.json
Normal file
28
added_tokens.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"</think>": 151668,
|
||||
"</tool_call>": 151658,
|
||||
"</tool_response>": 151666,
|
||||
"<think>": 151667,
|
||||
"<tool_call>": 151657,
|
||||
"<tool_response>": 151665,
|
||||
"<|box_end|>": 151649,
|
||||
"<|box_start|>": 151648,
|
||||
"<|endoftext|>": 151643,
|
||||
"<|file_sep|>": 151664,
|
||||
"<|fim_middle|>": 151660,
|
||||
"<|fim_pad|>": 151662,
|
||||
"<|fim_prefix|>": 151659,
|
||||
"<|fim_suffix|>": 151661,
|
||||
"<|im_end|>": 151645,
|
||||
"<|im_start|>": 151644,
|
||||
"<|image_pad|>": 151655,
|
||||
"<|object_ref_end|>": 151647,
|
||||
"<|object_ref_start|>": 151646,
|
||||
"<|quad_end|>": 151651,
|
||||
"<|quad_start|>": 151650,
|
||||
"<|repo_name|>": 151663,
|
||||
"<|video_pad|>": 151656,
|
||||
"<|vision_end|>": 151653,
|
||||
"<|vision_pad|>": 151654,
|
||||
"<|vision_start|>": 151652
|
||||
}
|
||||
16
all_results.json
Normal file
16
all_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.002699137637295694,
|
||||
"achieved_tflops_per_gpu_theoretical": 613.2107258709934,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.15148700773715973,
|
||||
"mfu_percent": 0.00019075177648732817,
|
||||
"mfu_percent_theoretical": 43.33644705802074,
|
||||
"total_flos": 883859848232960.0,
|
||||
"train_loss": 0.2488628376376909,
|
||||
"train_runtime": 20466.2555,
|
||||
"train_samples_per_second": 2.895,
|
||||
"train_steps_per_second": 0.181,
|
||||
"valid_targets_mean": 2503.4,
|
||||
"valid_targets_min": 435
|
||||
}
|
||||
89
chat_template.jinja
Normal file
89
chat_template.jinja
Normal file
@@ -0,0 +1,89 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- if message.content is string %}
|
||||
{%- set content = message.content %}
|
||||
{%- else %}
|
||||
{%- set content = '' %}
|
||||
{%- endif %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||
{{- '<think>\n\n</think>\n\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
68
config.json
Normal file
68
config.json
Normal file
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 12288,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 40960,
|
||||
"max_window_layers": 36,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 36,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 151643,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"transformers_version": "4.57.6",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
12
generation_config.json
Normal file
12
generation_config.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"temperature": 0.6,
|
||||
"top_k": 20,
|
||||
"top_p": 0.95,
|
||||
"transformers_version": "4.57.6"
|
||||
}
|
||||
151388
merges.txt
Normal file
151388
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00004.safetensors
Normal file
3
model-00001-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:879e4205eb7d0867306f6f13895796934c4718ff59a5c0ef5d1b5355502ef960
|
||||
size 4902257696
|
||||
3
model-00002-of-00004.safetensors
Normal file
3
model-00002-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d562ed72af4f8b190ccefd3a72b75d529a195c043174c5425df7d9a120141812
|
||||
size 4915960368
|
||||
3
model-00003-of-00004.safetensors
Normal file
3
model-00003-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8f0fe2183bc49040f6c3c94936d43921b9395b7a7e88f9fea90c282f6dfadd45
|
||||
size 4983068496
|
||||
3
model-00004-of-00004.safetensors
Normal file
3
model-00004-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1702bb685c91ecbd934b60e1875aac4c3e5f4bfad07c1e563fac147b450da520
|
||||
size 1580230264
|
||||
407
model.safetensors.index.json
Normal file
407
model.safetensors.index.json
Normal file
@@ -0,0 +1,407 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_parameters": 8190735360,
|
||||
"total_size": 16381470720
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00004-of-00004.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.norm.weight": "model-00004-of-00004.safetensors"
|
||||
}
|
||||
}
|
||||
12
run_summary.json
Normal file
12
run_summary.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"agent_name": "715628be9b8527ffb9e5318c14acf0fbd3077e50_thinking_preprocessed",
|
||||
"training_start": null,
|
||||
"training_end": null,
|
||||
"created_by": "raoof1",
|
||||
"base_model_name": "Qwen/Qwen3-8B",
|
||||
"dataset_name": "/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--exp_rpt_nemotron-bash-withtests_glm_4.7_traces_jupiter/snapshots/715628be9b8527ffb9e5318c14acf0fbd3077e50_thinking_preprocessed",
|
||||
"training_type": "SFT",
|
||||
"training_parameters": "https://huggingface.co/DCAgent/a1-nemotron_bash_withtests/blob/main/config.json",
|
||||
"wandb_link": null,
|
||||
"traces_location_s3": null
|
||||
}
|
||||
31
special_tokens_map.json
Normal file
31
special_tokens_map.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"eos_token": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
||||
size 11422654
|
||||
240
tokenizer_config.json
Normal file
240
tokenizer_config.json
Normal file
@@ -0,0 +1,240 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|object_ref_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|object_ref_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151648": {
|
||||
"content": "<|box_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151649": {
|
||||
"content": "<|box_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151665": {
|
||||
"content": "<tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151666": {
|
||||
"content": "</tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151667": {
|
||||
"content": "<think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151668": {
|
||||
"content": "</think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": {},
|
||||
"model_max_length": 32768,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"padding_side": "right",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
16
train_results.json
Normal file
16
train_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.002699137637295694,
|
||||
"achieved_tflops_per_gpu_theoretical": 613.2107258709934,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.15148700773715973,
|
||||
"mfu_percent": 0.00019075177648732817,
|
||||
"mfu_percent_theoretical": 43.33644705802074,
|
||||
"total_flos": 883859848232960.0,
|
||||
"train_loss": 0.2488628376376909,
|
||||
"train_runtime": 20466.2555,
|
||||
"train_samples_per_second": 2.895,
|
||||
"train_steps_per_second": 0.181,
|
||||
"valid_targets_mean": 2503.4,
|
||||
"valid_targets_min": 435
|
||||
}
|
||||
741
trainer_log.jsonl
Normal file
741
trainer_log.jsonl
Normal file
@@ -0,0 +1,741 @@
|
||||
{"current_steps": 5, "total_steps": 3703, "loss": 0.7184, "lr": 4.3126684636118604e-07, "epoch": 0.00945179584120983, "percentage": 0.14, "elapsed_time": "0:00:42", "remaining_time": "8:44:09"}
|
||||
{"current_steps": 10, "total_steps": 3703, "loss": 0.7439, "lr": 9.703504043126686e-07, "epoch": 0.01890359168241966, "percentage": 0.27, "elapsed_time": "0:01:19", "remaining_time": "8:06:23"}
|
||||
{"current_steps": 15, "total_steps": 3703, "loss": 0.683, "lr": 1.509433962264151e-06, "epoch": 0.02835538752362949, "percentage": 0.41, "elapsed_time": "0:01:48", "remaining_time": "7:23:40"}
|
||||
{"current_steps": 20, "total_steps": 3703, "loss": 0.6521, "lr": 2.0485175202156334e-06, "epoch": 0.03780718336483932, "percentage": 0.54, "elapsed_time": "0:02:22", "remaining_time": "7:17:01"}
|
||||
{"current_steps": 25, "total_steps": 3703, "loss": 0.6232, "lr": 2.587601078167116e-06, "epoch": 0.04725897920604915, "percentage": 0.68, "elapsed_time": "0:02:48", "remaining_time": "6:52:38"}
|
||||
{"current_steps": 30, "total_steps": 3703, "loss": 0.541, "lr": 3.126684636118599e-06, "epoch": 0.05671077504725898, "percentage": 0.81, "elapsed_time": "0:03:13", "remaining_time": "6:34:11"}
|
||||
{"current_steps": 35, "total_steps": 3703, "loss": 0.5206, "lr": 3.665768194070081e-06, "epoch": 0.0661625708884688, "percentage": 0.95, "elapsed_time": "0:03:43", "remaining_time": "6:30:35"}
|
||||
{"current_steps": 40, "total_steps": 3703, "loss": 0.4649, "lr": 4.204851752021563e-06, "epoch": 0.07561436672967864, "percentage": 1.08, "elapsed_time": "0:04:12", "remaining_time": "6:25:41"}
|
||||
{"current_steps": 45, "total_steps": 3703, "loss": 0.5114, "lr": 4.7439353099730466e-06, "epoch": 0.08506616257088846, "percentage": 1.22, "elapsed_time": "0:04:43", "remaining_time": "6:24:44"}
|
||||
{"current_steps": 50, "total_steps": 3703, "loss": 0.488, "lr": 5.283018867924529e-06, "epoch": 0.0945179584120983, "percentage": 1.35, "elapsed_time": "0:05:10", "remaining_time": "6:18:11"}
|
||||
{"current_steps": 55, "total_steps": 3703, "loss": 0.4683, "lr": 5.822102425876012e-06, "epoch": 0.10396975425330812, "percentage": 1.49, "elapsed_time": "0:05:37", "remaining_time": "6:13:34"}
|
||||
{"current_steps": 60, "total_steps": 3703, "loss": 0.4784, "lr": 6.3611859838274934e-06, "epoch": 0.11342155009451796, "percentage": 1.62, "elapsed_time": "0:06:03", "remaining_time": "6:08:14"}
|
||||
{"current_steps": 65, "total_steps": 3703, "loss": 0.4596, "lr": 6.9002695417789766e-06, "epoch": 0.12287334593572778, "percentage": 1.76, "elapsed_time": "0:06:33", "remaining_time": "6:06:57"}
|
||||
{"current_steps": 70, "total_steps": 3703, "loss": 0.4469, "lr": 7.439353099730459e-06, "epoch": 0.1323251417769376, "percentage": 1.89, "elapsed_time": "0:07:02", "remaining_time": "6:05:19"}
|
||||
{"current_steps": 75, "total_steps": 3703, "loss": 0.4482, "lr": 7.978436657681942e-06, "epoch": 0.14177693761814744, "percentage": 2.03, "elapsed_time": "0:07:26", "remaining_time": "5:59:37"}
|
||||
{"current_steps": 80, "total_steps": 3703, "loss": 0.4537, "lr": 8.517520215633423e-06, "epoch": 0.15122873345935728, "percentage": 2.16, "elapsed_time": "0:07:49", "remaining_time": "5:54:09"}
|
||||
{"current_steps": 85, "total_steps": 3703, "loss": 0.4243, "lr": 9.056603773584907e-06, "epoch": 0.16068052930056712, "percentage": 2.3, "elapsed_time": "0:08:17", "remaining_time": "5:52:47"}
|
||||
{"current_steps": 90, "total_steps": 3703, "loss": 0.4334, "lr": 9.595687331536388e-06, "epoch": 0.17013232514177692, "percentage": 2.43, "elapsed_time": "0:08:50", "remaining_time": "5:55:04"}
|
||||
{"current_steps": 95, "total_steps": 3703, "loss": 0.4451, "lr": 1.0134770889487871e-05, "epoch": 0.17958412098298676, "percentage": 2.57, "elapsed_time": "0:09:12", "remaining_time": "5:49:37"}
|
||||
{"current_steps": 100, "total_steps": 3703, "loss": 0.4145, "lr": 1.0673854447439354e-05, "epoch": 0.1890359168241966, "percentage": 2.7, "elapsed_time": "0:09:40", "remaining_time": "5:48:31"}
|
||||
{"current_steps": 105, "total_steps": 3703, "loss": 0.3712, "lr": 1.1212938005390836e-05, "epoch": 0.19848771266540643, "percentage": 2.84, "elapsed_time": "0:10:04", "remaining_time": "5:45:14"}
|
||||
{"current_steps": 110, "total_steps": 3703, "loss": 0.4319, "lr": 1.1752021563342319e-05, "epoch": 0.20793950850661624, "percentage": 2.97, "elapsed_time": "0:10:31", "remaining_time": "5:44:00"}
|
||||
{"current_steps": 115, "total_steps": 3703, "loss": 0.3874, "lr": 1.2291105121293802e-05, "epoch": 0.21739130434782608, "percentage": 3.11, "elapsed_time": "0:10:55", "remaining_time": "5:40:57"}
|
||||
{"current_steps": 120, "total_steps": 3703, "loss": 0.4089, "lr": 1.2830188679245283e-05, "epoch": 0.22684310018903592, "percentage": 3.24, "elapsed_time": "0:11:28", "remaining_time": "5:42:36"}
|
||||
{"current_steps": 125, "total_steps": 3703, "loss": 0.3993, "lr": 1.3369272237196767e-05, "epoch": 0.23629489603024575, "percentage": 3.38, "elapsed_time": "0:11:54", "remaining_time": "5:40:54"}
|
||||
{"current_steps": 130, "total_steps": 3703, "loss": 0.3659, "lr": 1.390835579514825e-05, "epoch": 0.24574669187145556, "percentage": 3.51, "elapsed_time": "0:12:21", "remaining_time": "5:39:41"}
|
||||
{"current_steps": 135, "total_steps": 3703, "loss": 0.3523, "lr": 1.4447439353099733e-05, "epoch": 0.2551984877126654, "percentage": 3.65, "elapsed_time": "0:12:50", "remaining_time": "5:39:12"}
|
||||
{"current_steps": 140, "total_steps": 3703, "loss": 0.36, "lr": 1.4986522911051213e-05, "epoch": 0.2646502835538752, "percentage": 3.78, "elapsed_time": "0:13:12", "remaining_time": "5:36:18"}
|
||||
{"current_steps": 145, "total_steps": 3703, "loss": 0.3809, "lr": 1.5525606469002698e-05, "epoch": 0.2741020793950851, "percentage": 3.92, "elapsed_time": "0:13:43", "remaining_time": "5:36:35"}
|
||||
{"current_steps": 150, "total_steps": 3703, "loss": 0.3522, "lr": 1.606469002695418e-05, "epoch": 0.2835538752362949, "percentage": 4.05, "elapsed_time": "0:14:07", "remaining_time": "5:34:32"}
|
||||
{"current_steps": 155, "total_steps": 3703, "loss": 0.3396, "lr": 1.6603773584905664e-05, "epoch": 0.29300567107750475, "percentage": 4.19, "elapsed_time": "0:14:32", "remaining_time": "5:32:44"}
|
||||
{"current_steps": 160, "total_steps": 3703, "loss": 0.3842, "lr": 1.7142857142857142e-05, "epoch": 0.30245746691871456, "percentage": 4.32, "elapsed_time": "0:14:56", "remaining_time": "5:30:54"}
|
||||
{"current_steps": 165, "total_steps": 3703, "loss": 0.3282, "lr": 1.7681940700808627e-05, "epoch": 0.31190926275992437, "percentage": 4.46, "elapsed_time": "0:15:24", "remaining_time": "5:30:16"}
|
||||
{"current_steps": 170, "total_steps": 3703, "loss": 0.3299, "lr": 1.8221024258760108e-05, "epoch": 0.32136105860113423, "percentage": 4.59, "elapsed_time": "0:15:51", "remaining_time": "5:29:34"}
|
||||
{"current_steps": 175, "total_steps": 3703, "loss": 0.3572, "lr": 1.8760107816711593e-05, "epoch": 0.33081285444234404, "percentage": 4.73, "elapsed_time": "0:16:17", "remaining_time": "5:28:18"}
|
||||
{"current_steps": 180, "total_steps": 3703, "loss": 0.3976, "lr": 1.9299191374663074e-05, "epoch": 0.34026465028355385, "percentage": 4.86, "elapsed_time": "0:16:43", "remaining_time": "5:27:13"}
|
||||
{"current_steps": 185, "total_steps": 3703, "loss": 0.3248, "lr": 1.9838274932614556e-05, "epoch": 0.3497164461247637, "percentage": 5.0, "elapsed_time": "0:17:07", "remaining_time": "5:25:42"}
|
||||
{"current_steps": 190, "total_steps": 3703, "loss": 0.3515, "lr": 2.037735849056604e-05, "epoch": 0.3591682419659735, "percentage": 5.13, "elapsed_time": "0:17:31", "remaining_time": "5:24:05"}
|
||||
{"current_steps": 195, "total_steps": 3703, "loss": 0.3805, "lr": 2.0916442048517522e-05, "epoch": 0.3686200378071834, "percentage": 5.27, "elapsed_time": "0:18:02", "remaining_time": "5:24:36"}
|
||||
{"current_steps": 200, "total_steps": 3703, "loss": 0.3483, "lr": 2.1455525606469007e-05, "epoch": 0.3780718336483932, "percentage": 5.4, "elapsed_time": "0:18:25", "remaining_time": "5:22:46"}
|
||||
{"current_steps": 205, "total_steps": 3703, "loss": 0.3643, "lr": 2.199460916442049e-05, "epoch": 0.387523629489603, "percentage": 5.54, "elapsed_time": "0:18:53", "remaining_time": "5:22:26"}
|
||||
{"current_steps": 210, "total_steps": 3703, "loss": 0.3351, "lr": 2.253369272237197e-05, "epoch": 0.39697542533081287, "percentage": 5.67, "elapsed_time": "0:19:18", "remaining_time": "5:21:17"}
|
||||
{"current_steps": 215, "total_steps": 3703, "loss": 0.3531, "lr": 2.307277628032345e-05, "epoch": 0.4064272211720227, "percentage": 5.81, "elapsed_time": "0:19:46", "remaining_time": "5:20:41"}
|
||||
{"current_steps": 220, "total_steps": 3703, "loss": 0.3261, "lr": 2.3611859838274933e-05, "epoch": 0.4158790170132325, "percentage": 5.94, "elapsed_time": "0:20:12", "remaining_time": "5:20:03"}
|
||||
{"current_steps": 225, "total_steps": 3703, "loss": 0.3433, "lr": 2.4150943396226418e-05, "epoch": 0.42533081285444235, "percentage": 6.08, "elapsed_time": "0:20:35", "remaining_time": "5:18:24"}
|
||||
{"current_steps": 230, "total_steps": 3703, "loss": 0.292, "lr": 2.46900269541779e-05, "epoch": 0.43478260869565216, "percentage": 6.21, "elapsed_time": "0:20:55", "remaining_time": "5:16:04"}
|
||||
{"current_steps": 235, "total_steps": 3703, "loss": 0.3006, "lr": 2.5229110512129384e-05, "epoch": 0.444234404536862, "percentage": 6.35, "elapsed_time": "0:21:20", "remaining_time": "5:14:53"}
|
||||
{"current_steps": 240, "total_steps": 3703, "loss": 0.3327, "lr": 2.5768194070080865e-05, "epoch": 0.45368620037807184, "percentage": 6.48, "elapsed_time": "0:21:46", "remaining_time": "5:14:10"}
|
||||
{"current_steps": 245, "total_steps": 3703, "loss": 0.2847, "lr": 2.6307277628032347e-05, "epoch": 0.46313799621928164, "percentage": 6.62, "elapsed_time": "0:22:07", "remaining_time": "5:12:18"}
|
||||
{"current_steps": 250, "total_steps": 3703, "loss": 0.3253, "lr": 2.684636118598383e-05, "epoch": 0.4725897920604915, "percentage": 6.75, "elapsed_time": "0:22:36", "remaining_time": "5:12:16"}
|
||||
{"current_steps": 255, "total_steps": 3703, "loss": 0.4052, "lr": 2.7385444743935313e-05, "epoch": 0.4820415879017013, "percentage": 6.89, "elapsed_time": "0:23:10", "remaining_time": "5:13:17"}
|
||||
{"current_steps": 260, "total_steps": 3703, "loss": 0.3332, "lr": 2.7924528301886794e-05, "epoch": 0.4914933837429111, "percentage": 7.02, "elapsed_time": "0:23:35", "remaining_time": "5:12:18"}
|
||||
{"current_steps": 265, "total_steps": 3703, "loss": 0.3529, "lr": 2.8463611859838276e-05, "epoch": 0.500945179584121, "percentage": 7.16, "elapsed_time": "0:24:07", "remaining_time": "5:12:58"}
|
||||
{"current_steps": 270, "total_steps": 3703, "loss": 0.3252, "lr": 2.9002695417789757e-05, "epoch": 0.5103969754253308, "percentage": 7.29, "elapsed_time": "0:24:41", "remaining_time": "5:13:58"}
|
||||
{"current_steps": 275, "total_steps": 3703, "loss": 0.3454, "lr": 2.9541778975741242e-05, "epoch": 0.5198487712665406, "percentage": 7.43, "elapsed_time": "0:25:19", "remaining_time": "5:15:38"}
|
||||
{"current_steps": 280, "total_steps": 3703, "loss": 0.3583, "lr": 3.0080862533692724e-05, "epoch": 0.5293005671077504, "percentage": 7.56, "elapsed_time": "0:25:46", "remaining_time": "5:15:01"}
|
||||
{"current_steps": 285, "total_steps": 3703, "loss": 0.3635, "lr": 3.061994609164421e-05, "epoch": 0.5387523629489603, "percentage": 7.7, "elapsed_time": "0:26:19", "remaining_time": "5:15:47"}
|
||||
{"current_steps": 290, "total_steps": 3703, "loss": 0.336, "lr": 3.115902964959569e-05, "epoch": 0.5482041587901701, "percentage": 7.83, "elapsed_time": "0:26:57", "remaining_time": "5:17:13"}
|
||||
{"current_steps": 295, "total_steps": 3703, "loss": 0.3304, "lr": 3.169811320754717e-05, "epoch": 0.55765595463138, "percentage": 7.97, "elapsed_time": "0:27:27", "remaining_time": "5:17:16"}
|
||||
{"current_steps": 300, "total_steps": 3703, "loss": 0.3282, "lr": 3.223719676549865e-05, "epoch": 0.5671077504725898, "percentage": 8.1, "elapsed_time": "0:27:54", "remaining_time": "5:16:32"}
|
||||
{"current_steps": 305, "total_steps": 3703, "loss": 0.3151, "lr": 3.2776280323450134e-05, "epoch": 0.5765595463137996, "percentage": 8.24, "elapsed_time": "0:28:24", "remaining_time": "5:16:25"}
|
||||
{"current_steps": 310, "total_steps": 3703, "loss": 0.3055, "lr": 3.3315363881401616e-05, "epoch": 0.5860113421550095, "percentage": 8.37, "elapsed_time": "0:28:44", "remaining_time": "5:14:36"}
|
||||
{"current_steps": 315, "total_steps": 3703, "loss": 0.3173, "lr": 3.3854447439353104e-05, "epoch": 0.5954631379962193, "percentage": 8.51, "elapsed_time": "0:29:13", "remaining_time": "5:14:16"}
|
||||
{"current_steps": 320, "total_steps": 3703, "loss": 0.2993, "lr": 3.4393530997304585e-05, "epoch": 0.6049149338374291, "percentage": 8.64, "elapsed_time": "0:29:35", "remaining_time": "5:12:52"}
|
||||
{"current_steps": 325, "total_steps": 3703, "loss": 0.3123, "lr": 3.493261455525607e-05, "epoch": 0.6143667296786389, "percentage": 8.78, "elapsed_time": "0:30:10", "remaining_time": "5:13:37"}
|
||||
{"current_steps": 330, "total_steps": 3703, "loss": 0.3178, "lr": 3.547169811320755e-05, "epoch": 0.6238185255198487, "percentage": 8.91, "elapsed_time": "0:30:34", "remaining_time": "5:12:29"}
|
||||
{"current_steps": 335, "total_steps": 3703, "loss": 0.2889, "lr": 3.6010781671159037e-05, "epoch": 0.6332703213610587, "percentage": 9.05, "elapsed_time": "0:31:01", "remaining_time": "5:11:55"}
|
||||
{"current_steps": 340, "total_steps": 3703, "loss": 0.3231, "lr": 3.654986522911052e-05, "epoch": 0.6427221172022685, "percentage": 9.18, "elapsed_time": "0:31:37", "remaining_time": "5:12:49"}
|
||||
{"current_steps": 345, "total_steps": 3703, "loss": 0.2961, "lr": 3.708894878706199e-05, "epoch": 0.6521739130434783, "percentage": 9.32, "elapsed_time": "0:32:07", "remaining_time": "5:12:39"}
|
||||
{"current_steps": 350, "total_steps": 3703, "loss": 0.3203, "lr": 3.762803234501348e-05, "epoch": 0.6616257088846881, "percentage": 9.45, "elapsed_time": "0:32:35", "remaining_time": "5:12:11"}
|
||||
{"current_steps": 355, "total_steps": 3703, "loss": 0.3305, "lr": 3.816711590296496e-05, "epoch": 0.6710775047258979, "percentage": 9.59, "elapsed_time": "0:33:02", "remaining_time": "5:11:38"}
|
||||
{"current_steps": 360, "total_steps": 3703, "loss": 0.3336, "lr": 3.8706199460916444e-05, "epoch": 0.6805293005671077, "percentage": 9.72, "elapsed_time": "0:33:30", "remaining_time": "5:11:11"}
|
||||
{"current_steps": 365, "total_steps": 3703, "loss": 0.3999, "lr": 3.9245283018867925e-05, "epoch": 0.6899810964083176, "percentage": 9.86, "elapsed_time": "0:34:05", "remaining_time": "5:11:45"}
|
||||
{"current_steps": 370, "total_steps": 3703, "loss": 0.325, "lr": 3.9784366576819413e-05, "epoch": 0.6994328922495274, "percentage": 9.99, "elapsed_time": "0:34:33", "remaining_time": "5:11:16"}
|
||||
{"current_steps": 375, "total_steps": 3703, "loss": 0.3122, "lr": 3.999991999226427e-05, "epoch": 0.7088846880907372, "percentage": 10.13, "elapsed_time": "0:34:50", "remaining_time": "5:09:11"}
|
||||
{"current_steps": 380, "total_steps": 3703, "loss": 0.3244, "lr": 3.9999431058419585e-05, "epoch": 0.718336483931947, "percentage": 10.26, "elapsed_time": "0:35:23", "remaining_time": "5:09:32"}
|
||||
{"current_steps": 385, "total_steps": 3703, "loss": 0.329, "lr": 3.999849765032536e-05, "epoch": 0.7277882797731569, "percentage": 10.4, "elapsed_time": "0:36:00", "remaining_time": "5:10:17"}
|
||||
{"current_steps": 390, "total_steps": 3703, "loss": 0.3247, "lr": 3.999711978872596e-05, "epoch": 0.7372400756143668, "percentage": 10.53, "elapsed_time": "0:36:23", "remaining_time": "5:09:04"}
|
||||
{"current_steps": 395, "total_steps": 3703, "loss": 0.305, "lr": 3.9995297504243475e-05, "epoch": 0.7466918714555766, "percentage": 10.67, "elapsed_time": "0:36:50", "remaining_time": "5:08:34"}
|
||||
{"current_steps": 400, "total_steps": 3703, "loss": 0.2936, "lr": 3.9993030837376985e-05, "epoch": 0.7561436672967864, "percentage": 10.8, "elapsed_time": "0:37:20", "remaining_time": "5:08:18"}
|
||||
{"current_steps": 405, "total_steps": 3703, "loss": 0.3353, "lr": 3.999031983850166e-05, "epoch": 0.7655954631379962, "percentage": 10.94, "elapsed_time": "0:37:53", "remaining_time": "5:08:32"}
|
||||
{"current_steps": 410, "total_steps": 3703, "loss": 0.2723, "lr": 3.9987164567867677e-05, "epoch": 0.775047258979206, "percentage": 11.07, "elapsed_time": "0:38:20", "remaining_time": "5:07:55"}
|
||||
{"current_steps": 415, "total_steps": 3703, "loss": 0.2783, "lr": 3.998356509559886e-05, "epoch": 0.7844990548204159, "percentage": 11.21, "elapsed_time": "0:38:56", "remaining_time": "5:08:28"}
|
||||
{"current_steps": 420, "total_steps": 3703, "loss": 0.2955, "lr": 3.997952150169114e-05, "epoch": 0.7939508506616257, "percentage": 11.34, "elapsed_time": "0:39:16", "remaining_time": "5:06:56"}
|
||||
{"current_steps": 425, "total_steps": 3703, "loss": 0.3113, "lr": 3.997503387601071e-05, "epoch": 0.8034026465028355, "percentage": 11.48, "elapsed_time": "0:39:42", "remaining_time": "5:06:18"}
|
||||
{"current_steps": 430, "total_steps": 3703, "loss": 0.284, "lr": 3.9970102318292136e-05, "epoch": 0.8128544423440454, "percentage": 11.61, "elapsed_time": "0:40:06", "remaining_time": "5:05:19"}
|
||||
{"current_steps": 435, "total_steps": 3703, "loss": 0.3116, "lr": 3.996472693813604e-05, "epoch": 0.8223062381852552, "percentage": 11.75, "elapsed_time": "0:40:45", "remaining_time": "5:06:15"}
|
||||
{"current_steps": 440, "total_steps": 3703, "loss": 0.3224, "lr": 3.995890785500673e-05, "epoch": 0.831758034026465, "percentage": 11.88, "elapsed_time": "0:41:17", "remaining_time": "5:06:09"}
|
||||
{"current_steps": 445, "total_steps": 3703, "loss": 0.314, "lr": 3.995264519822952e-05, "epoch": 0.8412098298676749, "percentage": 12.02, "elapsed_time": "0:41:50", "remaining_time": "5:06:17"}
|
||||
{"current_steps": 450, "total_steps": 3703, "loss": 0.2937, "lr": 3.994593910698784e-05, "epoch": 0.8506616257088847, "percentage": 12.15, "elapsed_time": "0:42:10", "remaining_time": "5:04:52"}
|
||||
{"current_steps": 455, "total_steps": 3703, "loss": 0.2891, "lr": 3.9938789730320184e-05, "epoch": 0.8601134215500945, "percentage": 12.29, "elapsed_time": "0:42:41", "remaining_time": "5:04:46"}
|
||||
{"current_steps": 460, "total_steps": 3703, "loss": 0.2988, "lr": 3.993119722711676e-05, "epoch": 0.8695652173913043, "percentage": 12.42, "elapsed_time": "0:43:07", "remaining_time": "5:04:01"}
|
||||
{"current_steps": 465, "total_steps": 3703, "loss": 0.3231, "lr": 3.9923161766115975e-05, "epoch": 0.8790170132325141, "percentage": 12.56, "elapsed_time": "0:43:39", "remaining_time": "5:04:02"}
|
||||
{"current_steps": 470, "total_steps": 3703, "loss": 0.3337, "lr": 3.991468352590069e-05, "epoch": 0.888468809073724, "percentage": 12.69, "elapsed_time": "0:44:18", "remaining_time": "5:04:45"}
|
||||
{"current_steps": 475, "total_steps": 3703, "loss": 0.3376, "lr": 3.990576269489424e-05, "epoch": 0.8979206049149339, "percentage": 12.83, "elapsed_time": "0:44:53", "remaining_time": "5:05:06"}
|
||||
{"current_steps": 480, "total_steps": 3703, "loss": 0.3326, "lr": 3.9896399471356234e-05, "epoch": 0.9073724007561437, "percentage": 12.96, "elapsed_time": "0:45:23", "remaining_time": "5:04:45"}
|
||||
{"current_steps": 485, "total_steps": 3703, "loss": 0.3476, "lr": 3.9886594063378185e-05, "epoch": 0.9168241965973535, "percentage": 13.1, "elapsed_time": "0:45:51", "remaining_time": "5:04:17"}
|
||||
{"current_steps": 490, "total_steps": 3703, "loss": 0.2931, "lr": 3.987634668887887e-05, "epoch": 0.9262759924385633, "percentage": 13.23, "elapsed_time": "0:46:19", "remaining_time": "5:03:46"}
|
||||
{"current_steps": 495, "total_steps": 3703, "loss": 0.2834, "lr": 3.986565757559945e-05, "epoch": 0.9357277882797732, "percentage": 13.37, "elapsed_time": "0:46:49", "remaining_time": "5:03:27"}
|
||||
{"current_steps": 500, "total_steps": 3703, "loss": 0.316, "lr": 3.985452696109849e-05, "epoch": 0.945179584120983, "percentage": 13.5, "elapsed_time": "0:47:17", "remaining_time": "5:02:58"}
|
||||
{"current_steps": 505, "total_steps": 3703, "loss": 0.3121, "lr": 3.984295509274659e-05, "epoch": 0.9546313799621928, "percentage": 13.64, "elapsed_time": "0:47:37", "remaining_time": "5:01:38"}
|
||||
{"current_steps": 510, "total_steps": 3703, "loss": 0.3704, "lr": 3.983094222772094e-05, "epoch": 0.9640831758034026, "percentage": 13.77, "elapsed_time": "0:48:08", "remaining_time": "5:01:25"}
|
||||
{"current_steps": 515, "total_steps": 3703, "loss": 0.3553, "lr": 3.981848863299959e-05, "epoch": 0.9735349716446124, "percentage": 13.91, "elapsed_time": "0:48:44", "remaining_time": "5:01:43"}
|
||||
{"current_steps": 520, "total_steps": 3703, "loss": 0.306, "lr": 3.9805594585355536e-05, "epoch": 0.9829867674858223, "percentage": 14.04, "elapsed_time": "0:49:12", "remaining_time": "5:01:15"}
|
||||
{"current_steps": 525, "total_steps": 3703, "loss": 0.3036, "lr": 3.9792260371350526e-05, "epoch": 0.9924385633270322, "percentage": 14.18, "elapsed_time": "0:49:43", "remaining_time": "5:00:59"}
|
||||
{"current_steps": 530, "total_steps": 3703, "loss": 0.3444, "lr": 3.977848628732872e-05, "epoch": 1.001890359168242, "percentage": 14.31, "elapsed_time": "0:50:19", "remaining_time": "5:01:17"}
|
||||
{"current_steps": 535, "total_steps": 3703, "loss": 0.2797, "lr": 3.976427263941013e-05, "epoch": 1.011342155009452, "percentage": 14.45, "elapsed_time": "0:50:42", "remaining_time": "5:00:14"}
|
||||
{"current_steps": 540, "total_steps": 3703, "loss": 0.287, "lr": 3.9749619743483754e-05, "epoch": 1.0207939508506616, "percentage": 14.58, "elapsed_time": "0:51:11", "remaining_time": "4:59:48"}
|
||||
{"current_steps": 545, "total_steps": 3703, "loss": 0.2785, "lr": 3.9734527925200594e-05, "epoch": 1.0302457466918715, "percentage": 14.72, "elapsed_time": "0:51:37", "remaining_time": "4:59:08"}
|
||||
{"current_steps": 550, "total_steps": 3703, "loss": 0.2824, "lr": 3.9718997519966444e-05, "epoch": 1.0396975425330812, "percentage": 14.85, "elapsed_time": "0:51:56", "remaining_time": "4:57:47"}
|
||||
{"current_steps": 555, "total_steps": 3703, "loss": 0.3006, "lr": 3.970302887293437e-05, "epoch": 1.0491493383742911, "percentage": 14.99, "elapsed_time": "0:52:23", "remaining_time": "4:57:09"}
|
||||
{"current_steps": 560, "total_steps": 3703, "loss": 0.3004, "lr": 3.968662233899708e-05, "epoch": 1.0586011342155008, "percentage": 15.12, "elapsed_time": "0:52:49", "remaining_time": "4:56:30"}
|
||||
{"current_steps": 565, "total_steps": 3703, "loss": 0.2853, "lr": 3.966977828277905e-05, "epoch": 1.0680529300567108, "percentage": 15.26, "elapsed_time": "0:53:18", "remaining_time": "4:56:04"}
|
||||
{"current_steps": 570, "total_steps": 3703, "loss": 0.2729, "lr": 3.96524970786284e-05, "epoch": 1.0775047258979207, "percentage": 15.39, "elapsed_time": "0:53:54", "remaining_time": "4:56:19"}
|
||||
{"current_steps": 575, "total_steps": 3703, "loss": 0.2509, "lr": 3.963477911060855e-05, "epoch": 1.0869565217391304, "percentage": 15.53, "elapsed_time": "0:54:13", "remaining_time": "4:54:59"}
|
||||
{"current_steps": 580, "total_steps": 3703, "loss": 0.2756, "lr": 3.961662477248973e-05, "epoch": 1.0964083175803403, "percentage": 15.66, "elapsed_time": "0:54:41", "remaining_time": "4:54:29"}
|
||||
{"current_steps": 585, "total_steps": 3703, "loss": 0.2903, "lr": 3.959803446774022e-05, "epoch": 1.10586011342155, "percentage": 15.8, "elapsed_time": "0:55:12", "remaining_time": "4:54:13"}
|
||||
{"current_steps": 590, "total_steps": 3703, "loss": 0.2684, "lr": 3.957900860951736e-05, "epoch": 1.11531190926276, "percentage": 15.93, "elapsed_time": "0:55:39", "remaining_time": "4:53:37"}
|
||||
{"current_steps": 595, "total_steps": 3703, "loss": 0.2795, "lr": 3.9559547620658366e-05, "epoch": 1.1247637051039698, "percentage": 16.07, "elapsed_time": "0:56:07", "remaining_time": "4:53:11"}
|
||||
{"current_steps": 600, "total_steps": 3703, "loss": 0.2734, "lr": 3.9539651933670977e-05, "epoch": 1.1342155009451795, "percentage": 16.2, "elapsed_time": "0:56:40", "remaining_time": "4:53:05"}
|
||||
{"current_steps": 605, "total_steps": 3703, "loss": 0.3088, "lr": 3.9519321990723796e-05, "epoch": 1.1436672967863895, "percentage": 16.34, "elapsed_time": "0:57:14", "remaining_time": "4:53:07"}
|
||||
{"current_steps": 610, "total_steps": 3703, "loss": 0.3059, "lr": 3.949855824363647e-05, "epoch": 1.1531190926275992, "percentage": 16.47, "elapsed_time": "0:57:45", "remaining_time": "4:52:51"}
|
||||
{"current_steps": 615, "total_steps": 3703, "loss": 0.279, "lr": 3.94773611538697e-05, "epoch": 1.162570888468809, "percentage": 16.61, "elapsed_time": "0:58:12", "remaining_time": "4:52:15"}
|
||||
{"current_steps": 620, "total_steps": 3703, "loss": 0.2674, "lr": 3.945573119251489e-05, "epoch": 1.172022684310019, "percentage": 16.74, "elapsed_time": "0:58:40", "remaining_time": "4:51:44"}
|
||||
{"current_steps": 625, "total_steps": 3703, "loss": 0.2763, "lr": 3.9433668840283756e-05, "epoch": 1.1814744801512287, "percentage": 16.88, "elapsed_time": "0:59:11", "remaining_time": "4:51:31"}
|
||||
{"current_steps": 630, "total_steps": 3703, "loss": 0.272, "lr": 3.9411174587497636e-05, "epoch": 1.1909262759924386, "percentage": 17.01, "elapsed_time": "0:59:39", "remaining_time": "4:50:58"}
|
||||
{"current_steps": 635, "total_steps": 3703, "loss": 0.325, "lr": 3.938824893407655e-05, "epoch": 1.2003780718336483, "percentage": 17.15, "elapsed_time": "1:00:14", "remaining_time": "4:51:05"}
|
||||
{"current_steps": 640, "total_steps": 3703, "loss": 0.2928, "lr": 3.9364892389528116e-05, "epoch": 1.2098298676748582, "percentage": 17.28, "elapsed_time": "1:00:46", "remaining_time": "4:50:51"}
|
||||
{"current_steps": 645, "total_steps": 3703, "loss": 0.2779, "lr": 3.9341105472936234e-05, "epoch": 1.2192816635160681, "percentage": 17.42, "elapsed_time": "1:01:12", "remaining_time": "4:50:11"}
|
||||
{"current_steps": 650, "total_steps": 3703, "loss": 0.3009, "lr": 3.9316888712949546e-05, "epoch": 1.2287334593572778, "percentage": 17.55, "elapsed_time": "1:01:42", "remaining_time": "4:49:49"}
|
||||
{"current_steps": 655, "total_steps": 3703, "loss": 0.2935, "lr": 3.9292242647769664e-05, "epoch": 1.2381852551984878, "percentage": 17.69, "elapsed_time": "1:02:12", "remaining_time": "4:49:30"}
|
||||
{"current_steps": 660, "total_steps": 3703, "loss": 0.3244, "lr": 3.926716782513924e-05, "epoch": 1.2476370510396975, "percentage": 17.82, "elapsed_time": "1:02:41", "remaining_time": "4:49:04"}
|
||||
{"current_steps": 665, "total_steps": 3703, "loss": 0.27, "lr": 3.924166480232977e-05, "epoch": 1.2570888468809074, "percentage": 17.96, "elapsed_time": "1:03:05", "remaining_time": "4:48:15"}
|
||||
{"current_steps": 670, "total_steps": 3703, "loss": 0.2455, "lr": 3.921573414612923e-05, "epoch": 1.2665406427221173, "percentage": 18.09, "elapsed_time": "1:03:26", "remaining_time": "4:47:10"}
|
||||
{"current_steps": 675, "total_steps": 3703, "loss": 0.3046, "lr": 3.918937643282946e-05, "epoch": 1.275992438563327, "percentage": 18.23, "elapsed_time": "1:03:48", "remaining_time": "4:46:15"}
|
||||
{"current_steps": 680, "total_steps": 3703, "loss": 0.292, "lr": 3.9162592248213364e-05, "epoch": 1.285444234404537, "percentage": 18.36, "elapsed_time": "1:04:16", "remaining_time": "4:45:44"}
|
||||
{"current_steps": 685, "total_steps": 3703, "loss": 0.26, "lr": 3.913538218754189e-05, "epoch": 1.2948960302457466, "percentage": 18.5, "elapsed_time": "1:04:47", "remaining_time": "4:45:25"}
|
||||
{"current_steps": 690, "total_steps": 3703, "loss": 0.3009, "lr": 3.9107746855540815e-05, "epoch": 1.3043478260869565, "percentage": 18.63, "elapsed_time": "1:05:16", "remaining_time": "4:45:00"}
|
||||
{"current_steps": 695, "total_steps": 3703, "loss": 0.2702, "lr": 3.907968686638728e-05, "epoch": 1.3137996219281662, "percentage": 18.77, "elapsed_time": "1:05:43", "remaining_time": "4:44:29"}
|
||||
{"current_steps": 700, "total_steps": 3703, "loss": 0.2636, "lr": 3.9051202843696154e-05, "epoch": 1.3232514177693762, "percentage": 18.9, "elapsed_time": "1:06:09", "remaining_time": "4:43:47"}
|
||||
{"current_steps": 705, "total_steps": 3703, "loss": 0.2413, "lr": 3.902229542050617e-05, "epoch": 1.332703213610586, "percentage": 19.04, "elapsed_time": "1:06:39", "remaining_time": "4:43:27"}
|
||||
{"current_steps": 710, "total_steps": 3703, "loss": 0.2577, "lr": 3.899296523926588e-05, "epoch": 1.3421550094517958, "percentage": 19.17, "elapsed_time": "1:07:06", "remaining_time": "4:42:55"}
|
||||
{"current_steps": 715, "total_steps": 3703, "loss": 0.2817, "lr": 3.896321295181932e-05, "epoch": 1.3516068052930057, "percentage": 19.31, "elapsed_time": "1:07:24", "remaining_time": "4:41:40"}
|
||||
{"current_steps": 720, "total_steps": 3703, "loss": 0.2758, "lr": 3.8933039219391604e-05, "epoch": 1.3610586011342156, "percentage": 19.44, "elapsed_time": "1:07:48", "remaining_time": "4:40:56"}
|
||||
{"current_steps": 725, "total_steps": 3703, "loss": 0.2909, "lr": 3.890244471257415e-05, "epoch": 1.3705103969754253, "percentage": 19.58, "elapsed_time": "1:08:22", "remaining_time": "4:40:53"}
|
||||
{"current_steps": 730, "total_steps": 3703, "loss": 0.2651, "lr": 3.8871430111309817e-05, "epoch": 1.3799621928166352, "percentage": 19.71, "elapsed_time": "1:08:48", "remaining_time": "4:40:13"}
|
||||
{"current_steps": 735, "total_steps": 3703, "loss": 0.3056, "lr": 3.883999610487782e-05, "epoch": 1.389413988657845, "percentage": 19.85, "elapsed_time": "1:09:20", "remaining_time": "4:39:59"}
|
||||
{"current_steps": 740, "total_steps": 3703, "loss": 0.2944, "lr": 3.880814339187832e-05, "epoch": 1.3988657844990549, "percentage": 19.98, "elapsed_time": "1:09:47", "remaining_time": "4:39:27"}
|
||||
{"current_steps": 745, "total_steps": 3703, "loss": 0.2628, "lr": 3.877587268021701e-05, "epoch": 1.4083175803402646, "percentage": 20.12, "elapsed_time": "1:10:10", "remaining_time": "4:38:39"}
|
||||
{"current_steps": 750, "total_steps": 3703, "loss": 0.2776, "lr": 3.874318468708931e-05, "epoch": 1.4177693761814745, "percentage": 20.25, "elapsed_time": "1:10:39", "remaining_time": "4:38:13"}
|
||||
{"current_steps": 755, "total_steps": 3703, "loss": 0.269, "lr": 3.871008013896444e-05, "epoch": 1.4272211720226844, "percentage": 20.39, "elapsed_time": "1:11:07", "remaining_time": "4:37:41"}
|
||||
{"current_steps": 760, "total_steps": 3703, "loss": 0.2736, "lr": 3.8676559771569294e-05, "epoch": 1.436672967863894, "percentage": 20.52, "elapsed_time": "1:11:42", "remaining_time": "4:37:39"}
|
||||
{"current_steps": 765, "total_steps": 3703, "loss": 0.2405, "lr": 3.864262432987206e-05, "epoch": 1.446124763705104, "percentage": 20.66, "elapsed_time": "1:12:03", "remaining_time": "4:36:46"}
|
||||
{"current_steps": 770, "total_steps": 3703, "loss": 0.2779, "lr": 3.860827456806571e-05, "epoch": 1.455576559546314, "percentage": 20.79, "elapsed_time": "1:12:31", "remaining_time": "4:36:15"}
|
||||
{"current_steps": 775, "total_steps": 3703, "loss": 0.2457, "lr": 3.857351124955118e-05, "epoch": 1.4650283553875236, "percentage": 20.93, "elapsed_time": "1:12:56", "remaining_time": "4:35:33"}
|
||||
{"current_steps": 780, "total_steps": 3703, "loss": 0.2534, "lr": 3.853833514692044e-05, "epoch": 1.4744801512287333, "percentage": 21.06, "elapsed_time": "1:13:29", "remaining_time": "4:35:23"}
|
||||
{"current_steps": 785, "total_steps": 3703, "loss": 0.2974, "lr": 3.850274704193932e-05, "epoch": 1.4839319470699432, "percentage": 21.2, "elapsed_time": "1:14:03", "remaining_time": "4:35:16"}
|
||||
{"current_steps": 790, "total_steps": 3703, "loss": 0.308, "lr": 3.846674772553014e-05, "epoch": 1.4933837429111532, "percentage": 21.33, "elapsed_time": "1:14:31", "remaining_time": "4:34:48"}
|
||||
{"current_steps": 795, "total_steps": 3703, "loss": 0.3056, "lr": 3.843033799775411e-05, "epoch": 1.5028355387523629, "percentage": 21.47, "elapsed_time": "1:15:02", "remaining_time": "4:34:29"}
|
||||
{"current_steps": 800, "total_steps": 3703, "loss": 0.2931, "lr": 3.839351866779358e-05, "epoch": 1.5122873345935728, "percentage": 21.6, "elapsed_time": "1:15:30", "remaining_time": "4:33:59"}
|
||||
{"current_steps": 805, "total_steps": 3703, "loss": 0.3096, "lr": 3.835629055393401e-05, "epoch": 1.5217391304347827, "percentage": 21.74, "elapsed_time": "1:15:57", "remaining_time": "4:33:25"}
|
||||
{"current_steps": 810, "total_steps": 3703, "loss": 0.2916, "lr": 3.8318654483545865e-05, "epoch": 1.5311909262759924, "percentage": 21.87, "elapsed_time": "1:16:17", "remaining_time": "4:32:27"}
|
||||
{"current_steps": 815, "total_steps": 3703, "loss": 0.2982, "lr": 3.828061129306612e-05, "epoch": 1.5406427221172023, "percentage": 22.01, "elapsed_time": "1:16:48", "remaining_time": "4:32:11"}
|
||||
{"current_steps": 820, "total_steps": 3703, "loss": 0.2594, "lr": 3.824216182797976e-05, "epoch": 1.5500945179584122, "percentage": 22.14, "elapsed_time": "1:17:14", "remaining_time": "4:31:34"}
|
||||
{"current_steps": 825, "total_steps": 3703, "loss": 0.2915, "lr": 3.8203306942800956e-05, "epoch": 1.559546313799622, "percentage": 22.28, "elapsed_time": "1:17:36", "remaining_time": "4:30:45"}
|
||||
{"current_steps": 830, "total_steps": 3703, "loss": 0.2942, "lr": 3.8164047501054064e-05, "epoch": 1.5689981096408316, "percentage": 22.41, "elapsed_time": "1:18:02", "remaining_time": "4:30:09"}
|
||||
{"current_steps": 835, "total_steps": 3703, "loss": 0.2563, "lr": 3.8124384375254454e-05, "epoch": 1.5784499054820416, "percentage": 22.55, "elapsed_time": "1:18:29", "remaining_time": "4:29:35"}
|
||||
{"current_steps": 840, "total_steps": 3703, "loss": 0.3539, "lr": 3.808431844688911e-05, "epoch": 1.5879017013232515, "percentage": 22.68, "elapsed_time": "1:18:57", "remaining_time": "4:29:08"}
|
||||
{"current_steps": 845, "total_steps": 3703, "loss": 0.2678, "lr": 3.8043850606397026e-05, "epoch": 1.5973534971644612, "percentage": 22.82, "elapsed_time": "1:19:24", "remaining_time": "4:28:35"}
|
||||
{"current_steps": 850, "total_steps": 3703, "loss": 0.3009, "lr": 3.800298175314943e-05, "epoch": 1.606805293005671, "percentage": 22.95, "elapsed_time": "1:19:55", "remaining_time": "4:28:15"}
|
||||
{"current_steps": 855, "total_steps": 3703, "loss": 0.2884, "lr": 3.796171279542983e-05, "epoch": 1.616257088846881, "percentage": 23.09, "elapsed_time": "1:20:26", "remaining_time": "4:27:57"}
|
||||
{"current_steps": 860, "total_steps": 3703, "loss": 0.2685, "lr": 3.792004465041374e-05, "epoch": 1.6257088846880907, "percentage": 23.22, "elapsed_time": "1:20:54", "remaining_time": "4:27:29"}
|
||||
{"current_steps": 865, "total_steps": 3703, "loss": 0.306, "lr": 3.787797824414839e-05, "epoch": 1.6351606805293004, "percentage": 23.36, "elapsed_time": "1:21:29", "remaining_time": "4:27:21"}
|
||||
{"current_steps": 870, "total_steps": 3703, "loss": 0.2486, "lr": 3.7835514511532106e-05, "epoch": 1.6446124763705106, "percentage": 23.49, "elapsed_time": "1:21:50", "remaining_time": "4:26:29"}
|
||||
{"current_steps": 875, "total_steps": 3703, "loss": 0.2731, "lr": 3.779265439629349e-05, "epoch": 1.6540642722117203, "percentage": 23.63, "elapsed_time": "1:22:15", "remaining_time": "4:25:52"}
|
||||
{"current_steps": 880, "total_steps": 3703, "loss": 0.2802, "lr": 3.774939885097054e-05, "epoch": 1.66351606805293, "percentage": 23.76, "elapsed_time": "1:22:50", "remaining_time": "4:25:45"}
|
||||
{"current_steps": 885, "total_steps": 3703, "loss": 0.2719, "lr": 3.7705748836889394e-05, "epoch": 1.6729678638941399, "percentage": 23.9, "elapsed_time": "1:23:21", "remaining_time": "4:25:24"}
|
||||
{"current_steps": 890, "total_steps": 3703, "loss": 0.2638, "lr": 3.7661705324143015e-05, "epoch": 1.6824196597353498, "percentage": 24.03, "elapsed_time": "1:23:50", "remaining_time": "4:25:00"}
|
||||
{"current_steps": 895, "total_steps": 3703, "loss": 0.3051, "lr": 3.761726929156961e-05, "epoch": 1.6918714555765595, "percentage": 24.17, "elapsed_time": "1:24:16", "remaining_time": "4:24:25"}
|
||||
{"current_steps": 900, "total_steps": 3703, "loss": 0.2892, "lr": 3.757244172673089e-05, "epoch": 1.7013232514177694, "percentage": 24.3, "elapsed_time": "1:24:40", "remaining_time": "4:23:42"}
|
||||
{"current_steps": 905, "total_steps": 3703, "loss": 0.2507, "lr": 3.75272236258901e-05, "epoch": 1.7107750472589793, "percentage": 24.44, "elapsed_time": "1:25:06", "remaining_time": "4:23:06"}
|
||||
{"current_steps": 910, "total_steps": 3703, "loss": 0.29, "lr": 3.74816159939899e-05, "epoch": 1.720226843100189, "percentage": 24.57, "elapsed_time": "1:25:28", "remaining_time": "4:22:20"}
|
||||
{"current_steps": 915, "total_steps": 3703, "loss": 0.2555, "lr": 3.743561984463002e-05, "epoch": 1.7296786389413987, "percentage": 24.71, "elapsed_time": "1:26:00", "remaining_time": "4:22:05"}
|
||||
{"current_steps": 920, "total_steps": 3703, "loss": 0.2591, "lr": 3.738923620004475e-05, "epoch": 1.7391304347826086, "percentage": 24.84, "elapsed_time": "1:26:31", "remaining_time": "4:21:44"}
|
||||
{"current_steps": 925, "total_steps": 3703, "loss": 0.2591, "lr": 3.734246609108018e-05, "epoch": 1.7485822306238186, "percentage": 24.98, "elapsed_time": "1:27:06", "remaining_time": "4:21:36"}
|
||||
{"current_steps": 930, "total_steps": 3703, "loss": 0.3027, "lr": 3.729531055717135e-05, "epoch": 1.7580340264650283, "percentage": 25.11, "elapsed_time": "1:27:38", "remaining_time": "4:21:19"}
|
||||
{"current_steps": 935, "total_steps": 3703, "loss": 0.2848, "lr": 3.724777064631909e-05, "epoch": 1.7674858223062382, "percentage": 25.25, "elapsed_time": "1:28:07", "remaining_time": "4:20:53"}
|
||||
{"current_steps": 940, "total_steps": 3703, "loss": 0.29, "lr": 3.719984741506676e-05, "epoch": 1.776937618147448, "percentage": 25.38, "elapsed_time": "1:28:37", "remaining_time": "4:20:30"}
|
||||
{"current_steps": 945, "total_steps": 3703, "loss": 0.2782, "lr": 3.7151541928476775e-05, "epoch": 1.7863894139886578, "percentage": 25.52, "elapsed_time": "1:29:03", "remaining_time": "4:19:53"}
|
||||
{"current_steps": 950, "total_steps": 3703, "loss": 0.3121, "lr": 3.710285526010693e-05, "epoch": 1.7958412098298677, "percentage": 25.65, "elapsed_time": "1:29:32", "remaining_time": "4:19:28"}
|
||||
{"current_steps": 955, "total_steps": 3703, "loss": 0.2758, "lr": 3.705378849198651e-05, "epoch": 1.8052930056710776, "percentage": 25.79, "elapsed_time": "1:29:57", "remaining_time": "4:18:50"}
|
||||
{"current_steps": 960, "total_steps": 3703, "loss": 0.2567, "lr": 3.700434271459229e-05, "epoch": 1.8147448015122873, "percentage": 25.92, "elapsed_time": "1:30:27", "remaining_time": "4:18:29"}
|
||||
{"current_steps": 965, "total_steps": 3703, "loss": 0.2767, "lr": 3.6954519026824265e-05, "epoch": 1.824196597353497, "percentage": 26.06, "elapsed_time": "1:30:58", "remaining_time": "4:18:06"}
|
||||
{"current_steps": 970, "total_steps": 3703, "loss": 0.2731, "lr": 3.6904318535981254e-05, "epoch": 1.833648393194707, "percentage": 26.19, "elapsed_time": "1:31:21", "remaining_time": "4:17:25"}
|
||||
{"current_steps": 975, "total_steps": 3703, "loss": 0.2775, "lr": 3.6853742357736265e-05, "epoch": 1.8431001890359169, "percentage": 26.33, "elapsed_time": "1:31:43", "remaining_time": "4:16:39"}
|
||||
{"current_steps": 980, "total_steps": 3703, "loss": 0.2442, "lr": 3.6802791616111716e-05, "epoch": 1.8525519848771266, "percentage": 26.47, "elapsed_time": "1:32:10", "remaining_time": "4:16:06"}
|
||||
{"current_steps": 985, "total_steps": 3703, "loss": 0.2709, "lr": 3.6751467443454455e-05, "epoch": 1.8620037807183365, "percentage": 26.6, "elapsed_time": "1:32:35", "remaining_time": "4:15:30"}
|
||||
{"current_steps": 990, "total_steps": 3703, "loss": 0.2605, "lr": 3.6699770980410586e-05, "epoch": 1.8714555765595464, "percentage": 26.74, "elapsed_time": "1:33:00", "remaining_time": "4:14:51"}
|
||||
{"current_steps": 995, "total_steps": 3703, "loss": 0.2571, "lr": 3.664770337590011e-05, "epoch": 1.8809073724007561, "percentage": 26.87, "elapsed_time": "1:33:23", "remaining_time": "4:14:09"}
|
||||
{"current_steps": 1000, "total_steps": 3703, "loss": 0.2896, "lr": 3.659526578709144e-05, "epoch": 1.8903591682419658, "percentage": 27.01, "elapsed_time": "1:33:47", "remaining_time": "4:13:30"}
|
||||
{"current_steps": 1005, "total_steps": 3703, "loss": 0.2496, "lr": 3.654245937937561e-05, "epoch": 1.899810964083176, "percentage": 27.14, "elapsed_time": "1:34:17", "remaining_time": "4:13:08"}
|
||||
{"current_steps": 1010, "total_steps": 3703, "loss": 0.2945, "lr": 3.6489285326340424e-05, "epoch": 1.9092627599243857, "percentage": 27.28, "elapsed_time": "1:34:40", "remaining_time": "4:12:26"}
|
||||
{"current_steps": 1015, "total_steps": 3703, "loss": 0.2857, "lr": 3.6435744809744376e-05, "epoch": 1.9187145557655954, "percentage": 27.41, "elapsed_time": "1:35:05", "remaining_time": "4:11:48"}
|
||||
{"current_steps": 1020, "total_steps": 3703, "loss": 0.2529, "lr": 3.638183901949036e-05, "epoch": 1.9281663516068053, "percentage": 27.55, "elapsed_time": "1:35:27", "remaining_time": "4:11:06"}
|
||||
{"current_steps": 1025, "total_steps": 3703, "loss": 0.2634, "lr": 3.6327569153599236e-05, "epoch": 1.9376181474480152, "percentage": 27.68, "elapsed_time": "1:35:56", "remaining_time": "4:10:39"}
|
||||
{"current_steps": 1030, "total_steps": 3703, "loss": 0.2826, "lr": 3.62729364181832e-05, "epoch": 1.947069943289225, "percentage": 27.82, "elapsed_time": "1:36:25", "remaining_time": "4:10:14"}
|
||||
{"current_steps": 1035, "total_steps": 3703, "loss": 0.2734, "lr": 3.6217942027419025e-05, "epoch": 1.9565217391304348, "percentage": 27.95, "elapsed_time": "1:36:47", "remaining_time": "4:09:30"}
|
||||
{"current_steps": 1040, "total_steps": 3703, "loss": 0.2667, "lr": 3.616258720352097e-05, "epoch": 1.9659735349716447, "percentage": 28.09, "elapsed_time": "1:37:14", "remaining_time": "4:08:59"}
|
||||
{"current_steps": 1045, "total_steps": 3703, "loss": 0.2466, "lr": 3.6106873176713764e-05, "epoch": 1.9754253308128544, "percentage": 28.22, "elapsed_time": "1:37:45", "remaining_time": "4:08:40"}
|
||||
{"current_steps": 1050, "total_steps": 3703, "loss": 0.3324, "lr": 3.60508011852051e-05, "epoch": 1.9848771266540641, "percentage": 28.36, "elapsed_time": "1:38:23", "remaining_time": "4:08:37"}
|
||||
{"current_steps": 1055, "total_steps": 3703, "loss": 0.2414, "lr": 3.5994372475158276e-05, "epoch": 1.994328922495274, "percentage": 28.49, "elapsed_time": "1:38:53", "remaining_time": "4:08:11"}
|
||||
{"current_steps": 1060, "total_steps": 3703, "loss": 0.2389, "lr": 3.593758830066438e-05, "epoch": 2.003780718336484, "percentage": 28.63, "elapsed_time": "1:39:18", "remaining_time": "4:07:36"}
|
||||
{"current_steps": 1065, "total_steps": 3703, "loss": 0.2581, "lr": 3.5880449923714484e-05, "epoch": 2.0132325141776937, "percentage": 28.76, "elapsed_time": "1:39:42", "remaining_time": "4:06:57"}
|
||||
{"current_steps": 1070, "total_steps": 3703, "loss": 0.2702, "lr": 3.582295861417158e-05, "epoch": 2.022684310018904, "percentage": 28.9, "elapsed_time": "1:40:15", "remaining_time": "4:06:43"}
|
||||
{"current_steps": 1075, "total_steps": 3703, "loss": 0.2663, "lr": 3.576511564974233e-05, "epoch": 2.0321361058601135, "percentage": 29.03, "elapsed_time": "1:40:36", "remaining_time": "4:05:57"}
|
||||
{"current_steps": 1080, "total_steps": 3703, "loss": 0.2342, "lr": 3.5706922315948726e-05, "epoch": 2.041587901701323, "percentage": 29.17, "elapsed_time": "1:41:04", "remaining_time": "4:05:28"}
|
||||
{"current_steps": 1085, "total_steps": 3703, "loss": 0.2689, "lr": 3.5648379906099474e-05, "epoch": 2.051039697542533, "percentage": 29.3, "elapsed_time": "1:41:25", "remaining_time": "4:04:44"}
|
||||
{"current_steps": 1090, "total_steps": 3703, "loss": 0.2718, "lr": 3.558948972126127e-05, "epoch": 2.060491493383743, "percentage": 29.44, "elapsed_time": "1:41:58", "remaining_time": "4:04:26"}
|
||||
{"current_steps": 1095, "total_steps": 3703, "loss": 0.2592, "lr": 3.5530253070229886e-05, "epoch": 2.0699432892249527, "percentage": 29.57, "elapsed_time": "1:42:24", "remaining_time": "4:03:53"}
|
||||
{"current_steps": 1100, "total_steps": 3703, "loss": 0.2452, "lr": 3.547067126950106e-05, "epoch": 2.0793950850661624, "percentage": 29.71, "elapsed_time": "1:42:51", "remaining_time": "4:03:24"}
|
||||
{"current_steps": 1105, "total_steps": 3703, "loss": 0.2547, "lr": 3.541074564324129e-05, "epoch": 2.0888468809073726, "percentage": 29.84, "elapsed_time": "1:43:14", "remaining_time": "4:02:45"}
|
||||
{"current_steps": 1110, "total_steps": 3703, "loss": 0.2425, "lr": 3.5350477523258334e-05, "epoch": 2.0982986767485823, "percentage": 29.98, "elapsed_time": "1:43:40", "remaining_time": "4:02:10"}
|
||||
{"current_steps": 1115, "total_steps": 3703, "loss": 0.2451, "lr": 3.528986824897167e-05, "epoch": 2.107750472589792, "percentage": 30.11, "elapsed_time": "1:44:03", "remaining_time": "4:01:32"}
|
||||
{"current_steps": 1120, "total_steps": 3703, "loss": 0.2577, "lr": 3.522891916738269e-05, "epoch": 2.1172022684310017, "percentage": 30.25, "elapsed_time": "1:44:30", "remaining_time": "4:01:00"}
|
||||
{"current_steps": 1125, "total_steps": 3703, "loss": 0.2776, "lr": 3.516763163304481e-05, "epoch": 2.126654064272212, "percentage": 30.38, "elapsed_time": "1:45:05", "remaining_time": "4:00:50"}
|
||||
{"current_steps": 1130, "total_steps": 3703, "loss": 0.2795, "lr": 3.5106007008033306e-05, "epoch": 2.1361058601134215, "percentage": 30.52, "elapsed_time": "1:45:35", "remaining_time": "4:00:24"}
|
||||
{"current_steps": 1135, "total_steps": 3703, "loss": 0.2725, "lr": 3.50440466619151e-05, "epoch": 2.145557655954631, "percentage": 30.65, "elapsed_time": "1:45:58", "remaining_time": "3:59:46"}
|
||||
{"current_steps": 1140, "total_steps": 3703, "loss": 0.25, "lr": 3.498175197171827e-05, "epoch": 2.1550094517958414, "percentage": 30.79, "elapsed_time": "1:46:27", "remaining_time": "3:59:20"}
|
||||
{"current_steps": 1145, "total_steps": 3703, "loss": 0.235, "lr": 3.491912432190147e-05, "epoch": 2.164461247637051, "percentage": 30.92, "elapsed_time": "1:46:53", "remaining_time": "3:58:48"}
|
||||
{"current_steps": 1150, "total_steps": 3703, "loss": 0.2209, "lr": 3.485616510432321e-05, "epoch": 2.1739130434782608, "percentage": 31.06, "elapsed_time": "1:47:18", "remaining_time": "3:58:12"}
|
||||
{"current_steps": 1155, "total_steps": 3703, "loss": 0.25, "lr": 3.479287571821082e-05, "epoch": 2.183364839319471, "percentage": 31.19, "elapsed_time": "1:47:39", "remaining_time": "3:57:29"}
|
||||
{"current_steps": 1160, "total_steps": 3703, "loss": 0.2527, "lr": 3.4729257570129436e-05, "epoch": 2.1928166351606806, "percentage": 31.33, "elapsed_time": "1:48:00", "remaining_time": "3:56:47"}
|
||||
{"current_steps": 1165, "total_steps": 3703, "loss": 0.2762, "lr": 3.466531207395072e-05, "epoch": 2.2022684310018903, "percentage": 31.46, "elapsed_time": "1:48:34", "remaining_time": "3:56:33"}
|
||||
{"current_steps": 1170, "total_steps": 3703, "loss": 0.2502, "lr": 3.46010406508214e-05, "epoch": 2.2117202268431, "percentage": 31.6, "elapsed_time": "1:48:58", "remaining_time": "3:55:55"}
|
||||
{"current_steps": 1175, "total_steps": 3703, "loss": 0.2265, "lr": 3.453644472913176e-05, "epoch": 2.22117202268431, "percentage": 31.73, "elapsed_time": "1:49:23", "remaining_time": "3:55:22"}
|
||||
{"current_steps": 1180, "total_steps": 3703, "loss": 0.2307, "lr": 3.4471525744483826e-05, "epoch": 2.23062381852552, "percentage": 31.87, "elapsed_time": "1:49:42", "remaining_time": "3:54:34"}
|
||||
{"current_steps": 1185, "total_steps": 3703, "loss": 0.256, "lr": 3.440628513965947e-05, "epoch": 2.2400756143667295, "percentage": 32.0, "elapsed_time": "1:50:15", "remaining_time": "3:54:17"}
|
||||
{"current_steps": 1190, "total_steps": 3703, "loss": 0.2737, "lr": 3.4340724364588405e-05, "epoch": 2.2495274102079397, "percentage": 32.14, "elapsed_time": "1:50:43", "remaining_time": "3:53:48"}
|
||||
{"current_steps": 1195, "total_steps": 3703, "loss": 0.2615, "lr": 3.4274844876315885e-05, "epoch": 2.2589792060491494, "percentage": 32.27, "elapsed_time": "1:51:15", "remaining_time": "3:53:30"}
|
||||
{"current_steps": 1200, "total_steps": 3703, "loss": 0.2659, "lr": 3.4208648138970366e-05, "epoch": 2.268431001890359, "percentage": 32.41, "elapsed_time": "1:51:46", "remaining_time": "3:53:08"}
|
||||
{"current_steps": 1205, "total_steps": 3703, "loss": 0.2482, "lr": 3.4142135623730954e-05, "epoch": 2.2778827977315688, "percentage": 32.54, "elapsed_time": "1:52:19", "remaining_time": "3:52:51"}
|
||||
{"current_steps": 1210, "total_steps": 3703, "loss": 0.2316, "lr": 3.407530880879472e-05, "epoch": 2.287334593572779, "percentage": 32.68, "elapsed_time": "1:52:44", "remaining_time": "3:52:17"}
|
||||
{"current_steps": 1215, "total_steps": 3703, "loss": 0.2368, "lr": 3.400816917934383e-05, "epoch": 2.2967863894139886, "percentage": 32.81, "elapsed_time": "1:53:14", "remaining_time": "3:51:52"}
|
||||
{"current_steps": 1220, "total_steps": 3703, "loss": 0.2524, "lr": 3.394071822751255e-05, "epoch": 2.3062381852551983, "percentage": 32.95, "elapsed_time": "1:53:40", "remaining_time": "3:51:22"}
|
||||
{"current_steps": 1225, "total_steps": 3703, "loss": 0.2814, "lr": 3.3872957452354085e-05, "epoch": 2.3156899810964084, "percentage": 33.08, "elapsed_time": "1:54:09", "remaining_time": "3:50:56"}
|
||||
{"current_steps": 1230, "total_steps": 3703, "loss": 0.2381, "lr": 3.380488835980726e-05, "epoch": 2.325141776937618, "percentage": 33.22, "elapsed_time": "1:54:31", "remaining_time": "3:50:15"}
|
||||
{"current_steps": 1235, "total_steps": 3703, "loss": 0.2461, "lr": 3.373651246266306e-05, "epoch": 2.334593572778828, "percentage": 33.35, "elapsed_time": "1:55:04", "remaining_time": "3:49:56"}
|
||||
{"current_steps": 1240, "total_steps": 3703, "loss": 0.2349, "lr": 3.366783128053097e-05, "epoch": 2.344045368620038, "percentage": 33.49, "elapsed_time": "1:55:44", "remaining_time": "3:49:52"}
|
||||
{"current_steps": 1245, "total_steps": 3703, "loss": 0.2751, "lr": 3.359884633980528e-05, "epoch": 2.3534971644612477, "percentage": 33.62, "elapsed_time": "1:56:14", "remaining_time": "3:49:29"}
|
||||
{"current_steps": 1250, "total_steps": 3703, "loss": 0.2579, "lr": 3.352955917363108e-05, "epoch": 2.3629489603024574, "percentage": 33.76, "elapsed_time": "1:56:33", "remaining_time": "3:48:44"}
|
||||
{"current_steps": 1255, "total_steps": 3703, "loss": 0.2518, "lr": 3.345997132187022e-05, "epoch": 2.3724007561436675, "percentage": 33.89, "elapsed_time": "1:56:58", "remaining_time": "3:48:10"}
|
||||
{"current_steps": 1260, "total_steps": 3703, "loss": 0.2364, "lr": 3.339008433106713e-05, "epoch": 2.381852551984877, "percentage": 34.03, "elapsed_time": "1:57:29", "remaining_time": "3:47:47"}
|
||||
{"current_steps": 1265, "total_steps": 3703, "loss": 0.2555, "lr": 3.331989975441437e-05, "epoch": 2.391304347826087, "percentage": 34.16, "elapsed_time": "1:57:46", "remaining_time": "3:46:59"}
|
||||
{"current_steps": 1270, "total_steps": 3703, "loss": 0.2721, "lr": 3.324941915171817e-05, "epoch": 2.4007561436672966, "percentage": 34.3, "elapsed_time": "1:58:26", "remaining_time": "3:46:53"}
|
||||
{"current_steps": 1275, "total_steps": 3703, "loss": 0.2376, "lr": 3.3178644089363726e-05, "epoch": 2.4102079395085068, "percentage": 34.43, "elapsed_time": "1:58:50", "remaining_time": "3:46:17"}
|
||||
{"current_steps": 1280, "total_steps": 3703, "loss": 0.2735, "lr": 3.310757614028043e-05, "epoch": 2.4196597353497165, "percentage": 34.57, "elapsed_time": "1:59:19", "remaining_time": "3:45:52"}
|
||||
{"current_steps": 1285, "total_steps": 3703, "loss": 0.2444, "lr": 3.303621688390688e-05, "epoch": 2.429111531190926, "percentage": 34.7, "elapsed_time": "1:59:47", "remaining_time": "3:45:24"}
|
||||
{"current_steps": 1290, "total_steps": 3703, "loss": 0.2283, "lr": 3.2964567906155775e-05, "epoch": 2.4385633270321363, "percentage": 34.84, "elapsed_time": "2:00:10", "remaining_time": "3:44:47"}
|
||||
{"current_steps": 1295, "total_steps": 3703, "loss": 0.2281, "lr": 3.28926307993787e-05, "epoch": 2.448015122873346, "percentage": 34.97, "elapsed_time": "2:00:36", "remaining_time": "3:44:15"}
|
||||
{"current_steps": 1300, "total_steps": 3703, "loss": 0.2387, "lr": 3.282040716233073e-05, "epoch": 2.4574669187145557, "percentage": 35.11, "elapsed_time": "2:00:58", "remaining_time": "3:43:37"}
|
||||
{"current_steps": 1305, "total_steps": 3703, "loss": 0.2503, "lr": 3.274789860013484e-05, "epoch": 2.4669187145557654, "percentage": 35.24, "elapsed_time": "2:01:33", "remaining_time": "3:43:22"}
|
||||
{"current_steps": 1310, "total_steps": 3703, "loss": 0.2294, "lr": 3.267510672424633e-05, "epoch": 2.4763705103969755, "percentage": 35.38, "elapsed_time": "2:01:58", "remaining_time": "3:42:49"}
|
||||
{"current_steps": 1315, "total_steps": 3703, "loss": 0.2404, "lr": 3.260203315241693e-05, "epoch": 2.4858223062381852, "percentage": 35.51, "elapsed_time": "2:02:29", "remaining_time": "3:42:25"}
|
||||
{"current_steps": 1320, "total_steps": 3703, "loss": 0.2618, "lr": 3.25286795086589e-05, "epoch": 2.495274102079395, "percentage": 35.65, "elapsed_time": "2:02:56", "remaining_time": "3:41:56"}
|
||||
{"current_steps": 1325, "total_steps": 3703, "loss": 0.2283, "lr": 3.245504742320889e-05, "epoch": 2.504725897920605, "percentage": 35.78, "elapsed_time": "2:03:14", "remaining_time": "3:41:11"}
|
||||
{"current_steps": 1330, "total_steps": 3703, "loss": 0.2649, "lr": 3.238113853249176e-05, "epoch": 2.5141776937618148, "percentage": 35.92, "elapsed_time": "2:03:44", "remaining_time": "3:40:46"}
|
||||
{"current_steps": 1335, "total_steps": 3703, "loss": 0.2904, "lr": 3.230695447908416e-05, "epoch": 2.5236294896030245, "percentage": 36.05, "elapsed_time": "2:04:11", "remaining_time": "3:40:17"}
|
||||
{"current_steps": 1340, "total_steps": 3703, "loss": 0.2304, "lr": 3.223249691167808e-05, "epoch": 2.5330812854442346, "percentage": 36.19, "elapsed_time": "2:04:37", "remaining_time": "3:39:45"}
|
||||
{"current_steps": 1345, "total_steps": 3703, "loss": 0.3012, "lr": 3.215776748504415e-05, "epoch": 2.5425330812854443, "percentage": 36.32, "elapsed_time": "2:05:08", "remaining_time": "3:39:23"}
|
||||
{"current_steps": 1350, "total_steps": 3703, "loss": 0.2635, "lr": 3.208276785999491e-05, "epoch": 2.551984877126654, "percentage": 36.46, "elapsed_time": "2:05:48", "remaining_time": "3:39:17"}
|
||||
{"current_steps": 1355, "total_steps": 3703, "loss": 0.2681, "lr": 3.200749970334788e-05, "epoch": 2.561436672967864, "percentage": 36.59, "elapsed_time": "2:06:15", "remaining_time": "3:38:47"}
|
||||
{"current_steps": 1360, "total_steps": 3703, "loss": 0.2354, "lr": 3.193196468788852e-05, "epoch": 2.570888468809074, "percentage": 36.73, "elapsed_time": "2:06:37", "remaining_time": "3:38:08"}
|
||||
{"current_steps": 1365, "total_steps": 3703, "loss": 0.2341, "lr": 3.1856164492333045e-05, "epoch": 2.5803402646502835, "percentage": 36.86, "elapsed_time": "2:07:01", "remaining_time": "3:37:33"}
|
||||
{"current_steps": 1370, "total_steps": 3703, "loss": 0.2299, "lr": 3.178010080129114e-05, "epoch": 2.5897920604914932, "percentage": 37.0, "elapsed_time": "2:07:26", "remaining_time": "3:37:00"}
|
||||
{"current_steps": 1375, "total_steps": 3703, "loss": 0.2739, "lr": 3.1703775305228476e-05, "epoch": 2.599243856332703, "percentage": 37.13, "elapsed_time": "2:07:51", "remaining_time": "3:36:28"}
|
||||
{"current_steps": 1380, "total_steps": 3703, "loss": 0.2292, "lr": 3.16271897004292e-05, "epoch": 2.608695652173913, "percentage": 37.27, "elapsed_time": "2:08:16", "remaining_time": "3:35:56"}
|
||||
{"current_steps": 1385, "total_steps": 3703, "loss": 0.2275, "lr": 3.1550345688958186e-05, "epoch": 2.618147448015123, "percentage": 37.4, "elapsed_time": "2:08:43", "remaining_time": "3:35:25"}
|
||||
{"current_steps": 1390, "total_steps": 3703, "loss": 0.2255, "lr": 3.147324497862323e-05, "epoch": 2.6275992438563325, "percentage": 37.54, "elapsed_time": "2:09:07", "remaining_time": "3:34:52"}
|
||||
{"current_steps": 1395, "total_steps": 3703, "loss": 0.2362, "lr": 3.139588928293711e-05, "epoch": 2.6370510396975426, "percentage": 37.67, "elapsed_time": "2:09:32", "remaining_time": "3:34:19"}
|
||||
{"current_steps": 1400, "total_steps": 3703, "loss": 0.2699, "lr": 3.131828032107945e-05, "epoch": 2.6465028355387523, "percentage": 37.81, "elapsed_time": "2:10:00", "remaining_time": "3:33:52"}
|
||||
{"current_steps": 1405, "total_steps": 3703, "loss": 0.2325, "lr": 3.124041981785859e-05, "epoch": 2.655954631379962, "percentage": 37.94, "elapsed_time": "2:10:19", "remaining_time": "3:33:10"}
|
||||
{"current_steps": 1410, "total_steps": 3703, "loss": 0.2675, "lr": 3.1162309503673176e-05, "epoch": 2.665406427221172, "percentage": 38.08, "elapsed_time": "2:10:53", "remaining_time": "3:32:51"}
|
||||
{"current_steps": 1415, "total_steps": 3703, "loss": 0.2746, "lr": 3.108395111447376e-05, "epoch": 2.674858223062382, "percentage": 38.21, "elapsed_time": "2:11:29", "remaining_time": "3:32:36"}
|
||||
{"current_steps": 1420, "total_steps": 3703, "loss": 0.2484, "lr": 3.1005346391724195e-05, "epoch": 2.6843100189035916, "percentage": 38.35, "elapsed_time": "2:11:53", "remaining_time": "3:32:02"}
|
||||
{"current_steps": 1425, "total_steps": 3703, "loss": 0.2622, "lr": 3.092649708236293e-05, "epoch": 2.6937618147448017, "percentage": 38.48, "elapsed_time": "2:12:19", "remaining_time": "3:31:31"}
|
||||
{"current_steps": 1430, "total_steps": 3703, "loss": 0.2347, "lr": 3.08474049387642e-05, "epoch": 2.7032136105860114, "percentage": 38.62, "elapsed_time": "2:12:48", "remaining_time": "3:31:05"}
|
||||
{"current_steps": 1435, "total_steps": 3703, "loss": 0.2587, "lr": 3.076807171869907e-05, "epoch": 2.712665406427221, "percentage": 38.75, "elapsed_time": "2:13:14", "remaining_time": "3:30:35"}
|
||||
{"current_steps": 1440, "total_steps": 3703, "loss": 0.2303, "lr": 3.068849918529635e-05, "epoch": 2.7221172022684312, "percentage": 38.89, "elapsed_time": "2:13:39", "remaining_time": "3:30:02"}
|
||||
{"current_steps": 1445, "total_steps": 3703, "loss": 0.2233, "lr": 3.060868910700348e-05, "epoch": 2.731568998109641, "percentage": 39.02, "elapsed_time": "2:14:09", "remaining_time": "3:29:38"}
|
||||
{"current_steps": 1450, "total_steps": 3703, "loss": 0.2461, "lr": 3.052864325754712e-05, "epoch": 2.7410207939508506, "percentage": 39.16, "elapsed_time": "2:14:47", "remaining_time": "3:29:26"}
|
||||
{"current_steps": 1455, "total_steps": 3703, "loss": 0.2507, "lr": 3.0448363415893838e-05, "epoch": 2.7504725897920603, "percentage": 39.29, "elapsed_time": "2:15:12", "remaining_time": "3:28:53"}
|
||||
{"current_steps": 1460, "total_steps": 3703, "loss": 0.2528, "lr": 3.0367851366210507e-05, "epoch": 2.7599243856332705, "percentage": 39.43, "elapsed_time": "2:15:39", "remaining_time": "3:28:24"}
|
||||
{"current_steps": 1465, "total_steps": 3703, "loss": 0.2564, "lr": 3.028710889782466e-05, "epoch": 2.76937618147448, "percentage": 39.56, "elapsed_time": "2:16:01", "remaining_time": "3:27:47"}
|
||||
{"current_steps": 1470, "total_steps": 3703, "loss": 0.259, "lr": 3.020613780518476e-05, "epoch": 2.77882797731569, "percentage": 39.7, "elapsed_time": "2:16:30", "remaining_time": "3:27:21"}
|
||||
{"current_steps": 1475, "total_steps": 3703, "loss": 0.231, "lr": 3.0124939887820264e-05, "epoch": 2.7882797731568996, "percentage": 39.83, "elapsed_time": "2:16:55", "remaining_time": "3:26:49"}
|
||||
{"current_steps": 1480, "total_steps": 3703, "loss": 0.2531, "lr": 3.0043516950301695e-05, "epoch": 2.7977315689981097, "percentage": 39.97, "elapsed_time": "2:17:21", "remaining_time": "3:26:19"}
|
||||
{"current_steps": 1485, "total_steps": 3703, "loss": 0.2338, "lr": 2.996187080220047e-05, "epoch": 2.8071833648393194, "percentage": 40.1, "elapsed_time": "2:17:51", "remaining_time": "3:25:54"}
|
||||
{"current_steps": 1490, "total_steps": 3703, "loss": 0.2218, "lr": 2.9880003258048723e-05, "epoch": 2.816635160680529, "percentage": 40.24, "elapsed_time": "2:18:19", "remaining_time": "3:25:26"}
|
||||
{"current_steps": 1495, "total_steps": 3703, "loss": 0.2375, "lr": 2.9797916137298988e-05, "epoch": 2.8260869565217392, "percentage": 40.37, "elapsed_time": "2:18:40", "remaining_time": "3:24:48"}
|
||||
{"current_steps": 1500, "total_steps": 3703, "loss": 0.2474, "lr": 2.9715611264283723e-05, "epoch": 2.835538752362949, "percentage": 40.51, "elapsed_time": "2:19:08", "remaining_time": "3:24:20"}
|
||||
{"current_steps": 1505, "total_steps": 3703, "loss": 0.2703, "lr": 2.96330904681748e-05, "epoch": 2.8449905482041586, "percentage": 40.64, "elapsed_time": "2:19:44", "remaining_time": "3:24:05"}
|
||||
{"current_steps": 1510, "total_steps": 3703, "loss": 0.2698, "lr": 2.955035558294283e-05, "epoch": 2.854442344045369, "percentage": 40.78, "elapsed_time": "2:20:13", "remaining_time": "3:23:38"}
|
||||
{"current_steps": 1515, "total_steps": 3703, "loss": 0.2358, "lr": 2.946740844731643e-05, "epoch": 2.8638941398865785, "percentage": 40.91, "elapsed_time": "2:20:47", "remaining_time": "3:23:20"}
|
||||
{"current_steps": 1520, "total_steps": 3703, "loss": 0.2678, "lr": 2.9384250904741328e-05, "epoch": 2.873345935727788, "percentage": 41.05, "elapsed_time": "2:21:20", "remaining_time": "3:22:58"}
|
||||
{"current_steps": 1525, "total_steps": 3703, "loss": 0.227, "lr": 2.9300884803339412e-05, "epoch": 2.8827977315689983, "percentage": 41.18, "elapsed_time": "2:21:45", "remaining_time": "3:22:27"}
|
||||
{"current_steps": 1530, "total_steps": 3703, "loss": 0.2996, "lr": 2.921731199586766e-05, "epoch": 2.892249527410208, "percentage": 41.32, "elapsed_time": "2:22:06", "remaining_time": "3:21:50"}
|
||||
{"current_steps": 1535, "total_steps": 3703, "loss": 0.2324, "lr": 2.9133534339676954e-05, "epoch": 2.9017013232514177, "percentage": 41.45, "elapsed_time": "2:22:29", "remaining_time": "3:21:14"}
|
||||
{"current_steps": 1540, "total_steps": 3703, "loss": 0.2241, "lr": 2.904955369667079e-05, "epoch": 2.911153119092628, "percentage": 41.59, "elapsed_time": "2:22:51", "remaining_time": "3:20:39"}
|
||||
{"current_steps": 1545, "total_steps": 3703, "loss": 0.2518, "lr": 2.896537193326394e-05, "epoch": 2.9206049149338376, "percentage": 41.72, "elapsed_time": "2:23:14", "remaining_time": "3:20:05"}
|
||||
{"current_steps": 1550, "total_steps": 3703, "loss": 0.2833, "lr": 2.8880990920340934e-05, "epoch": 2.9300567107750473, "percentage": 41.86, "elapsed_time": "2:23:46", "remaining_time": "3:19:42"}
|
||||
{"current_steps": 1555, "total_steps": 3703, "loss": 0.2541, "lr": 2.879641253321447e-05, "epoch": 2.939508506616257, "percentage": 41.99, "elapsed_time": "2:24:21", "remaining_time": "3:19:24"}
|
||||
{"current_steps": 1560, "total_steps": 3703, "loss": 0.2471, "lr": 2.8711638651583797e-05, "epoch": 2.9489603024574667, "percentage": 42.13, "elapsed_time": "2:24:58", "remaining_time": "3:19:09"}
|
||||
{"current_steps": 1565, "total_steps": 3703, "loss": 0.2474, "lr": 2.862667115949287e-05, "epoch": 2.958412098298677, "percentage": 42.26, "elapsed_time": "2:25:28", "remaining_time": "3:18:43"}
|
||||
{"current_steps": 1570, "total_steps": 3703, "loss": 0.2818, "lr": 2.8541511945288523e-05, "epoch": 2.9678638941398865, "percentage": 42.4, "elapsed_time": "2:25:49", "remaining_time": "3:18:07"}
|
||||
{"current_steps": 1575, "total_steps": 3703, "loss": 0.2496, "lr": 2.8456162901578487e-05, "epoch": 2.977315689981096, "percentage": 42.53, "elapsed_time": "2:26:11", "remaining_time": "3:17:30"}
|
||||
{"current_steps": 1580, "total_steps": 3703, "loss": 0.263, "lr": 2.837062592518933e-05, "epoch": 2.9867674858223063, "percentage": 42.67, "elapsed_time": "2:26:43", "remaining_time": "3:17:08"}
|
||||
{"current_steps": 1585, "total_steps": 3703, "loss": 0.2328, "lr": 2.82849029171243e-05, "epoch": 2.996219281663516, "percentage": 42.8, "elapsed_time": "2:27:14", "remaining_time": "3:16:45"}
|
||||
{"current_steps": 1590, "total_steps": 3703, "loss": 0.2613, "lr": 2.8198995782521077e-05, "epoch": 3.0056710775047257, "percentage": 42.94, "elapsed_time": "2:27:54", "remaining_time": "3:16:34"}
|
||||
{"current_steps": 1595, "total_steps": 3703, "loss": 0.2345, "lr": 2.8112906430609422e-05, "epoch": 3.015122873345936, "percentage": 43.07, "elapsed_time": "2:28:15", "remaining_time": "3:15:56"}
|
||||
{"current_steps": 1600, "total_steps": 3703, "loss": 0.2201, "lr": 2.8026636774668783e-05, "epoch": 3.0245746691871456, "percentage": 43.21, "elapsed_time": "2:28:38", "remaining_time": "3:15:21"}
|
||||
{"current_steps": 1605, "total_steps": 3703, "loss": 0.2314, "lr": 2.794018873198572e-05, "epoch": 3.0340264650283553, "percentage": 43.34, "elapsed_time": "2:29:05", "remaining_time": "3:14:53"}
|
||||
{"current_steps": 1610, "total_steps": 3703, "loss": 0.2283, "lr": 2.7853564223811335e-05, "epoch": 3.0434782608695654, "percentage": 43.48, "elapsed_time": "2:29:28", "remaining_time": "3:14:19"}
|
||||
{"current_steps": 1615, "total_steps": 3703, "loss": 0.218, "lr": 2.776676517531856e-05, "epoch": 3.052930056710775, "percentage": 43.61, "elapsed_time": "2:29:56", "remaining_time": "3:13:51"}
|
||||
{"current_steps": 1620, "total_steps": 3703, "loss": 0.2302, "lr": 2.7679793515559353e-05, "epoch": 3.062381852551985, "percentage": 43.75, "elapsed_time": "2:30:23", "remaining_time": "3:13:21"}
|
||||
{"current_steps": 1625, "total_steps": 3703, "loss": 0.2229, "lr": 2.759265117742188e-05, "epoch": 3.0718336483931945, "percentage": 43.88, "elapsed_time": "2:30:44", "remaining_time": "3:12:45"}
|
||||
{"current_steps": 1630, "total_steps": 3703, "loss": 0.231, "lr": 2.7505340097587488e-05, "epoch": 3.0812854442344046, "percentage": 44.02, "elapsed_time": "2:31:18", "remaining_time": "3:12:26"}
|
||||
{"current_steps": 1635, "total_steps": 3703, "loss": 0.2509, "lr": 2.741786221648771e-05, "epoch": 3.0907372400756143, "percentage": 44.15, "elapsed_time": "2:31:51", "remaining_time": "3:12:04"}
|
||||
{"current_steps": 1640, "total_steps": 3703, "loss": 0.2472, "lr": 2.7330219478261138e-05, "epoch": 3.100189035916824, "percentage": 44.29, "elapsed_time": "2:32:19", "remaining_time": "3:11:36"}
|
||||
{"current_steps": 1645, "total_steps": 3703, "loss": 0.218, "lr": 2.724241383071019e-05, "epoch": 3.109640831758034, "percentage": 44.42, "elapsed_time": "2:32:40", "remaining_time": "3:11:00"}
|
||||
{"current_steps": 1650, "total_steps": 3703, "loss": 0.2221, "lr": 2.7154447225257842e-05, "epoch": 3.119092627599244, "percentage": 44.56, "elapsed_time": "2:33:10", "remaining_time": "3:10:35"}
|
||||
{"current_steps": 1655, "total_steps": 3703, "loss": 0.2531, "lr": 2.706632161690426e-05, "epoch": 3.1285444234404536, "percentage": 44.69, "elapsed_time": "2:33:36", "remaining_time": "3:10:04"}
|
||||
{"current_steps": 1660, "total_steps": 3703, "loss": 0.213, "lr": 2.697803896418334e-05, "epoch": 3.1379962192816637, "percentage": 44.83, "elapsed_time": "2:34:01", "remaining_time": "3:09:34"}
|
||||
{"current_steps": 1665, "total_steps": 3703, "loss": 0.2067, "lr": 2.688960122911918e-05, "epoch": 3.1474480151228734, "percentage": 44.96, "elapsed_time": "2:34:25", "remaining_time": "3:09:01"}
|
||||
{"current_steps": 1670, "total_steps": 3703, "loss": 0.1883, "lr": 2.6801010377182498e-05, "epoch": 3.156899810964083, "percentage": 45.1, "elapsed_time": "2:34:46", "remaining_time": "3:08:24"}
|
||||
{"current_steps": 1675, "total_steps": 3703, "loss": 0.2303, "lr": 2.6712268377246913e-05, "epoch": 3.166351606805293, "percentage": 45.23, "elapsed_time": "2:35:05", "remaining_time": "3:07:46"}
|
||||
{"current_steps": 1680, "total_steps": 3703, "loss": 0.2246, "lr": 2.6623377201545232e-05, "epoch": 3.175803402646503, "percentage": 45.37, "elapsed_time": "2:35:26", "remaining_time": "3:07:11"}
|
||||
{"current_steps": 1685, "total_steps": 3703, "loss": 0.2309, "lr": 2.6534338825625577e-05, "epoch": 3.1852551984877127, "percentage": 45.5, "elapsed_time": "2:35:57", "remaining_time": "3:06:46"}
|
||||
{"current_steps": 1690, "total_steps": 3703, "loss": 0.224, "lr": 2.644515522830751e-05, "epoch": 3.1947069943289224, "percentage": 45.64, "elapsed_time": "2:36:31", "remaining_time": "3:06:26"}
|
||||
{"current_steps": 1695, "total_steps": 3703, "loss": 0.2224, "lr": 2.6355828391638036e-05, "epoch": 3.2041587901701325, "percentage": 45.77, "elapsed_time": "2:37:02", "remaining_time": "3:06:01"}
|
||||
{"current_steps": 1700, "total_steps": 3703, "loss": 0.2285, "lr": 2.6266360300847563e-05, "epoch": 3.213610586011342, "percentage": 45.91, "elapsed_time": "2:37:27", "remaining_time": "3:05:31"}
|
||||
{"current_steps": 1705, "total_steps": 3703, "loss": 0.205, "lr": 2.6176752944305783e-05, "epoch": 3.223062381852552, "percentage": 46.04, "elapsed_time": "2:37:52", "remaining_time": "3:05:00"}
|
||||
{"current_steps": 1710, "total_steps": 3703, "loss": 0.2174, "lr": 2.6087008313477466e-05, "epoch": 3.232514177693762, "percentage": 46.18, "elapsed_time": "2:38:14", "remaining_time": "3:04:26"}
|
||||
{"current_steps": 1715, "total_steps": 3703, "loss": 0.2227, "lr": 2.5997128402878233e-05, "epoch": 3.2419659735349717, "percentage": 46.31, "elapsed_time": "2:38:42", "remaining_time": "3:03:58"}
|
||||
{"current_steps": 1720, "total_steps": 3703, "loss": 0.2136, "lr": 2.5907115210030197e-05, "epoch": 3.2514177693761814, "percentage": 46.45, "elapsed_time": "2:39:01", "remaining_time": "3:03:20"}
|
||||
{"current_steps": 1725, "total_steps": 3703, "loss": 0.234, "lr": 2.5816970735417578e-05, "epoch": 3.260869565217391, "percentage": 46.58, "elapsed_time": "2:39:24", "remaining_time": "3:02:46"}
|
||||
{"current_steps": 1730, "total_steps": 3703, "loss": 0.2233, "lr": 2.5726696982442258e-05, "epoch": 3.2703213610586013, "percentage": 46.72, "elapsed_time": "2:39:51", "remaining_time": "3:02:18"}
|
||||
{"current_steps": 1735, "total_steps": 3703, "loss": 0.2167, "lr": 2.5636295957379233e-05, "epoch": 3.279773156899811, "percentage": 46.85, "elapsed_time": "2:40:18", "remaining_time": "3:01:50"}
|
||||
{"current_steps": 1740, "total_steps": 3703, "loss": 0.248, "lr": 2.554576966933205e-05, "epoch": 3.2892249527410207, "percentage": 46.99, "elapsed_time": "2:40:40", "remaining_time": "3:01:16"}
|
||||
{"current_steps": 1745, "total_steps": 3703, "loss": 0.2379, "lr": 2.5455120130188135e-05, "epoch": 3.298676748582231, "percentage": 47.12, "elapsed_time": "2:41:08", "remaining_time": "3:00:48"}
|
||||
{"current_steps": 1750, "total_steps": 3703, "loss": 0.2179, "lr": 2.5364349354574088e-05, "epoch": 3.3081285444234405, "percentage": 47.26, "elapsed_time": "2:41:33", "remaining_time": "3:00:18"}
|
||||
{"current_steps": 1755, "total_steps": 3703, "loss": 0.236, "lr": 2.527345935981093e-05, "epoch": 3.31758034026465, "percentage": 47.39, "elapsed_time": "2:42:02", "remaining_time": "2:59:51"}
|
||||
{"current_steps": 1760, "total_steps": 3703, "loss": 0.238, "lr": 2.5182452165869228e-05, "epoch": 3.32703213610586, "percentage": 47.53, "elapsed_time": "2:42:38", "remaining_time": "2:59:33"}
|
||||
{"current_steps": 1765, "total_steps": 3703, "loss": 0.2199, "lr": 2.5091329795324216e-05, "epoch": 3.33648393194707, "percentage": 47.66, "elapsed_time": "2:43:05", "remaining_time": "2:59:04"}
|
||||
{"current_steps": 1770, "total_steps": 3703, "loss": 0.2534, "lr": 2.500009427331088e-05, "epoch": 3.3459357277882797, "percentage": 47.8, "elapsed_time": "2:43:34", "remaining_time": "2:58:38"}
|
||||
{"current_steps": 1775, "total_steps": 3703, "loss": 0.2593, "lr": 2.4908747627478907e-05, "epoch": 3.3553875236294894, "percentage": 47.93, "elapsed_time": "2:44:07", "remaining_time": "2:58:16"}
|
||||
{"current_steps": 1780, "total_steps": 3703, "loss": 0.2205, "lr": 2.481729188794764e-05, "epoch": 3.3648393194706996, "percentage": 48.07, "elapsed_time": "2:44:35", "remaining_time": "2:57:48"}
|
||||
{"current_steps": 1785, "total_steps": 3703, "loss": 0.2462, "lr": 2.472572908726096e-05, "epoch": 3.3742911153119093, "percentage": 48.2, "elapsed_time": "2:44:56", "remaining_time": "2:57:13"}
|
||||
{"current_steps": 1790, "total_steps": 3703, "loss": 0.2014, "lr": 2.4634061260342107e-05, "epoch": 3.383742911153119, "percentage": 48.34, "elapsed_time": "2:45:20", "remaining_time": "2:56:42"}
|
||||
{"current_steps": 1795, "total_steps": 3703, "loss": 0.2696, "lr": 2.4542290444448474e-05, "epoch": 3.393194706994329, "percentage": 48.47, "elapsed_time": "2:45:47", "remaining_time": "2:56:13"}
|
||||
{"current_steps": 1800, "total_steps": 3703, "loss": 0.2287, "lr": 2.445041867912629e-05, "epoch": 3.402646502835539, "percentage": 48.61, "elapsed_time": "2:46:13", "remaining_time": "2:55:43"}
|
||||
{"current_steps": 1805, "total_steps": 3703, "loss": 0.2295, "lr": 2.4358448006165345e-05, "epoch": 3.4120982986767485, "percentage": 48.74, "elapsed_time": "2:46:36", "remaining_time": "2:55:11"}
|
||||
{"current_steps": 1810, "total_steps": 3703, "loss": 0.2332, "lr": 2.4266380469553586e-05, "epoch": 3.421550094517958, "percentage": 48.88, "elapsed_time": "2:47:10", "remaining_time": "2:54:49"}
|
||||
{"current_steps": 1815, "total_steps": 3703, "loss": 0.2613, "lr": 2.4174218115431664e-05, "epoch": 3.4310018903591684, "percentage": 49.01, "elapsed_time": "2:47:38", "remaining_time": "2:54:22"}
|
||||
{"current_steps": 1820, "total_steps": 3703, "loss": 0.2226, "lr": 2.408196299204751e-05, "epoch": 3.440453686200378, "percentage": 49.15, "elapsed_time": "2:48:02", "remaining_time": "2:53:51"}
|
||||
{"current_steps": 1825, "total_steps": 3703, "loss": 0.2543, "lr": 2.3989617149710795e-05, "epoch": 3.4499054820415878, "percentage": 49.28, "elapsed_time": "2:48:35", "remaining_time": "2:53:29"}
|
||||
{"current_steps": 1830, "total_steps": 3703, "loss": 0.2572, "lr": 2.3897182640747336e-05, "epoch": 3.459357277882798, "percentage": 49.42, "elapsed_time": "2:49:09", "remaining_time": "2:53:07"}
|
||||
{"current_steps": 1835, "total_steps": 3703, "loss": 0.2293, "lr": 2.3804661519453532e-05, "epoch": 3.4688090737240076, "percentage": 49.55, "elapsed_time": "2:49:37", "remaining_time": "2:52:41"}
|
||||
{"current_steps": 1840, "total_steps": 3703, "loss": 0.217, "lr": 2.3712055842050676e-05, "epoch": 3.4782608695652173, "percentage": 49.69, "elapsed_time": "2:50:00", "remaining_time": "2:52:07"}
|
||||
{"current_steps": 1845, "total_steps": 3703, "loss": 0.2229, "lr": 2.3619367666639256e-05, "epoch": 3.487712665406427, "percentage": 49.82, "elapsed_time": "2:50:24", "remaining_time": "2:51:36"}
|
||||
{"current_steps": 1850, "total_steps": 3703, "loss": 0.2465, "lr": 2.3526599053153235e-05, "epoch": 3.497164461247637, "percentage": 49.96, "elapsed_time": "2:50:56", "remaining_time": "2:51:13"}
|
||||
{"current_steps": 1855, "total_steps": 3703, "loss": 0.2415, "lr": 2.3433752063314254e-05, "epoch": 3.506616257088847, "percentage": 50.09, "elapsed_time": "2:51:22", "remaining_time": "2:50:44"}
|
||||
{"current_steps": 1860, "total_steps": 3703, "loss": 0.2144, "lr": 2.3340828760585827e-05, "epoch": 3.5160680529300565, "percentage": 50.23, "elapsed_time": "2:51:47", "remaining_time": "2:50:12"}
|
||||
{"current_steps": 1865, "total_steps": 3703, "loss": 0.2101, "lr": 2.3247831210127454e-05, "epoch": 3.5255198487712667, "percentage": 50.36, "elapsed_time": "2:52:11", "remaining_time": "2:49:42"}
|
||||
{"current_steps": 1870, "total_steps": 3703, "loss": 0.22, "lr": 2.3154761478748752e-05, "epoch": 3.5349716446124764, "percentage": 50.5, "elapsed_time": "2:52:32", "remaining_time": "2:49:07"}
|
||||
{"current_steps": 1875, "total_steps": 3703, "loss": 0.2388, "lr": 2.3061621634863524e-05, "epoch": 3.544423440453686, "percentage": 50.63, "elapsed_time": "2:53:04", "remaining_time": "2:48:44"}
|
||||
{"current_steps": 1880, "total_steps": 3703, "loss": 0.2019, "lr": 2.296841374844375e-05, "epoch": 3.553875236294896, "percentage": 50.77, "elapsed_time": "2:53:23", "remaining_time": "2:48:07"}
|
||||
{"current_steps": 1885, "total_steps": 3703, "loss": 0.212, "lr": 2.287513989097364e-05, "epoch": 3.563327032136106, "percentage": 50.9, "elapsed_time": "2:53:50", "remaining_time": "2:47:39"}
|
||||
{"current_steps": 1890, "total_steps": 3703, "loss": 0.2358, "lr": 2.2781802135403537e-05, "epoch": 3.5727788279773156, "percentage": 51.04, "elapsed_time": "2:54:19", "remaining_time": "2:47:13"}
|
||||
{"current_steps": 1895, "total_steps": 3703, "loss": 0.1856, "lr": 2.2688402556103906e-05, "epoch": 3.5822306238185257, "percentage": 51.17, "elapsed_time": "2:54:41", "remaining_time": "2:46:40"}
|
||||
{"current_steps": 1900, "total_steps": 3703, "loss": 0.216, "lr": 2.2594943228819202e-05, "epoch": 3.5916824196597354, "percentage": 51.31, "elapsed_time": "2:55:06", "remaining_time": "2:46:10"}
|
||||
{"current_steps": 1905, "total_steps": 3703, "loss": 0.2454, "lr": 2.2501426230621703e-05, "epoch": 3.601134215500945, "percentage": 51.44, "elapsed_time": "2:55:31", "remaining_time": "2:45:40"}
|
||||
{"current_steps": 1910, "total_steps": 3703, "loss": 0.2192, "lr": 2.240785363986543e-05, "epoch": 3.610586011342155, "percentage": 51.58, "elapsed_time": "2:55:51", "remaining_time": "2:45:05"}
|
||||
{"current_steps": 1915, "total_steps": 3703, "loss": 0.2542, "lr": 2.2314227536139893e-05, "epoch": 3.620037807183365, "percentage": 51.71, "elapsed_time": "2:56:25", "remaining_time": "2:44:43"}
|
||||
{"current_steps": 1920, "total_steps": 3703, "loss": 0.2824, "lr": 2.2220550000223886e-05, "epoch": 3.6294896030245747, "percentage": 51.85, "elapsed_time": "2:56:55", "remaining_time": "2:44:18"}
|
||||
{"current_steps": 1925, "total_steps": 3703, "loss": 0.2642, "lr": 2.212682311403926e-05, "epoch": 3.6389413988657844, "percentage": 51.98, "elapsed_time": "2:57:27", "remaining_time": "2:43:54"}
|
||||
{"current_steps": 1930, "total_steps": 3703, "loss": 0.2487, "lr": 2.2033048960604648e-05, "epoch": 3.648393194706994, "percentage": 52.12, "elapsed_time": "2:57:58", "remaining_time": "2:43:30"}
|
||||
{"current_steps": 1935, "total_steps": 3703, "loss": 0.263, "lr": 2.1939229623989146e-05, "epoch": 3.6578449905482042, "percentage": 52.25, "elapsed_time": "2:58:31", "remaining_time": "2:43:07"}
|
||||
{"current_steps": 1940, "total_steps": 3703, "loss": 0.2377, "lr": 2.184536718926604e-05, "epoch": 3.667296786389414, "percentage": 52.39, "elapsed_time": "2:58:56", "remaining_time": "2:42:37"}
|
||||
{"current_steps": 1945, "total_steps": 3703, "loss": 0.2033, "lr": 2.1751463742466437e-05, "epoch": 3.6767485822306236, "percentage": 52.52, "elapsed_time": "2:59:27", "remaining_time": "2:42:12"}
|
||||
{"current_steps": 1950, "total_steps": 3703, "loss": 0.243, "lr": 2.1657521370532897e-05, "epoch": 3.6862003780718338, "percentage": 52.66, "elapsed_time": "2:59:51", "remaining_time": "2:41:41"}
|
||||
{"current_steps": 1955, "total_steps": 3703, "loss": 0.2074, "lr": 2.1563542161273077e-05, "epoch": 3.6956521739130435, "percentage": 52.8, "elapsed_time": "3:00:19", "remaining_time": "2:41:14"}
|
||||
{"current_steps": 1960, "total_steps": 3703, "loss": 0.2255, "lr": 2.146952820331332e-05, "epoch": 3.705103969754253, "percentage": 52.93, "elapsed_time": "3:00:41", "remaining_time": "2:40:41"}
|
||||
{"current_steps": 1965, "total_steps": 3703, "loss": 0.2167, "lr": 2.1375481586052237e-05, "epoch": 3.7145557655954633, "percentage": 53.07, "elapsed_time": "3:01:09", "remaining_time": "2:40:13"}
|
||||
{"current_steps": 1970, "total_steps": 3703, "loss": 0.2315, "lr": 2.128140439961426e-05, "epoch": 3.724007561436673, "percentage": 53.2, "elapsed_time": "3:01:38", "remaining_time": "2:39:47"}
|
||||
{"current_steps": 1975, "total_steps": 3703, "loss": 0.2211, "lr": 2.1187298734803214e-05, "epoch": 3.7334593572778827, "percentage": 53.34, "elapsed_time": "3:02:06", "remaining_time": "2:39:20"}
|
||||
{"current_steps": 1980, "total_steps": 3703, "loss": 0.253, "lr": 2.1093166683055832e-05, "epoch": 3.742911153119093, "percentage": 53.47, "elapsed_time": "3:02:40", "remaining_time": "2:38:57"}
|
||||
{"current_steps": 1985, "total_steps": 3703, "loss": 0.2402, "lr": 2.0999010336395273e-05, "epoch": 3.7523629489603025, "percentage": 53.61, "elapsed_time": "3:03:08", "remaining_time": "2:38:30"}
|
||||
{"current_steps": 1990, "total_steps": 3703, "loss": 0.2562, "lr": 2.0904831787384645e-05, "epoch": 3.7618147448015122, "percentage": 53.74, "elapsed_time": "3:03:45", "remaining_time": "2:38:10"}
|
||||
{"current_steps": 1995, "total_steps": 3703, "loss": 0.2321, "lr": 2.081063312908049e-05, "epoch": 3.7712665406427224, "percentage": 53.88, "elapsed_time": "3:04:15", "remaining_time": "2:37:44"}
|
||||
{"current_steps": 2000, "total_steps": 3703, "loss": 0.2397, "lr": 2.0716416454986242e-05, "epoch": 3.780718336483932, "percentage": 54.01, "elapsed_time": "3:04:42", "remaining_time": "2:37:16"}
|
||||
{"current_steps": 2005, "total_steps": 3703, "loss": 0.2272, "lr": 2.0622183859005762e-05, "epoch": 3.7901701323251418, "percentage": 54.15, "elapsed_time": "3:05:16", "remaining_time": "2:36:54"}
|
||||
{"current_steps": 2010, "total_steps": 3703, "loss": 0.2448, "lr": 2.052793743539673e-05, "epoch": 3.7996219281663515, "percentage": 54.28, "elapsed_time": "3:05:46", "remaining_time": "2:36:28"}
|
||||
{"current_steps": 2015, "total_steps": 3703, "loss": 0.2176, "lr": 2.043367927872416e-05, "epoch": 3.809073724007561, "percentage": 54.42, "elapsed_time": "3:06:08", "remaining_time": "2:35:56"}
|
||||
{"current_steps": 2020, "total_steps": 3703, "loss": 0.2623, "lr": 2.0339411483813812e-05, "epoch": 3.8185255198487713, "percentage": 54.55, "elapsed_time": "3:06:35", "remaining_time": "2:35:27"}
|
||||
{"current_steps": 2025, "total_steps": 3703, "loss": 0.2184, "lr": 2.0245136145705648e-05, "epoch": 3.827977315689981, "percentage": 54.69, "elapsed_time": "3:07:02", "remaining_time": "2:34:59"}
|
||||
{"current_steps": 2030, "total_steps": 3703, "loss": 0.2504, "lr": 2.0150855359607293e-05, "epoch": 3.8374291115311907, "percentage": 54.82, "elapsed_time": "3:07:35", "remaining_time": "2:34:36"}
|
||||
{"current_steps": 2035, "total_steps": 3703, "loss": 0.222, "lr": 2.0056571220847427e-05, "epoch": 3.846880907372401, "percentage": 54.96, "elapsed_time": "3:08:02", "remaining_time": "2:34:07"}
|
||||
{"current_steps": 2040, "total_steps": 3703, "loss": 0.2176, "lr": 1.9962285824829245e-05, "epoch": 3.8563327032136105, "percentage": 55.09, "elapsed_time": "3:08:30", "remaining_time": "2:33:40"}
|
||||
{"current_steps": 2045, "total_steps": 3703, "loss": 0.2249, "lr": 1.986800126698389e-05, "epoch": 3.8657844990548202, "percentage": 55.23, "elapsed_time": "3:08:56", "remaining_time": "2:33:11"}
|
||||
{"current_steps": 2050, "total_steps": 3703, "loss": 0.242, "lr": 1.9773719642723883e-05, "epoch": 3.8752362948960304, "percentage": 55.36, "elapsed_time": "3:09:22", "remaining_time": "2:32:42"}
|
||||
{"current_steps": 2055, "total_steps": 3703, "loss": 0.222, "lr": 1.967944304739653e-05, "epoch": 3.88468809073724, "percentage": 55.5, "elapsed_time": "3:09:52", "remaining_time": "2:32:16"}
|
||||
{"current_steps": 2060, "total_steps": 3703, "loss": 0.2308, "lr": 1.958517357623738e-05, "epoch": 3.89413988657845, "percentage": 55.63, "elapsed_time": "3:10:25", "remaining_time": "2:31:52"}
|
||||
{"current_steps": 2065, "total_steps": 3703, "loss": 0.2259, "lr": 1.949091332432367e-05, "epoch": 3.90359168241966, "percentage": 55.77, "elapsed_time": "3:10:57", "remaining_time": "2:31:28"}
|
||||
{"current_steps": 2070, "total_steps": 3703, "loss": 0.195, "lr": 1.939666438652772e-05, "epoch": 3.9130434782608696, "percentage": 55.9, "elapsed_time": "3:11:24", "remaining_time": "2:30:59"}
|
||||
{"current_steps": 2075, "total_steps": 3703, "loss": 0.2225, "lr": 1.9302428857470406e-05, "epoch": 3.9224952741020793, "percentage": 56.04, "elapsed_time": "3:11:49", "remaining_time": "2:30:29"}
|
||||
{"current_steps": 2080, "total_steps": 3703, "loss": 0.2492, "lr": 1.9208208831474618e-05, "epoch": 3.9319470699432895, "percentage": 56.17, "elapsed_time": "3:12:17", "remaining_time": "2:30:02"}
|
||||
{"current_steps": 2085, "total_steps": 3703, "loss": 0.225, "lr": 1.9114006402518676e-05, "epoch": 3.941398865784499, "percentage": 56.31, "elapsed_time": "3:12:51", "remaining_time": "2:29:39"}
|
||||
{"current_steps": 2090, "total_steps": 3703, "loss": 0.256, "lr": 1.901982366418985e-05, "epoch": 3.950850661625709, "percentage": 56.44, "elapsed_time": "3:13:20", "remaining_time": "2:29:12"}
|
||||
{"current_steps": 2095, "total_steps": 3703, "loss": 0.2595, "lr": 1.892566270963777e-05, "epoch": 3.9603024574669186, "percentage": 56.58, "elapsed_time": "3:13:51", "remaining_time": "2:28:47"}
|
||||
{"current_steps": 2100, "total_steps": 3703, "loss": 0.2318, "lr": 1.883152563152795e-05, "epoch": 3.9697542533081287, "percentage": 56.71, "elapsed_time": "3:14:17", "remaining_time": "2:28:18"}
|
||||
{"current_steps": 2105, "total_steps": 3703, "loss": 0.2416, "lr": 1.8737414521995268e-05, "epoch": 3.9792060491493384, "percentage": 56.85, "elapsed_time": "3:14:41", "remaining_time": "2:27:47"}
|
||||
{"current_steps": 2110, "total_steps": 3703, "loss": 0.2284, "lr": 1.8643331472597445e-05, "epoch": 3.988657844990548, "percentage": 56.98, "elapsed_time": "3:15:09", "remaining_time": "2:27:20"}
|
||||
{"current_steps": 2115, "total_steps": 3703, "loss": 0.206, "lr": 1.8549278574268618e-05, "epoch": 3.998109640831758, "percentage": 57.12, "elapsed_time": "3:15:40", "remaining_time": "2:26:55"}
|
||||
{"current_steps": 2120, "total_steps": 3703, "loss": 0.2085, "lr": 1.8455257917272814e-05, "epoch": 4.007561436672968, "percentage": 57.25, "elapsed_time": "3:16:17", "remaining_time": "2:26:34"}
|
||||
{"current_steps": 2125, "total_steps": 3703, "loss": 0.2144, "lr": 1.836127159115752e-05, "epoch": 4.017013232514178, "percentage": 57.39, "elapsed_time": "3:16:41", "remaining_time": "2:26:03"}
|
||||
{"current_steps": 2130, "total_steps": 3703, "loss": 0.2265, "lr": 1.8267321684707246e-05, "epoch": 4.026465028355387, "percentage": 57.52, "elapsed_time": "3:17:16", "remaining_time": "2:25:41"}
|
||||
{"current_steps": 2135, "total_steps": 3703, "loss": 0.2334, "lr": 1.817341028589709e-05, "epoch": 4.0359168241965975, "percentage": 57.66, "elapsed_time": "3:17:41", "remaining_time": "2:25:11"}
|
||||
{"current_steps": 2140, "total_steps": 3703, "loss": 0.1976, "lr": 1.8079539481846366e-05, "epoch": 4.045368620037808, "percentage": 57.79, "elapsed_time": "3:18:12", "remaining_time": "2:24:45"}
|
||||
{"current_steps": 2145, "total_steps": 3703, "loss": 0.2189, "lr": 1.7985711358772165e-05, "epoch": 4.054820415879017, "percentage": 57.93, "elapsed_time": "3:18:39", "remaining_time": "2:24:17"}
|
||||
{"current_steps": 2150, "total_steps": 3703, "loss": 0.1999, "lr": 1.789192800194305e-05, "epoch": 4.064272211720227, "percentage": 58.06, "elapsed_time": "3:19:05", "remaining_time": "2:23:48"}
|
||||
{"current_steps": 2155, "total_steps": 3703, "loss": 0.2101, "lr": 1.7798191495632656e-05, "epoch": 4.073724007561436, "percentage": 58.2, "elapsed_time": "3:19:25", "remaining_time": "2:23:15"}
|
||||
{"current_steps": 2160, "total_steps": 3703, "loss": 0.2206, "lr": 1.7704503923073414e-05, "epoch": 4.083175803402646, "percentage": 58.33, "elapsed_time": "3:19:52", "remaining_time": "2:22:46"}
|
||||
{"current_steps": 2165, "total_steps": 3703, "loss": 0.2189, "lr": 1.7610867366410228e-05, "epoch": 4.0926275992438566, "percentage": 58.47, "elapsed_time": "3:20:20", "remaining_time": "2:22:19"}
|
||||
{"current_steps": 2170, "total_steps": 3703, "loss": 0.2161, "lr": 1.751728390665422e-05, "epoch": 4.102079395085066, "percentage": 58.6, "elapsed_time": "3:20:44", "remaining_time": "2:21:48"}
|
||||
{"current_steps": 2175, "total_steps": 3703, "loss": 0.2034, "lr": 1.742375562363645e-05, "epoch": 4.111531190926276, "percentage": 58.74, "elapsed_time": "3:21:11", "remaining_time": "2:21:20"}
|
||||
{"current_steps": 2180, "total_steps": 3703, "loss": 0.225, "lr": 1.7330284595961714e-05, "epoch": 4.120982986767486, "percentage": 58.87, "elapsed_time": "3:21:36", "remaining_time": "2:20:50"}
|
||||
{"current_steps": 2185, "total_steps": 3703, "loss": 0.2109, "lr": 1.7236872900962364e-05, "epoch": 4.130434782608695, "percentage": 59.01, "elapsed_time": "3:22:06", "remaining_time": "2:20:24"}
|
||||
{"current_steps": 2190, "total_steps": 3703, "loss": 0.1925, "lr": 1.7143522614652087e-05, "epoch": 4.1398865784499055, "percentage": 59.14, "elapsed_time": "3:22:30", "remaining_time": "2:19:54"}
|
||||
{"current_steps": 2195, "total_steps": 3703, "loss": 0.2226, "lr": 1.7050235811679842e-05, "epoch": 4.149338374291116, "percentage": 59.28, "elapsed_time": "3:22:56", "remaining_time": "2:19:25"}
|
||||
{"current_steps": 2200, "total_steps": 3703, "loss": 0.2148, "lr": 1.6957014565283686e-05, "epoch": 4.158790170132325, "percentage": 59.41, "elapsed_time": "3:23:18", "remaining_time": "2:18:53"}
|
||||
{"current_steps": 2205, "total_steps": 3703, "loss": 0.2146, "lr": 1.6863860947244734e-05, "epoch": 4.168241965973535, "percentage": 59.55, "elapsed_time": "3:23:49", "remaining_time": "2:18:28"}
|
||||
{"current_steps": 2210, "total_steps": 3703, "loss": 0.2071, "lr": 1.67707770278411e-05, "epoch": 4.177693761814745, "percentage": 59.68, "elapsed_time": "3:24:20", "remaining_time": "2:18:03"}
|
||||
{"current_steps": 2215, "total_steps": 3703, "loss": 0.2545, "lr": 1.6677764875801896e-05, "epoch": 4.187145557655954, "percentage": 59.82, "elapsed_time": "3:24:46", "remaining_time": "2:17:33"}
|
||||
{"current_steps": 2220, "total_steps": 3703, "loss": 0.2367, "lr": 1.658482655826125e-05, "epoch": 4.196597353497165, "percentage": 59.95, "elapsed_time": "3:25:19", "remaining_time": "2:17:09"}
|
||||
{"current_steps": 2225, "total_steps": 3703, "loss": 0.1956, "lr": 1.649196414071237e-05, "epoch": 4.206049149338375, "percentage": 60.09, "elapsed_time": "3:25:45", "remaining_time": "2:16:41"}
|
||||
{"current_steps": 2230, "total_steps": 3703, "loss": 0.2054, "lr": 1.6399179686961626e-05, "epoch": 4.215500945179584, "percentage": 60.22, "elapsed_time": "3:26:12", "remaining_time": "2:16:12"}
|
||||
{"current_steps": 2235, "total_steps": 3703, "loss": 0.2347, "lr": 1.630647525908271e-05, "epoch": 4.224952741020794, "percentage": 60.36, "elapsed_time": "3:26:44", "remaining_time": "2:15:47"}
|
||||
{"current_steps": 2240, "total_steps": 3703, "loss": 0.1981, "lr": 1.621385291737076e-05, "epoch": 4.234404536862003, "percentage": 60.49, "elapsed_time": "3:27:10", "remaining_time": "2:15:18"}
|
||||
{"current_steps": 2245, "total_steps": 3703, "loss": 0.2479, "lr": 1.6121314720296655e-05, "epoch": 4.2438563327032135, "percentage": 60.63, "elapsed_time": "3:27:43", "remaining_time": "2:14:54"}
|
||||
{"current_steps": 2250, "total_steps": 3703, "loss": 0.2164, "lr": 1.6028862724461162e-05, "epoch": 4.253308128544424, "percentage": 60.76, "elapsed_time": "3:28:10", "remaining_time": "2:14:26"}
|
||||
{"current_steps": 2255, "total_steps": 3703, "loss": 0.1914, "lr": 1.593649898454932e-05, "epoch": 4.262759924385633, "percentage": 60.9, "elapsed_time": "3:28:28", "remaining_time": "2:13:52"}
|
||||
{"current_steps": 2260, "total_steps": 3703, "loss": 0.2273, "lr": 1.5844225553284708e-05, "epoch": 4.272211720226843, "percentage": 61.03, "elapsed_time": "3:29:02", "remaining_time": "2:13:28"}
|
||||
{"current_steps": 2265, "total_steps": 3703, "loss": 0.2004, "lr": 1.5752044481383875e-05, "epoch": 4.281663516068053, "percentage": 61.17, "elapsed_time": "3:29:30", "remaining_time": "2:13:01"}
|
||||
{"current_steps": 2270, "total_steps": 3703, "loss": 0.2397, "lr": 1.565995781751073e-05, "epoch": 4.291115311909262, "percentage": 61.3, "elapsed_time": "3:29:59", "remaining_time": "2:12:33"}
|
||||
{"current_steps": 2275, "total_steps": 3703, "loss": 0.1872, "lr": 1.556796760823105e-05, "epoch": 4.300567107750473, "percentage": 61.44, "elapsed_time": "3:30:27", "remaining_time": "2:12:05"}
|
||||
{"current_steps": 2280, "total_steps": 3703, "loss": 0.2391, "lr": 1.5476075897966943e-05, "epoch": 4.310018903591683, "percentage": 61.57, "elapsed_time": "3:31:00", "remaining_time": "2:11:41"}
|
||||
{"current_steps": 2285, "total_steps": 3703, "loss": 0.1983, "lr": 1.538428472895145e-05, "epoch": 4.319470699432892, "percentage": 61.71, "elapsed_time": "3:31:31", "remaining_time": "2:11:15"}
|
||||
{"current_steps": 2290, "total_steps": 3703, "loss": 0.219, "lr": 1.5292596141183156e-05, "epoch": 4.328922495274102, "percentage": 61.84, "elapsed_time": "3:32:02", "remaining_time": "2:10:49"}
|
||||
{"current_steps": 2295, "total_steps": 3703, "loss": 0.2057, "lr": 1.5201012172380834e-05, "epoch": 4.338374291115312, "percentage": 61.98, "elapsed_time": "3:32:25", "remaining_time": "2:10:19"}
|
||||
{"current_steps": 2300, "total_steps": 3703, "loss": 0.1857, "lr": 1.5109534857938181e-05, "epoch": 4.3478260869565215, "percentage": 62.11, "elapsed_time": "3:32:52", "remaining_time": "2:09:51"}
|
||||
{"current_steps": 2305, "total_steps": 3703, "loss": 0.215, "lr": 1.501816623087857e-05, "epoch": 4.357277882797732, "percentage": 62.25, "elapsed_time": "3:33:18", "remaining_time": "2:09:22"}
|
||||
{"current_steps": 2310, "total_steps": 3703, "loss": 0.1988, "lr": 1.4926908321809856e-05, "epoch": 4.366729678638942, "percentage": 62.38, "elapsed_time": "3:33:44", "remaining_time": "2:08:53"}
|
||||
{"current_steps": 2315, "total_steps": 3703, "loss": 0.2363, "lr": 1.4835763158879264e-05, "epoch": 4.376181474480151, "percentage": 62.52, "elapsed_time": "3:34:13", "remaining_time": "2:08:26"}
|
||||
{"current_steps": 2320, "total_steps": 3703, "loss": 0.2097, "lr": 1.474473276772831e-05, "epoch": 4.385633270321361, "percentage": 62.65, "elapsed_time": "3:34:41", "remaining_time": "2:07:59"}
|
||||
{"current_steps": 2325, "total_steps": 3703, "loss": 0.1913, "lr": 1.4653819171447802e-05, "epoch": 4.39508506616257, "percentage": 62.79, "elapsed_time": "3:35:03", "remaining_time": "2:07:27"}
|
||||
{"current_steps": 2330, "total_steps": 3703, "loss": 0.2307, "lr": 1.4563024390532828e-05, "epoch": 4.404536862003781, "percentage": 62.92, "elapsed_time": "3:35:32", "remaining_time": "2:07:00"}
|
||||
{"current_steps": 2335, "total_steps": 3703, "loss": 0.2239, "lr": 1.4472350442837892e-05, "epoch": 4.413988657844991, "percentage": 63.06, "elapsed_time": "3:36:01", "remaining_time": "2:06:33"}
|
||||
{"current_steps": 2340, "total_steps": 3703, "loss": 0.2013, "lr": 1.4381799343532073e-05, "epoch": 4.4234404536862, "percentage": 63.19, "elapsed_time": "3:36:22", "remaining_time": "2:06:02"}
|
||||
{"current_steps": 2345, "total_steps": 3703, "loss": 0.2242, "lr": 1.4291373105054201e-05, "epoch": 4.43289224952741, "percentage": 63.33, "elapsed_time": "3:36:46", "remaining_time": "2:05:32"}
|
||||
{"current_steps": 2350, "total_steps": 3703, "loss": 0.246, "lr": 1.4201073737068182e-05, "epoch": 4.44234404536862, "percentage": 63.46, "elapsed_time": "3:37:17", "remaining_time": "2:05:06"}
|
||||
{"current_steps": 2355, "total_steps": 3703, "loss": 0.2328, "lr": 1.411090324641829e-05, "epoch": 4.4517958412098295, "percentage": 63.6, "elapsed_time": "3:37:37", "remaining_time": "2:04:34"}
|
||||
{"current_steps": 2360, "total_steps": 3703, "loss": 0.209, "lr": 1.4020863637084597e-05, "epoch": 4.46124763705104, "percentage": 63.73, "elapsed_time": "3:38:01", "remaining_time": "2:04:04"}
|
||||
{"current_steps": 2365, "total_steps": 3703, "loss": 0.2209, "lr": 1.3930956910138407e-05, "epoch": 4.47069943289225, "percentage": 63.87, "elapsed_time": "3:38:26", "remaining_time": "2:03:35"}
|
||||
{"current_steps": 2370, "total_steps": 3703, "loss": 0.1861, "lr": 1.3841185063697817e-05, "epoch": 4.480151228733459, "percentage": 64.0, "elapsed_time": "3:38:51", "remaining_time": "2:03:06"}
|
||||
{"current_steps": 2375, "total_steps": 3703, "loss": 0.204, "lr": 1.3751550092883275e-05, "epoch": 4.489603024574669, "percentage": 64.14, "elapsed_time": "3:39:30", "remaining_time": "2:02:44"}
|
||||
{"current_steps": 2380, "total_steps": 3703, "loss": 0.2515, "lr": 1.366205398977329e-05, "epoch": 4.499054820415879, "percentage": 64.27, "elapsed_time": "3:40:05", "remaining_time": "2:02:20"}
|
||||
{"current_steps": 2385, "total_steps": 3703, "loss": 0.2277, "lr": 1.3572698743360086e-05, "epoch": 4.508506616257089, "percentage": 64.41, "elapsed_time": "3:40:33", "remaining_time": "2:01:53"}
|
||||
{"current_steps": 2390, "total_steps": 3703, "loss": 0.1996, "lr": 1.3483486339505476e-05, "epoch": 4.517958412098299, "percentage": 64.54, "elapsed_time": "3:41:02", "remaining_time": "2:01:26"}
|
||||
{"current_steps": 2395, "total_steps": 3703, "loss": 0.2138, "lr": 1.3394418760896665e-05, "epoch": 4.527410207939509, "percentage": 64.68, "elapsed_time": "3:41:27", "remaining_time": "2:00:56"}
|
||||
{"current_steps": 2400, "total_steps": 3703, "loss": 0.1978, "lr": 1.3305497987002214e-05, "epoch": 4.536862003780718, "percentage": 64.81, "elapsed_time": "3:41:57", "remaining_time": "2:00:30"}
|
||||
{"current_steps": 2405, "total_steps": 3703, "loss": 0.2072, "lr": 1.3216725994028065e-05, "epoch": 4.546313799621928, "percentage": 64.95, "elapsed_time": "3:42:25", "remaining_time": "2:00:02"}
|
||||
{"current_steps": 2410, "total_steps": 3703, "loss": 0.2396, "lr": 1.3128104754873592e-05, "epoch": 4.5557655954631375, "percentage": 65.08, "elapsed_time": "3:42:52", "remaining_time": "1:59:34"}
|
||||
{"current_steps": 2415, "total_steps": 3703, "loss": 0.2381, "lr": 1.3039636239087751e-05, "epoch": 4.565217391304348, "percentage": 65.22, "elapsed_time": "3:43:22", "remaining_time": "1:59:07"}
|
||||
{"current_steps": 2420, "total_steps": 3703, "loss": 0.2231, "lr": 1.2951322412825333e-05, "epoch": 4.574669187145558, "percentage": 65.35, "elapsed_time": "3:43:46", "remaining_time": "1:58:38"}
|
||||
{"current_steps": 2425, "total_steps": 3703, "loss": 0.2424, "lr": 1.2863165238803252e-05, "epoch": 4.584120982986768, "percentage": 65.49, "elapsed_time": "3:44:14", "remaining_time": "1:58:10"}
|
||||
{"current_steps": 2430, "total_steps": 3703, "loss": 0.2144, "lr": 1.2775166676256942e-05, "epoch": 4.593572778827977, "percentage": 65.62, "elapsed_time": "3:44:48", "remaining_time": "1:57:46"}
|
||||
{"current_steps": 2435, "total_steps": 3703, "loss": 0.2077, "lr": 1.2687328680896784e-05, "epoch": 4.603024574669187, "percentage": 65.76, "elapsed_time": "3:45:11", "remaining_time": "1:57:15"}
|
||||
{"current_steps": 2440, "total_steps": 3703, "loss": 0.2225, "lr": 1.2599653204864656e-05, "epoch": 4.612476370510397, "percentage": 65.89, "elapsed_time": "3:45:40", "remaining_time": "1:56:48"}
|
||||
{"current_steps": 2445, "total_steps": 3703, "loss": 0.2215, "lr": 1.2512142196690573e-05, "epoch": 4.621928166351607, "percentage": 66.03, "elapsed_time": "3:46:12", "remaining_time": "1:56:23"}
|
||||
{"current_steps": 2450, "total_steps": 3703, "loss": 0.2174, "lr": 1.2424797601249328e-05, "epoch": 4.631379962192817, "percentage": 66.16, "elapsed_time": "3:46:44", "remaining_time": "1:55:57"}
|
||||
{"current_steps": 2455, "total_steps": 3703, "loss": 0.2329, "lr": 1.2337621359717333e-05, "epoch": 4.640831758034026, "percentage": 66.3, "elapsed_time": "3:47:13", "remaining_time": "1:55:30"}
|
||||
{"current_steps": 2460, "total_steps": 3703, "loss": 0.2015, "lr": 1.2250615409529427e-05, "epoch": 4.650283553875236, "percentage": 66.43, "elapsed_time": "3:47:44", "remaining_time": "1:55:04"}
|
||||
{"current_steps": 2465, "total_steps": 3703, "loss": 0.2199, "lr": 1.2163781684335831e-05, "epoch": 4.659735349716446, "percentage": 66.57, "elapsed_time": "3:48:17", "remaining_time": "1:54:39"}
|
||||
{"current_steps": 2470, "total_steps": 3703, "loss": 0.2228, "lr": 1.2077122113959186e-05, "epoch": 4.669187145557656, "percentage": 66.7, "elapsed_time": "3:48:49", "remaining_time": "1:54:13"}
|
||||
{"current_steps": 2475, "total_steps": 3703, "loss": 0.2223, "lr": 1.1990638624351659e-05, "epoch": 4.678638941398866, "percentage": 66.84, "elapsed_time": "3:49:13", "remaining_time": "1:53:44"}
|
||||
{"current_steps": 2480, "total_steps": 3703, "loss": 0.2061, "lr": 1.1904333137552124e-05, "epoch": 4.688090737240076, "percentage": 66.97, "elapsed_time": "3:49:42", "remaining_time": "1:53:16"}
|
||||
{"current_steps": 2485, "total_steps": 3703, "loss": 0.2031, "lr": 1.1818207571643484e-05, "epoch": 4.697542533081285, "percentage": 67.11, "elapsed_time": "3:50:01", "remaining_time": "1:52:44"}
|
||||
{"current_steps": 2490, "total_steps": 3703, "loss": 0.2243, "lr": 1.173226384070999e-05, "epoch": 4.706994328922495, "percentage": 67.24, "elapsed_time": "3:50:22", "remaining_time": "1:52:13"}
|
||||
{"current_steps": 2495, "total_steps": 3703, "loss": 0.1869, "lr": 1.1646503854794746e-05, "epoch": 4.716446124763705, "percentage": 67.38, "elapsed_time": "3:50:46", "remaining_time": "1:51:44"}
|
||||
{"current_steps": 2500, "total_steps": 3703, "loss": 0.1948, "lr": 1.1560929519857246e-05, "epoch": 4.725897920604915, "percentage": 67.51, "elapsed_time": "3:51:14", "remaining_time": "1:51:16"}
|
||||
{"current_steps": 2505, "total_steps": 3703, "loss": 0.2109, "lr": 1.1475542737730998e-05, "epoch": 4.735349716446125, "percentage": 67.65, "elapsed_time": "3:51:43", "remaining_time": "1:50:49"}
|
||||
{"current_steps": 2510, "total_steps": 3703, "loss": 0.2361, "lr": 1.1390345406081286e-05, "epoch": 4.744801512287335, "percentage": 67.78, "elapsed_time": "3:52:14", "remaining_time": "1:50:23"}
|
||||
{"current_steps": 2515, "total_steps": 3703, "loss": 0.2144, "lr": 1.1305339418362978e-05, "epoch": 4.754253308128544, "percentage": 67.92, "elapsed_time": "3:52:37", "remaining_time": "1:49:53"}
|
||||
{"current_steps": 2520, "total_steps": 3703, "loss": 0.206, "lr": 1.1220526663778441e-05, "epoch": 4.763705103969754, "percentage": 68.05, "elapsed_time": "3:52:59", "remaining_time": "1:49:22"}
|
||||
{"current_steps": 2525, "total_steps": 3703, "loss": 0.1953, "lr": 1.113590902723557e-05, "epoch": 4.773156899810964, "percentage": 68.19, "elapsed_time": "3:53:28", "remaining_time": "1:48:55"}
|
||||
{"current_steps": 2530, "total_steps": 3703, "loss": 0.186, "lr": 1.1051488389305875e-05, "epoch": 4.782608695652174, "percentage": 68.32, "elapsed_time": "3:53:51", "remaining_time": "1:48:25"}
|
||||
{"current_steps": 2535, "total_steps": 3703, "loss": 0.1978, "lr": 1.0967266626182726e-05, "epoch": 4.792060491493384, "percentage": 68.46, "elapsed_time": "3:54:21", "remaining_time": "1:47:58"}
|
||||
{"current_steps": 2540, "total_steps": 3703, "loss": 0.209, "lr": 1.0883245609639622e-05, "epoch": 4.801512287334593, "percentage": 68.59, "elapsed_time": "3:54:47", "remaining_time": "1:47:30"}
|
||||
{"current_steps": 2545, "total_steps": 3703, "loss": 0.2604, "lr": 1.0799427206988588e-05, "epoch": 4.810964083175803, "percentage": 68.73, "elapsed_time": "3:55:13", "remaining_time": "1:47:01"}
|
||||
{"current_steps": 2550, "total_steps": 3703, "loss": 0.2167, "lr": 1.0715813281038697e-05, "epoch": 4.8204158790170135, "percentage": 68.86, "elapsed_time": "3:55:43", "remaining_time": "1:46:35"}
|
||||
{"current_steps": 2555, "total_steps": 3703, "loss": 0.2151, "lr": 1.0632405690054652e-05, "epoch": 4.829867674858223, "percentage": 69.0, "elapsed_time": "3:56:12", "remaining_time": "1:46:07"}
|
||||
{"current_steps": 2560, "total_steps": 3703, "loss": 0.2222, "lr": 1.0549206287715524e-05, "epoch": 4.839319470699433, "percentage": 69.13, "elapsed_time": "3:56:41", "remaining_time": "1:45:40"}
|
||||
{"current_steps": 2565, "total_steps": 3703, "loss": 0.1915, "lr": 1.0466216923073497e-05, "epoch": 4.848771266540643, "percentage": 69.27, "elapsed_time": "3:57:06", "remaining_time": "1:45:11"}
|
||||
{"current_steps": 2570, "total_steps": 3703, "loss": 0.2012, "lr": 1.0383439440512814e-05, "epoch": 4.858223062381852, "percentage": 69.4, "elapsed_time": "3:57:32", "remaining_time": "1:44:43"}
|
||||
{"current_steps": 2575, "total_steps": 3703, "loss": 0.1986, "lr": 1.030087567970879e-05, "epoch": 4.8676748582230625, "percentage": 69.54, "elapsed_time": "3:58:06", "remaining_time": "1:44:18"}
|
||||
{"current_steps": 2580, "total_steps": 3703, "loss": 0.2214, "lr": 1.0218527475586902e-05, "epoch": 4.877126654064273, "percentage": 69.67, "elapsed_time": "3:58:31", "remaining_time": "1:43:49"}
|
||||
{"current_steps": 2585, "total_steps": 3703, "loss": 0.2011, "lr": 1.013639665828201e-05, "epoch": 4.886578449905482, "percentage": 69.81, "elapsed_time": "3:59:01", "remaining_time": "1:43:22"}
|
||||
{"current_steps": 2590, "total_steps": 3703, "loss": 0.2028, "lr": 1.0054485053097731e-05, "epoch": 4.896030245746692, "percentage": 69.94, "elapsed_time": "3:59:19", "remaining_time": "1:42:50"}
|
||||
{"current_steps": 2595, "total_steps": 3703, "loss": 0.2148, "lr": 9.972794480465798e-06, "epoch": 4.905482041587902, "percentage": 70.08, "elapsed_time": "3:59:51", "remaining_time": "1:42:24"}
|
||||
{"current_steps": 2600, "total_steps": 3703, "loss": 0.1968, "lr": 9.891326755905652e-06, "epoch": 4.914933837429111, "percentage": 70.21, "elapsed_time": "4:00:16", "remaining_time": "1:41:55"}
|
||||
{"current_steps": 2605, "total_steps": 3703, "loss": 0.2232, "lr": 9.8100836899841e-06, "epoch": 4.9243856332703215, "percentage": 70.35, "elapsed_time": "4:00:39", "remaining_time": "1:41:26"}
|
||||
{"current_steps": 2610, "total_steps": 3703, "loss": 0.1794, "lr": 9.729067088275025e-06, "epoch": 4.933837429111531, "percentage": 70.48, "elapsed_time": "4:01:00", "remaining_time": "1:40:55"}
|
||||
{"current_steps": 2615, "total_steps": 3703, "loss": 0.2426, "lr": 9.648278751319329e-06, "epoch": 4.943289224952741, "percentage": 70.62, "elapsed_time": "4:01:26", "remaining_time": "1:40:27"}
|
||||
{"current_steps": 2620, "total_steps": 3703, "loss": 0.2274, "lr": 9.56772047458485e-06, "epoch": 4.952741020793951, "percentage": 70.75, "elapsed_time": "4:01:54", "remaining_time": "1:39:59"}
|
||||
{"current_steps": 2625, "total_steps": 3703, "loss": 0.2307, "lr": 9.487394048426497e-06, "epoch": 4.96219281663516, "percentage": 70.89, "elapsed_time": "4:02:19", "remaining_time": "1:39:30"}
|
||||
{"current_steps": 2630, "total_steps": 3703, "loss": 0.2005, "lr": 9.407301258046454e-06, "epoch": 4.9716446124763705, "percentage": 71.02, "elapsed_time": "4:02:47", "remaining_time": "1:39:03"}
|
||||
{"current_steps": 2635, "total_steps": 3703, "loss": 0.2081, "lr": 9.327443883454499e-06, "epoch": 4.981096408317581, "percentage": 71.16, "elapsed_time": "4:03:12", "remaining_time": "1:38:34"}
|
||||
{"current_steps": 2640, "total_steps": 3703, "loss": 0.1962, "lr": 9.247823699428452e-06, "epoch": 4.99054820415879, "percentage": 71.29, "elapsed_time": "4:03:46", "remaining_time": "1:38:09"}
|
||||
{"current_steps": 2645, "total_steps": 3703, "loss": 0.1961, "lr": 9.168442475474737e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "4:04:12", "remaining_time": "1:37:41"}
|
||||
{"current_steps": 2650, "total_steps": 3703, "loss": 0.1929, "lr": 9.089301975789029e-06, "epoch": 5.00945179584121, "percentage": 71.56, "elapsed_time": "4:04:39", "remaining_time": "1:37:13"}
|
||||
{"current_steps": 2655, "total_steps": 3703, "loss": 0.2022, "lr": 9.010403959217078e-06, "epoch": 5.018903591682419, "percentage": 71.7, "elapsed_time": "4:05:05", "remaining_time": "1:36:44"}
|
||||
{"current_steps": 2660, "total_steps": 3703, "loss": 0.1939, "lr": 8.931750179215586e-06, "epoch": 5.0283553875236295, "percentage": 71.83, "elapsed_time": "4:05:30", "remaining_time": "1:36:15"}
|
||||
{"current_steps": 2665, "total_steps": 3703, "loss": 0.1844, "lr": 8.853342383813289e-06, "epoch": 5.03780718336484, "percentage": 71.97, "elapsed_time": "4:06:03", "remaining_time": "1:35:50"}
|
||||
{"current_steps": 2670, "total_steps": 3703, "loss": 0.1844, "lr": 8.775182315572044e-06, "epoch": 5.047258979206049, "percentage": 72.1, "elapsed_time": "4:06:28", "remaining_time": "1:35:21"}
|
||||
{"current_steps": 2675, "total_steps": 3703, "loss": 0.1982, "lr": 8.697271711548163e-06, "epoch": 5.056710775047259, "percentage": 72.24, "elapsed_time": "4:06:49", "remaining_time": "1:34:51"}
|
||||
{"current_steps": 2680, "total_steps": 3703, "loss": 0.1875, "lr": 8.619612303253759e-06, "epoch": 5.066162570888469, "percentage": 72.37, "elapsed_time": "4:07:15", "remaining_time": "1:34:22"}
|
||||
{"current_steps": 2685, "total_steps": 3703, "loss": 0.1957, "lr": 8.54220581661829e-06, "epoch": 5.0756143667296785, "percentage": 72.51, "elapsed_time": "4:07:40", "remaining_time": "1:33:54"}
|
||||
{"current_steps": 2690, "total_steps": 3703, "loss": 0.2109, "lr": 8.465053971950188e-06, "epoch": 5.085066162570889, "percentage": 72.64, "elapsed_time": "4:08:10", "remaining_time": "1:33:27"}
|
||||
{"current_steps": 2695, "total_steps": 3703, "loss": 0.1924, "lr": 8.388158483898661e-06, "epoch": 5.094517958412098, "percentage": 72.78, "elapsed_time": "4:08:38", "remaining_time": "1:32:59"}
|
||||
{"current_steps": 2700, "total_steps": 3703, "loss": 0.1792, "lr": 8.31152106141553e-06, "epoch": 5.103969754253308, "percentage": 72.91, "elapsed_time": "4:09:04", "remaining_time": "1:32:31"}
|
||||
{"current_steps": 2705, "total_steps": 3703, "loss": 0.1846, "lr": 8.235143407717282e-06, "epoch": 5.113421550094518, "percentage": 73.05, "elapsed_time": "4:09:23", "remaining_time": "1:32:00"}
|
||||
{"current_steps": 2710, "total_steps": 3703, "loss": 0.2048, "lr": 8.159027220247238e-06, "epoch": 5.122873345935727, "percentage": 73.18, "elapsed_time": "4:09:47", "remaining_time": "1:31:31"}
|
||||
{"current_steps": 2715, "total_steps": 3703, "loss": 0.2026, "lr": 8.083174190637766e-06, "epoch": 5.1323251417769375, "percentage": 73.32, "elapsed_time": "4:10:13", "remaining_time": "1:31:03"}
|
||||
{"current_steps": 2720, "total_steps": 3703, "loss": 0.2025, "lr": 8.00758600467276e-06, "epoch": 5.141776937618148, "percentage": 73.45, "elapsed_time": "4:10:39", "remaining_time": "1:30:35"}
|
||||
{"current_steps": 2725, "total_steps": 3703, "loss": 0.202, "lr": 7.932264342250112e-06, "epoch": 5.151228733459357, "percentage": 73.59, "elapsed_time": "4:11:04", "remaining_time": "1:30:06"}
|
||||
{"current_steps": 2730, "total_steps": 3703, "loss": 0.2005, "lr": 7.857210877344405e-06, "epoch": 5.160680529300567, "percentage": 73.72, "elapsed_time": "4:11:32", "remaining_time": "1:29:39"}
|
||||
{"current_steps": 2735, "total_steps": 3703, "loss": 0.198, "lr": 7.782427277969715e-06, "epoch": 5.170132325141777, "percentage": 73.86, "elapsed_time": "4:12:02", "remaining_time": "1:29:12"}
|
||||
{"current_steps": 2740, "total_steps": 3703, "loss": 0.1971, "lr": 7.707915206142536e-06, "epoch": 5.1795841209829865, "percentage": 73.99, "elapsed_time": "4:12:27", "remaining_time": "1:28:43"}
|
||||
{"current_steps": 2745, "total_steps": 3703, "loss": 0.1982, "lr": 7.63367631784484e-06, "epoch": 5.189035916824197, "percentage": 74.13, "elapsed_time": "4:12:53", "remaining_time": "1:28:15"}
|
||||
{"current_steps": 2750, "total_steps": 3703, "loss": 0.207, "lr": 7.559712262987269e-06, "epoch": 5.198487712665407, "percentage": 74.26, "elapsed_time": "4:13:18", "remaining_time": "1:27:46"}
|
||||
{"current_steps": 2755, "total_steps": 3703, "loss": 0.179, "lr": 7.486024685372468e-06, "epoch": 5.207939508506616, "percentage": 74.4, "elapsed_time": "4:13:47", "remaining_time": "1:27:19"}
|
||||
{"current_steps": 2760, "total_steps": 3703, "loss": 0.2186, "lr": 7.412615222658566e-06, "epoch": 5.217391304347826, "percentage": 74.53, "elapsed_time": "4:14:12", "remaining_time": "1:26:51"}
|
||||
{"current_steps": 2765, "total_steps": 3703, "loss": 0.1769, "lr": 7.339485506322755e-06, "epoch": 5.226843100189036, "percentage": 74.67, "elapsed_time": "4:14:37", "remaining_time": "1:26:22"}
|
||||
{"current_steps": 2770, "total_steps": 3703, "loss": 0.1922, "lr": 7.266637161625074e-06, "epoch": 5.236294896030246, "percentage": 74.8, "elapsed_time": "4:15:00", "remaining_time": "1:25:53"}
|
||||
{"current_steps": 2775, "total_steps": 3703, "loss": 0.2087, "lr": 7.194071807572234e-06, "epoch": 5.245746691871456, "percentage": 74.94, "elapsed_time": "4:15:21", "remaining_time": "1:25:23"}
|
||||
{"current_steps": 2780, "total_steps": 3703, "loss": 0.191, "lr": 7.121791056881688e-06, "epoch": 5.255198487712665, "percentage": 75.07, "elapsed_time": "4:15:50", "remaining_time": "1:24:56"}
|
||||
{"current_steps": 2785, "total_steps": 3703, "loss": 0.2141, "lr": 7.049796515945748e-06, "epoch": 5.264650283553875, "percentage": 75.21, "elapsed_time": "4:16:15", "remaining_time": "1:24:28"}
|
||||
{"current_steps": 2790, "total_steps": 3703, "loss": 0.2339, "lr": 6.9780897847959005e-06, "epoch": 5.274102079395085, "percentage": 75.34, "elapsed_time": "4:16:51", "remaining_time": "1:24:03"}
|
||||
{"current_steps": 2795, "total_steps": 3703, "loss": 0.1958, "lr": 6.906672457067272e-06, "epoch": 5.2835538752362945, "percentage": 75.48, "elapsed_time": "4:17:20", "remaining_time": "1:23:36"}
|
||||
{"current_steps": 2800, "total_steps": 3703, "loss": 0.1937, "lr": 6.835546119963159e-06, "epoch": 5.293005671077505, "percentage": 75.61, "elapsed_time": "4:17:49", "remaining_time": "1:23:08"}
|
||||
{"current_steps": 2805, "total_steps": 3703, "loss": 0.1891, "lr": 6.764712354219798e-06, "epoch": 5.302457466918715, "percentage": 75.75, "elapsed_time": "4:18:18", "remaining_time": "1:22:41"}
|
||||
{"current_steps": 2810, "total_steps": 3703, "loss": 0.2179, "lr": 6.694172734071209e-06, "epoch": 5.311909262759924, "percentage": 75.88, "elapsed_time": "4:18:42", "remaining_time": "1:22:13"}
|
||||
{"current_steps": 2815, "total_steps": 3703, "loss": 0.196, "lr": 6.623928827214234e-06, "epoch": 5.321361058601134, "percentage": 76.02, "elapsed_time": "4:19:11", "remaining_time": "1:21:45"}
|
||||
{"current_steps": 2820, "total_steps": 3703, "loss": 0.1873, "lr": 6.553982194773663e-06, "epoch": 5.330812854442344, "percentage": 76.15, "elapsed_time": "4:19:37", "remaining_time": "1:21:17"}
|
||||
{"current_steps": 2825, "total_steps": 3703, "loss": 0.1772, "lr": 6.4843343912675775e-06, "epoch": 5.340264650283554, "percentage": 76.29, "elapsed_time": "4:19:59", "remaining_time": "1:20:48"}
|
||||
{"current_steps": 2830, "total_steps": 3703, "loss": 0.1887, "lr": 6.4149869645727604e-06, "epoch": 5.349716446124764, "percentage": 76.42, "elapsed_time": "4:20:25", "remaining_time": "1:20:20"}
|
||||
{"current_steps": 2835, "total_steps": 3703, "loss": 0.2267, "lr": 6.34594145589033e-06, "epoch": 5.359168241965974, "percentage": 76.56, "elapsed_time": "4:21:00", "remaining_time": "1:19:54"}
|
||||
{"current_steps": 2840, "total_steps": 3703, "loss": 0.1816, "lr": 6.277199399711462e-06, "epoch": 5.368620037807183, "percentage": 76.69, "elapsed_time": "4:21:18", "remaining_time": "1:19:24"}
|
||||
{"current_steps": 2845, "total_steps": 3703, "loss": 0.1826, "lr": 6.208762323783317e-06, "epoch": 5.378071833648393, "percentage": 76.83, "elapsed_time": "4:21:41", "remaining_time": "1:18:55"}
|
||||
{"current_steps": 2850, "total_steps": 3703, "loss": 0.2102, "lr": 6.140631749075063e-06, "epoch": 5.387523629489603, "percentage": 76.96, "elapsed_time": "4:22:12", "remaining_time": "1:18:28"}
|
||||
{"current_steps": 2855, "total_steps": 3703, "loss": 0.1937, "lr": 6.0728091897440734e-06, "epoch": 5.396975425330813, "percentage": 77.1, "elapsed_time": "4:22:40", "remaining_time": "1:18:01"}
|
||||
{"current_steps": 2860, "total_steps": 3703, "loss": 0.2267, "lr": 6.005296153102285e-06, "epoch": 5.406427221172023, "percentage": 77.23, "elapsed_time": "4:23:07", "remaining_time": "1:17:33"}
|
||||
{"current_steps": 2865, "total_steps": 3703, "loss": 0.2049, "lr": 5.9380941395826926e-06, "epoch": 5.415879017013232, "percentage": 77.37, "elapsed_time": "4:23:36", "remaining_time": "1:17:06"}
|
||||
{"current_steps": 2870, "total_steps": 3703, "loss": 0.2098, "lr": 5.871204642706006e-06, "epoch": 5.425330812854442, "percentage": 77.5, "elapsed_time": "4:24:09", "remaining_time": "1:16:40"}
|
||||
{"current_steps": 2875, "total_steps": 3703, "loss": 0.2075, "lr": 5.8046291490474695e-06, "epoch": 5.434782608695652, "percentage": 77.64, "elapsed_time": "4:24:37", "remaining_time": "1:16:12"}
|
||||
{"current_steps": 2880, "total_steps": 3703, "loss": 0.2016, "lr": 5.73836913820379e-06, "epoch": 5.4442344045368625, "percentage": 77.77, "elapsed_time": "4:25:06", "remaining_time": "1:15:45"}
|
||||
{"current_steps": 2885, "total_steps": 3703, "loss": 0.1898, "lr": 5.672426082760305e-06, "epoch": 5.453686200378072, "percentage": 77.91, "elapsed_time": "4:25:32", "remaining_time": "1:15:17"}
|
||||
{"current_steps": 2890, "total_steps": 3703, "loss": 0.1999, "lr": 5.606801448258199e-06, "epoch": 5.463137996219282, "percentage": 78.04, "elapsed_time": "4:26:01", "remaining_time": "1:14:50"}
|
||||
{"current_steps": 2895, "total_steps": 3703, "loss": 0.2069, "lr": 5.541496693161963e-06, "epoch": 5.472589792060491, "percentage": 78.18, "elapsed_time": "4:26:34", "remaining_time": "1:14:23"}
|
||||
{"current_steps": 2900, "total_steps": 3703, "loss": 0.1963, "lr": 5.4765132688269975e-06, "epoch": 5.482041587901701, "percentage": 78.31, "elapsed_time": "4:27:02", "remaining_time": "1:13:56"}
|
||||
{"current_steps": 2905, "total_steps": 3703, "loss": 0.1777, "lr": 5.411852619467319e-06, "epoch": 5.491493383742911, "percentage": 78.45, "elapsed_time": "4:27:32", "remaining_time": "1:13:29"}
|
||||
{"current_steps": 2910, "total_steps": 3703, "loss": 0.2282, "lr": 5.347516182123482e-06, "epoch": 5.500945179584121, "percentage": 78.58, "elapsed_time": "4:28:05", "remaining_time": "1:13:03"}
|
||||
{"current_steps": 2915, "total_steps": 3703, "loss": 0.2073, "lr": 5.283505386630656e-06, "epoch": 5.510396975425331, "percentage": 78.72, "elapsed_time": "4:28:29", "remaining_time": "1:12:34"}
|
||||
{"current_steps": 2920, "total_steps": 3703, "loss": 0.2282, "lr": 5.219821655586821e-06, "epoch": 5.519848771266541, "percentage": 78.85, "elapsed_time": "4:28:59", "remaining_time": "1:12:07"}
|
||||
{"current_steps": 2925, "total_steps": 3703, "loss": 0.2188, "lr": 5.156466404321159e-06, "epoch": 5.52930056710775, "percentage": 78.99, "elapsed_time": "4:29:27", "remaining_time": "1:11:40"}
|
||||
{"current_steps": 2930, "total_steps": 3703, "loss": 0.1737, "lr": 5.0934410408626235e-06, "epoch": 5.53875236294896, "percentage": 79.13, "elapsed_time": "4:29:55", "remaining_time": "1:11:12"}
|
||||
{"current_steps": 2935, "total_steps": 3703, "loss": 0.1879, "lr": 5.030746965908613e-06, "epoch": 5.5482041587901705, "percentage": 79.26, "elapsed_time": "4:30:23", "remaining_time": "1:10:45"}
|
||||
{"current_steps": 2940, "total_steps": 3703, "loss": 0.1912, "lr": 4.968385572793859e-06, "epoch": 5.55765595463138, "percentage": 79.4, "elapsed_time": "4:30:50", "remaining_time": "1:10:17"}
|
||||
{"current_steps": 2945, "total_steps": 3703, "loss": 0.1902, "lr": 4.906358247459451e-06, "epoch": 5.56710775047259, "percentage": 79.53, "elapsed_time": "4:31:19", "remaining_time": "1:09:50"}
|
||||
{"current_steps": 2950, "total_steps": 3703, "loss": 0.1952, "lr": 4.844666368422055e-06, "epoch": 5.576559546313799, "percentage": 79.67, "elapsed_time": "4:31:47", "remaining_time": "1:09:22"}
|
||||
{"current_steps": 2955, "total_steps": 3703, "loss": 0.1753, "lr": 4.783311306743259e-06, "epoch": 5.586011342155009, "percentage": 79.8, "elapsed_time": "4:32:09", "remaining_time": "1:08:53"}
|
||||
{"current_steps": 2960, "total_steps": 3703, "loss": 0.2245, "lr": 4.722294425999099e-06, "epoch": 5.595463137996219, "percentage": 79.94, "elapsed_time": "4:32:40", "remaining_time": "1:08:26"}
|
||||
{"current_steps": 2965, "total_steps": 3703, "loss": 0.2087, "lr": 4.661617082249765e-06, "epoch": 5.60491493383743, "percentage": 80.07, "elapsed_time": "4:33:08", "remaining_time": "1:07:59"}
|
||||
{"current_steps": 2970, "total_steps": 3703, "loss": 0.2029, "lr": 4.601280624009459e-06, "epoch": 5.614366729678639, "percentage": 80.21, "elapsed_time": "4:33:35", "remaining_time": "1:07:31"}
|
||||
{"current_steps": 2975, "total_steps": 3703, "loss": 0.1703, "lr": 4.541286392216419e-06, "epoch": 5.623818525519849, "percentage": 80.34, "elapsed_time": "4:34:04", "remaining_time": "1:07:03"}
|
||||
{"current_steps": 2980, "total_steps": 3703, "loss": 0.249, "lr": 4.481635720203139e-06, "epoch": 5.633270321361058, "percentage": 80.48, "elapsed_time": "4:34:35", "remaining_time": "1:06:37"}
|
||||
{"current_steps": 2985, "total_steps": 3703, "loss": 0.2176, "lr": 4.4223299336667226e-06, "epoch": 5.642722117202268, "percentage": 80.61, "elapsed_time": "4:34:56", "remaining_time": "1:06:08"}
|
||||
{"current_steps": 2990, "total_steps": 3703, "loss": 0.189, "lr": 4.363370350639405e-06, "epoch": 5.6521739130434785, "percentage": 80.75, "elapsed_time": "4:35:25", "remaining_time": "1:05:40"}
|
||||
{"current_steps": 2995, "total_steps": 3703, "loss": 0.1745, "lr": 4.304758281459283e-06, "epoch": 5.661625708884688, "percentage": 80.88, "elapsed_time": "4:35:51", "remaining_time": "1:05:12"}
|
||||
{"current_steps": 3000, "total_steps": 3703, "loss": 0.2119, "lr": 4.24649502874118e-06, "epoch": 5.671077504725898, "percentage": 81.02, "elapsed_time": "4:36:18", "remaining_time": "1:04:44"}
|
||||
{"current_steps": 3005, "total_steps": 3703, "loss": 0.1985, "lr": 4.1885818873477156e-06, "epoch": 5.680529300567108, "percentage": 81.15, "elapsed_time": "4:36:56", "remaining_time": "1:04:19"}
|
||||
{"current_steps": 3010, "total_steps": 3703, "loss": 0.2073, "lr": 4.131020144360505e-06, "epoch": 5.689981096408317, "percentage": 81.29, "elapsed_time": "4:37:29", "remaining_time": "1:03:53"}
|
||||
{"current_steps": 3015, "total_steps": 3703, "loss": 0.2312, "lr": 4.073811079051557e-06, "epoch": 5.699432892249527, "percentage": 81.42, "elapsed_time": "4:38:05", "remaining_time": "1:03:27"}
|
||||
{"current_steps": 3020, "total_steps": 3703, "loss": 0.1899, "lr": 4.016955962854874e-06, "epoch": 5.708884688090738, "percentage": 81.56, "elapsed_time": "4:38:37", "remaining_time": "1:03:00"}
|
||||
{"current_steps": 3025, "total_steps": 3703, "loss": 0.1957, "lr": 3.9604560593381444e-06, "epoch": 5.718336483931947, "percentage": 81.69, "elapsed_time": "4:39:04", "remaining_time": "1:02:32"}
|
||||
{"current_steps": 3030, "total_steps": 3703, "loss": 0.1896, "lr": 3.9043126241747e-06, "epoch": 5.727788279773157, "percentage": 81.83, "elapsed_time": "4:39:27", "remaining_time": "1:02:04"}
|
||||
{"current_steps": 3035, "total_steps": 3703, "loss": 0.2215, "lr": 3.8485269051156015e-06, "epoch": 5.737240075614367, "percentage": 81.96, "elapsed_time": "4:39:58", "remaining_time": "1:01:37"}
|
||||
{"current_steps": 3040, "total_steps": 3703, "loss": 0.2075, "lr": 3.7931001419618963e-06, "epoch": 5.746691871455576, "percentage": 82.1, "elapsed_time": "4:40:24", "remaining_time": "1:01:09"}
|
||||
{"current_steps": 3045, "total_steps": 3703, "loss": 0.2428, "lr": 3.7380335665370693e-06, "epoch": 5.7561436672967865, "percentage": 82.23, "elapsed_time": "4:40:50", "remaining_time": "1:00:41"}
|
||||
{"current_steps": 3050, "total_steps": 3703, "loss": 0.1806, "lr": 3.6833284026596827e-06, "epoch": 5.765595463137997, "percentage": 82.37, "elapsed_time": "4:41:17", "remaining_time": "1:00:13"}
|
||||
{"current_steps": 3055, "total_steps": 3703, "loss": 0.2117, "lr": 3.6289858661161435e-06, "epoch": 5.775047258979206, "percentage": 82.5, "elapsed_time": "4:41:44", "remaining_time": "0:59:45"}
|
||||
{"current_steps": 3060, "total_steps": 3703, "loss": 0.2012, "lr": 3.5750071646337283e-06, "epoch": 5.784499054820416, "percentage": 82.64, "elapsed_time": "4:42:09", "remaining_time": "0:59:17"}
|
||||
{"current_steps": 3065, "total_steps": 3703, "loss": 0.2178, "lr": 3.5213934978537002e-06, "epoch": 5.793950850661625, "percentage": 82.77, "elapsed_time": "4:42:40", "remaining_time": "0:58:50"}
|
||||
{"current_steps": 3070, "total_steps": 3703, "loss": 0.2083, "lr": 3.4681460573046667e-06, "epoch": 5.803402646502835, "percentage": 82.91, "elapsed_time": "4:43:09", "remaining_time": "0:58:23"}
|
||||
{"current_steps": 3075, "total_steps": 3703, "loss": 0.2247, "lr": 3.415266026376105e-06, "epoch": 5.812854442344046, "percentage": 83.04, "elapsed_time": "4:43:33", "remaining_time": "0:57:54"}
|
||||
{"current_steps": 3080, "total_steps": 3703, "loss": 0.2085, "lr": 3.3627545802920402e-06, "epoch": 5.822306238185255, "percentage": 83.18, "elapsed_time": "4:44:01", "remaining_time": "0:57:27"}
|
||||
{"current_steps": 3085, "total_steps": 3703, "loss": 0.1845, "lr": 3.310612886084961e-06, "epoch": 5.831758034026465, "percentage": 83.31, "elapsed_time": "4:44:25", "remaining_time": "0:56:58"}
|
||||
{"current_steps": 3090, "total_steps": 3703, "loss": 0.2077, "lr": 3.2588421025698525e-06, "epoch": 5.841209829867675, "percentage": 83.45, "elapsed_time": "4:44:58", "remaining_time": "0:56:32"}
|
||||
{"current_steps": 3095, "total_steps": 3703, "loss": 0.2042, "lr": 3.207443380318449e-06, "epoch": 5.850661625708884, "percentage": 83.58, "elapsed_time": "4:45:16", "remaining_time": "0:56:02"}
|
||||
{"current_steps": 3100, "total_steps": 3703, "loss": 0.2288, "lr": 3.1564178616336737e-06, "epoch": 5.8601134215500945, "percentage": 83.72, "elapsed_time": "4:45:44", "remaining_time": "0:55:34"}
|
||||
{"current_steps": 3105, "total_steps": 3703, "loss": 0.2253, "lr": 3.1057666805242336e-06, "epoch": 5.869565217391305, "percentage": 83.85, "elapsed_time": "4:46:17", "remaining_time": "0:55:08"}
|
||||
{"current_steps": 3110, "total_steps": 3703, "loss": 0.2272, "lr": 3.055490962679448e-06, "epoch": 5.879017013232514, "percentage": 83.99, "elapsed_time": "4:46:51", "remaining_time": "0:54:41"}
|
||||
{"current_steps": 3115, "total_steps": 3703, "loss": 0.2031, "lr": 3.005591825444194e-06, "epoch": 5.888468809073724, "percentage": 84.12, "elapsed_time": "4:47:12", "remaining_time": "0:54:12"}
|
||||
{"current_steps": 3120, "total_steps": 3703, "loss": 0.1954, "lr": 2.956070377794096e-06, "epoch": 5.897920604914934, "percentage": 84.26, "elapsed_time": "4:47:38", "remaining_time": "0:53:44"}
|
||||
{"current_steps": 3125, "total_steps": 3703, "loss": 0.1869, "lr": 2.906927720310884e-06, "epoch": 5.9073724007561434, "percentage": 84.39, "elapsed_time": "4:48:17", "remaining_time": "0:53:19"}
|
||||
{"current_steps": 3130, "total_steps": 3703, "loss": 0.2249, "lr": 2.8581649451579083e-06, "epoch": 5.916824196597354, "percentage": 84.53, "elapsed_time": "4:48:39", "remaining_time": "0:52:50"}
|
||||
{"current_steps": 3135, "total_steps": 3703, "loss": 0.2149, "lr": 2.809783136055895e-06, "epoch": 5.926275992438564, "percentage": 84.66, "elapsed_time": "4:49:14", "remaining_time": "0:52:24"}
|
||||
{"current_steps": 3140, "total_steps": 3703, "loss": 0.2053, "lr": 2.761783368258852e-06, "epoch": 5.935727788279773, "percentage": 84.8, "elapsed_time": "4:49:45", "remaining_time": "0:51:57"}
|
||||
{"current_steps": 3145, "total_steps": 3703, "loss": 0.2139, "lr": 2.7141667085301593e-06, "epoch": 5.945179584120983, "percentage": 84.93, "elapsed_time": "4:50:10", "remaining_time": "0:51:29"}
|
||||
{"current_steps": 3150, "total_steps": 3703, "loss": 0.1741, "lr": 2.6669342151188704e-06, "epoch": 5.954631379962192, "percentage": 85.07, "elapsed_time": "4:50:39", "remaining_time": "0:51:01"}
|
||||
{"current_steps": 3155, "total_steps": 3703, "loss": 0.2011, "lr": 2.62008693773621e-06, "epoch": 5.9640831758034025, "percentage": 85.2, "elapsed_time": "4:51:14", "remaining_time": "0:50:35"}
|
||||
{"current_steps": 3160, "total_steps": 3703, "loss": 0.1774, "lr": 2.573625917532212e-06, "epoch": 5.973534971644613, "percentage": 85.34, "elapsed_time": "4:51:41", "remaining_time": "0:50:07"}
|
||||
{"current_steps": 3165, "total_steps": 3703, "loss": 0.2152, "lr": 2.5275521870726107e-06, "epoch": 5.982986767485822, "percentage": 85.47, "elapsed_time": "4:52:11", "remaining_time": "0:49:40"}
|
||||
{"current_steps": 3170, "total_steps": 3703, "loss": 0.1937, "lr": 2.481866770315866e-06, "epoch": 5.992438563327032, "percentage": 85.61, "elapsed_time": "4:52:46", "remaining_time": "0:49:13"}
|
||||
{"current_steps": 3175, "total_steps": 3703, "loss": 0.2162, "lr": 2.4365706825904335e-06, "epoch": 6.001890359168242, "percentage": 85.74, "elapsed_time": "4:53:13", "remaining_time": "0:48:45"}
|
||||
{"current_steps": 3180, "total_steps": 3703, "loss": 0.2062, "lr": 2.391664930572175e-06, "epoch": 6.0113421550094515, "percentage": 85.88, "elapsed_time": "4:53:43", "remaining_time": "0:48:18"}
|
||||
{"current_steps": 3185, "total_steps": 3703, "loss": 0.1819, "lr": 2.347150512262002e-06, "epoch": 6.020793950850662, "percentage": 86.01, "elapsed_time": "4:54:08", "remaining_time": "0:47:50"}
|
||||
{"current_steps": 3190, "total_steps": 3703, "loss": 0.1729, "lr": 2.303028416963693e-06, "epoch": 6.030245746691872, "percentage": 86.15, "elapsed_time": "4:54:32", "remaining_time": "0:47:22"}
|
||||
{"current_steps": 3195, "total_steps": 3703, "loss": 0.2157, "lr": 2.259299625261906e-06, "epoch": 6.039697542533081, "percentage": 86.28, "elapsed_time": "4:55:01", "remaining_time": "0:46:54"}
|
||||
{"current_steps": 3200, "total_steps": 3703, "loss": 0.2041, "lr": 2.2159651090003774e-06, "epoch": 6.049149338374291, "percentage": 86.42, "elapsed_time": "4:55:28", "remaining_time": "0:46:26"}
|
||||
{"current_steps": 3205, "total_steps": 3703, "loss": 0.2312, "lr": 2.173025831260336e-06, "epoch": 6.058601134215501, "percentage": 86.55, "elapsed_time": "4:55:59", "remaining_time": "0:45:59"}
|
||||
{"current_steps": 3210, "total_steps": 3703, "loss": 0.1819, "lr": 2.1304827463390843e-06, "epoch": 6.0680529300567105, "percentage": 86.69, "elapsed_time": "4:56:23", "remaining_time": "0:45:31"}
|
||||
{"current_steps": 3215, "total_steps": 3703, "loss": 0.207, "lr": 2.088336799728814e-06, "epoch": 6.077504725897921, "percentage": 86.82, "elapsed_time": "4:56:51", "remaining_time": "0:45:03"}
|
||||
{"current_steps": 3220, "total_steps": 3703, "loss": 0.2301, "lr": 2.046588928095563e-06, "epoch": 6.086956521739131, "percentage": 86.96, "elapsed_time": "4:57:27", "remaining_time": "0:44:37"}
|
||||
{"current_steps": 3225, "total_steps": 3703, "loss": 0.2109, "lr": 2.005240059258431e-06, "epoch": 6.09640831758034, "percentage": 87.09, "elapsed_time": "4:57:57", "remaining_time": "0:44:09"}
|
||||
{"current_steps": 3230, "total_steps": 3703, "loss": 0.1963, "lr": 1.9642911121689233e-06, "epoch": 6.10586011342155, "percentage": 87.23, "elapsed_time": "4:58:31", "remaining_time": "0:43:42"}
|
||||
{"current_steps": 3235, "total_steps": 3703, "loss": 0.2134, "lr": 1.9237429968905586e-06, "epoch": 6.11531190926276, "percentage": 87.36, "elapsed_time": "4:58:57", "remaining_time": "0:43:15"}
|
||||
{"current_steps": 3240, "total_steps": 3703, "loss": 0.2417, "lr": 1.8835966145786222e-06, "epoch": 6.12476370510397, "percentage": 87.5, "elapsed_time": "4:59:26", "remaining_time": "0:42:47"}
|
||||
{"current_steps": 3245, "total_steps": 3703, "loss": 0.1874, "lr": 1.84385285746016e-06, "epoch": 6.13421550094518, "percentage": 87.63, "elapsed_time": "4:59:54", "remaining_time": "0:42:19"}
|
||||
{"current_steps": 3250, "total_steps": 3703, "loss": 0.1764, "lr": 1.8045126088141262e-06, "epoch": 6.143667296786389, "percentage": 87.77, "elapsed_time": "5:00:26", "remaining_time": "0:41:52"}
|
||||
{"current_steps": 3255, "total_steps": 3703, "loss": 0.201, "lr": 1.7655767429517645e-06, "epoch": 6.153119092627599, "percentage": 87.9, "elapsed_time": "5:00:55", "remaining_time": "0:41:25"}
|
||||
{"current_steps": 3260, "total_steps": 3703, "loss": 0.1771, "lr": 1.727046125197185e-06, "epoch": 6.162570888468809, "percentage": 88.04, "elapsed_time": "5:01:12", "remaining_time": "0:40:55"}
|
||||
{"current_steps": 3265, "total_steps": 3703, "loss": 0.1707, "lr": 1.6889216118681107e-06, "epoch": 6.1720226843100185, "percentage": 88.17, "elapsed_time": "5:01:48", "remaining_time": "0:40:29"}
|
||||
{"current_steps": 3270, "total_steps": 3703, "loss": 0.1704, "lr": 1.6512040502568761e-06, "epoch": 6.181474480151229, "percentage": 88.31, "elapsed_time": "5:02:13", "remaining_time": "0:40:01"}
|
||||
{"current_steps": 3275, "total_steps": 3703, "loss": 0.198, "lr": 1.6138942786115653e-06, "epoch": 6.190926275992439, "percentage": 88.44, "elapsed_time": "5:02:36", "remaining_time": "0:39:32"}
|
||||
{"current_steps": 3280, "total_steps": 3703, "loss": 0.199, "lr": 1.5769931261174055e-06, "epoch": 6.200378071833648, "percentage": 88.58, "elapsed_time": "5:03:02", "remaining_time": "0:39:04"}
|
||||
{"current_steps": 3285, "total_steps": 3703, "loss": 0.2022, "lr": 1.5405014128783236e-06, "epoch": 6.209829867674858, "percentage": 88.71, "elapsed_time": "5:03:36", "remaining_time": "0:38:37"}
|
||||
{"current_steps": 3290, "total_steps": 3703, "loss": 0.2003, "lr": 1.5044199498987456e-06, "epoch": 6.219281663516068, "percentage": 88.85, "elapsed_time": "5:04:05", "remaining_time": "0:38:10"}
|
||||
{"current_steps": 3295, "total_steps": 3703, "loss": 0.2047, "lr": 1.4687495390655282e-06, "epoch": 6.228733459357278, "percentage": 88.98, "elapsed_time": "5:04:39", "remaining_time": "0:37:43"}
|
||||
{"current_steps": 3300, "total_steps": 3703, "loss": 0.186, "lr": 1.4334909731301893e-06, "epoch": 6.238185255198488, "percentage": 89.12, "elapsed_time": "5:05:03", "remaining_time": "0:37:15"}
|
||||
{"current_steps": 3305, "total_steps": 3703, "loss": 0.193, "lr": 1.398645035691244e-06, "epoch": 6.247637051039698, "percentage": 89.25, "elapsed_time": "5:05:30", "remaining_time": "0:36:47"}
|
||||
{"current_steps": 3310, "total_steps": 3703, "loss": 0.2011, "lr": 1.3642125011768204e-06, "epoch": 6.257088846880907, "percentage": 89.39, "elapsed_time": "5:06:03", "remaining_time": "0:36:20"}
|
||||
{"current_steps": 3315, "total_steps": 3703, "loss": 0.199, "lr": 1.3301941348274316e-06, "epoch": 6.266540642722117, "percentage": 89.52, "elapsed_time": "5:06:31", "remaining_time": "0:35:52"}
|
||||
{"current_steps": 3320, "total_steps": 3703, "loss": 0.1829, "lr": 1.2965906926789807e-06, "epoch": 6.2759924385633274, "percentage": 89.66, "elapsed_time": "5:06:58", "remaining_time": "0:35:24"}
|
||||
{"current_steps": 3325, "total_steps": 3703, "loss": 0.1884, "lr": 1.2634029215459442e-06, "epoch": 6.285444234404537, "percentage": 89.79, "elapsed_time": "5:07:21", "remaining_time": "0:34:56"}
|
||||
{"current_steps": 3330, "total_steps": 3703, "loss": 0.2037, "lr": 1.2306315590047912e-06, "epoch": 6.294896030245747, "percentage": 89.93, "elapsed_time": "5:07:43", "remaining_time": "0:34:28"}
|
||||
{"current_steps": 3335, "total_steps": 3703, "loss": 0.1789, "lr": 1.1982773333775822e-06, "epoch": 6.304347826086957, "percentage": 90.06, "elapsed_time": "5:08:11", "remaining_time": "0:34:00"}
|
||||
{"current_steps": 3340, "total_steps": 3703, "loss": 0.1941, "lr": 1.1663409637157685e-06, "epoch": 6.313799621928166, "percentage": 90.2, "elapsed_time": "5:08:38", "remaining_time": "0:33:32"}
|
||||
{"current_steps": 3345, "total_steps": 3703, "loss": 0.2005, "lr": 1.1348231597842508e-06, "epoch": 6.323251417769376, "percentage": 90.33, "elapsed_time": "5:09:09", "remaining_time": "0:33:05"}
|
||||
{"current_steps": 3350, "total_steps": 3703, "loss": 0.1719, "lr": 1.1037246220455611e-06, "epoch": 6.332703213610586, "percentage": 90.47, "elapsed_time": "5:09:34", "remaining_time": "0:32:37"}
|
||||
{"current_steps": 3355, "total_steps": 3703, "loss": 0.2091, "lr": 1.0730460416443233e-06, "epoch": 6.342155009451796, "percentage": 90.6, "elapsed_time": "5:10:05", "remaining_time": "0:32:09"}
|
||||
{"current_steps": 3360, "total_steps": 3703, "loss": 0.2045, "lr": 1.0427881003918783e-06, "epoch": 6.351606805293006, "percentage": 90.74, "elapsed_time": "5:10:35", "remaining_time": "0:31:42"}
|
||||
{"current_steps": 3365, "total_steps": 3703, "loss": 0.2033, "lr": 1.012951470751149e-06, "epoch": 6.361058601134215, "percentage": 90.87, "elapsed_time": "5:11:10", "remaining_time": "0:31:15"}
|
||||
{"current_steps": 3370, "total_steps": 3703, "loss": 0.1724, "lr": 9.835368158216707e-07, "epoch": 6.370510396975425, "percentage": 91.01, "elapsed_time": "5:11:33", "remaining_time": "0:30:47"}
|
||||
{"current_steps": 3375, "total_steps": 3703, "loss": 0.1959, "lr": 9.545447893248827e-07, "epoch": 6.3799621928166355, "percentage": 91.14, "elapsed_time": "5:12:01", "remaining_time": "0:30:19"}
|
||||
{"current_steps": 3380, "total_steps": 3703, "loss": 0.1749, "lr": 9.259760355895664e-07, "epoch": 6.389413988657845, "percentage": 91.28, "elapsed_time": "5:12:30", "remaining_time": "0:29:51"}
|
||||
{"current_steps": 3385, "total_steps": 3703, "loss": 0.1825, "lr": 8.978311895375569e-07, "epoch": 6.398865784499055, "percentage": 91.41, "elapsed_time": "5:12:49", "remaining_time": "0:29:23"}
|
||||
{"current_steps": 3390, "total_steps": 3703, "loss": 0.1756, "lr": 8.701108766696098e-07, "epoch": 6.408317580340265, "percentage": 91.55, "elapsed_time": "5:13:12", "remaining_time": "0:28:55"}
|
||||
{"current_steps": 3395, "total_steps": 3703, "loss": 0.1866, "lr": 8.428157130515169e-07, "epoch": 6.417769376181474, "percentage": 91.68, "elapsed_time": "5:13:28", "remaining_time": "0:28:26"}
|
||||
{"current_steps": 3400, "total_steps": 3703, "loss": 0.1952, "lr": 8.159463053004058e-07, "epoch": 6.427221172022684, "percentage": 91.82, "elapsed_time": "5:13:48", "remaining_time": "0:27:57"}
|
||||
{"current_steps": 3405, "total_steps": 3703, "loss": 0.1847, "lr": 7.89503250571253e-07, "epoch": 6.4366729678638945, "percentage": 91.95, "elapsed_time": "5:14:14", "remaining_time": "0:27:30"}
|
||||
{"current_steps": 3410, "total_steps": 3703, "loss": 0.2057, "lr": 7.634871365436192e-07, "epoch": 6.446124763705104, "percentage": 92.09, "elapsed_time": "5:14:43", "remaining_time": "0:27:02"}
|
||||
{"current_steps": 3415, "total_steps": 3703, "loss": 0.212, "lr": 7.378985414085949e-07, "epoch": 6.455576559546314, "percentage": 92.22, "elapsed_time": "5:15:11", "remaining_time": "0:26:34"}
|
||||
{"current_steps": 3420, "total_steps": 3703, "loss": 0.1792, "lr": 7.127380338559331e-07, "epoch": 6.465028355387524, "percentage": 92.36, "elapsed_time": "5:15:31", "remaining_time": "0:26:06"}
|
||||
{"current_steps": 3425, "total_steps": 3703, "loss": 0.2054, "lr": 6.880061730614307e-07, "epoch": 6.474480151228733, "percentage": 92.49, "elapsed_time": "5:15:54", "remaining_time": "0:25:38"}
|
||||
{"current_steps": 3430, "total_steps": 3703, "loss": 0.2489, "lr": 6.637035086744825e-07, "epoch": 6.4839319470699435, "percentage": 92.63, "elapsed_time": "5:16:30", "remaining_time": "0:25:11"}
|
||||
{"current_steps": 3435, "total_steps": 3703, "loss": 0.1857, "lr": 6.398305808058869e-07, "epoch": 6.493383742911153, "percentage": 92.76, "elapsed_time": "5:16:56", "remaining_time": "0:24:43"}
|
||||
{"current_steps": 3440, "total_steps": 3703, "loss": 0.1938, "lr": 6.163879200158151e-07, "epoch": 6.502835538752363, "percentage": 92.9, "elapsed_time": "5:17:22", "remaining_time": "0:24:15"}
|
||||
{"current_steps": 3445, "total_steps": 3703, "loss": 0.1962, "lr": 5.933760473020411e-07, "epoch": 6.512287334593573, "percentage": 93.03, "elapsed_time": "5:17:49", "remaining_time": "0:23:48"}
|
||||
{"current_steps": 3450, "total_steps": 3703, "loss": 0.1961, "lr": 5.707954740883592e-07, "epoch": 6.521739130434782, "percentage": 93.17, "elapsed_time": "5:18:21", "remaining_time": "0:23:20"}
|
||||
{"current_steps": 3455, "total_steps": 3703, "loss": 0.2526, "lr": 5.486467022132114e-07, "epoch": 6.531190926275992, "percentage": 93.3, "elapsed_time": "5:18:49", "remaining_time": "0:22:53"}
|
||||
{"current_steps": 3460, "total_steps": 3703, "loss": 0.2007, "lr": 5.269302239185359e-07, "epoch": 6.5406427221172025, "percentage": 93.44, "elapsed_time": "5:19:16", "remaining_time": "0:22:25"}
|
||||
{"current_steps": 3465, "total_steps": 3703, "loss": 0.1993, "lr": 5.056465218388363e-07, "epoch": 6.550094517958412, "percentage": 93.57, "elapsed_time": "5:19:50", "remaining_time": "0:21:58"}
|
||||
{"current_steps": 3470, "total_steps": 3703, "loss": 0.172, "lr": 4.847960689904385e-07, "epoch": 6.559546313799622, "percentage": 93.71, "elapsed_time": "5:20:19", "remaining_time": "0:21:30"}
|
||||
{"current_steps": 3475, "total_steps": 3703, "loss": 0.1983, "lr": 4.6437932876099767e-07, "epoch": 6.568998109640832, "percentage": 93.84, "elapsed_time": "5:20:42", "remaining_time": "0:21:02"}
|
||||
{"current_steps": 3480, "total_steps": 3703, "loss": 0.1982, "lr": 4.443967548991857e-07, "epoch": 6.578449905482041, "percentage": 93.98, "elapsed_time": "5:21:07", "remaining_time": "0:20:34"}
|
||||
{"current_steps": 3485, "total_steps": 3703, "loss": 0.1812, "lr": 4.2484879150461067e-07, "epoch": 6.5879017013232515, "percentage": 94.11, "elapsed_time": "5:21:33", "remaining_time": "0:20:06"}
|
||||
{"current_steps": 3490, "total_steps": 3703, "loss": 0.2016, "lr": 4.0573587301794947e-07, "epoch": 6.597353497164462, "percentage": 94.25, "elapsed_time": "5:21:58", "remaining_time": "0:19:39"}
|
||||
{"current_steps": 3495, "total_steps": 3703, "loss": 0.1924, "lr": 3.870584242112885e-07, "epoch": 6.606805293005671, "percentage": 94.38, "elapsed_time": "5:22:27", "remaining_time": "0:19:11"}
|
||||
{"current_steps": 3500, "total_steps": 3703, "loss": 0.2068, "lr": 3.688168601786912e-07, "epoch": 6.616257088846881, "percentage": 94.52, "elapsed_time": "5:22:54", "remaining_time": "0:18:43"}
|
||||
{"current_steps": 3505, "total_steps": 3703, "loss": 0.2, "lr": 3.5101158632696584e-07, "epoch": 6.625708884688091, "percentage": 94.65, "elapsed_time": "5:23:24", "remaining_time": "0:18:16"}
|
||||
{"current_steps": 3510, "total_steps": 3703, "loss": 0.1786, "lr": 3.336429983666545e-07, "epoch": 6.6351606805293, "percentage": 94.79, "elapsed_time": "5:23:52", "remaining_time": "0:17:48"}
|
||||
{"current_steps": 3515, "total_steps": 3703, "loss": 0.203, "lr": 3.1671148230324246e-07, "epoch": 6.644612476370511, "percentage": 94.92, "elapsed_time": "5:24:25", "remaining_time": "0:17:21"}
|
||||
{"current_steps": 3520, "total_steps": 3703, "loss": 0.2049, "lr": 3.0021741442857634e-07, "epoch": 6.65406427221172, "percentage": 95.06, "elapsed_time": "5:24:46", "remaining_time": "0:16:53"}
|
||||
{"current_steps": 3525, "total_steps": 3703, "loss": 0.1949, "lr": 2.8416116131250836e-07, "epoch": 6.66351606805293, "percentage": 95.19, "elapsed_time": "5:25:10", "remaining_time": "0:16:25"}
|
||||
{"current_steps": 3530, "total_steps": 3703, "loss": 0.194, "lr": 2.6854307979474306e-07, "epoch": 6.67296786389414, "percentage": 95.33, "elapsed_time": "5:25:36", "remaining_time": "0:15:57"}
|
||||
{"current_steps": 3535, "total_steps": 3703, "loss": 0.2009, "lr": 2.5336351697690553e-07, "epoch": 6.682419659735349, "percentage": 95.46, "elapsed_time": "5:25:55", "remaining_time": "0:15:29"}
|
||||
{"current_steps": 3540, "total_steps": 3703, "loss": 0.2145, "lr": 2.386228102148347e-07, "epoch": 6.6918714555765595, "percentage": 95.6, "elapsed_time": "5:26:28", "remaining_time": "0:15:01"}
|
||||
{"current_steps": 3545, "total_steps": 3703, "loss": 0.2111, "lr": 2.2432128711107558e-07, "epoch": 6.70132325141777, "percentage": 95.73, "elapsed_time": "5:26:55", "remaining_time": "0:14:34"}
|
||||
{"current_steps": 3550, "total_steps": 3703, "loss": 0.1821, "lr": 2.1045926550760988e-07, "epoch": 6.710775047258979, "percentage": 95.87, "elapsed_time": "5:27:21", "remaining_time": "0:14:06"}
|
||||
{"current_steps": 3555, "total_steps": 3703, "loss": 0.2006, "lr": 1.9703705347878355e-07, "epoch": 6.720226843100189, "percentage": 96.0, "elapsed_time": "5:27:50", "remaining_time": "0:13:38"}
|
||||
{"current_steps": 3560, "total_steps": 3703, "loss": 0.1716, "lr": 1.8405494932446366e-07, "epoch": 6.729678638941399, "percentage": 96.14, "elapsed_time": "5:28:18", "remaining_time": "0:13:11"}
|
||||
{"current_steps": 3565, "total_steps": 3703, "loss": 0.1953, "lr": 1.7151324156340355e-07, "epoch": 6.739130434782608, "percentage": 96.27, "elapsed_time": "5:28:53", "remaining_time": "0:12:43"}
|
||||
{"current_steps": 3570, "total_steps": 3703, "loss": 0.1904, "lr": 1.5941220892684572e-07, "epoch": 6.748582230623819, "percentage": 96.41, "elapsed_time": "5:29:17", "remaining_time": "0:12:16"}
|
||||
{"current_steps": 3575, "total_steps": 3703, "loss": 0.2066, "lr": 1.4775212035230691e-07, "epoch": 6.758034026465029, "percentage": 96.54, "elapsed_time": "5:29:45", "remaining_time": "0:11:48"}
|
||||
{"current_steps": 3580, "total_steps": 3703, "loss": 0.213, "lr": 1.3653323497761607e-07, "epoch": 6.767485822306238, "percentage": 96.68, "elapsed_time": "5:30:11", "remaining_time": "0:11:20"}
|
||||
{"current_steps": 3585, "total_steps": 3703, "loss": 0.2037, "lr": 1.2575580213514792e-07, "epoch": 6.776937618147448, "percentage": 96.81, "elapsed_time": "5:30:37", "remaining_time": "0:10:52"}
|
||||
{"current_steps": 3590, "total_steps": 3703, "loss": 0.2003, "lr": 1.1542006134628747e-07, "epoch": 6.786389413988658, "percentage": 96.95, "elapsed_time": "5:31:11", "remaining_time": "0:10:25"}
|
||||
{"current_steps": 3595, "total_steps": 3703, "loss": 0.2131, "lr": 1.0552624231609632e-07, "epoch": 6.7958412098298675, "percentage": 97.08, "elapsed_time": "5:31:40", "remaining_time": "0:09:57"}
|
||||
{"current_steps": 3600, "total_steps": 3703, "loss": 0.1793, "lr": 9.607456492822132e-08, "epoch": 6.805293005671078, "percentage": 97.22, "elapsed_time": "5:32:06", "remaining_time": "0:09:30"}
|
||||
{"current_steps": 3605, "total_steps": 3703, "loss": 0.1771, "lr": 8.706523924000066e-08, "epoch": 6.814744801512287, "percentage": 97.35, "elapsed_time": "5:32:26", "remaining_time": "0:09:02"}
|
||||
{"current_steps": 3610, "total_steps": 3703, "loss": 0.1948, "lr": 7.849846547778983e-08, "epoch": 6.824196597353497, "percentage": 97.49, "elapsed_time": "5:33:00", "remaining_time": "0:08:34"}
|
||||
{"current_steps": 3615, "total_steps": 3703, "loss": 0.1927, "lr": 7.03744340325252e-08, "epoch": 6.833648393194707, "percentage": 97.62, "elapsed_time": "5:33:23", "remaining_time": "0:08:06"}
|
||||
{"current_steps": 3620, "total_steps": 3703, "loss": 0.1932, "lr": 6.269332545548068e-08, "epoch": 6.843100189035916, "percentage": 97.76, "elapsed_time": "5:33:44", "remaining_time": "0:07:39"}
|
||||
{"current_steps": 3625, "total_steps": 3703, "loss": 0.1783, "lr": 5.5455310454259894e-08, "epoch": 6.852551984877127, "percentage": 97.89, "elapsed_time": "5:34:05", "remaining_time": "0:07:11"}
|
||||
{"current_steps": 3630, "total_steps": 3703, "loss": 0.183, "lr": 4.866054988900581e-08, "epoch": 6.862003780718337, "percentage": 98.03, "elapsed_time": "5:34:23", "remaining_time": "0:06:43"}
|
||||
{"current_steps": 3635, "total_steps": 3703, "loss": 0.2169, "lr": 4.230919476881479e-08, "epoch": 6.871455576559546, "percentage": 98.16, "elapsed_time": "5:34:56", "remaining_time": "0:06:15"}
|
||||
{"current_steps": 3640, "total_steps": 3703, "loss": 0.1669, "lr": 3.640138624839695e-08, "epoch": 6.880907372400756, "percentage": 98.3, "elapsed_time": "5:35:18", "remaining_time": "0:05:48"}
|
||||
{"current_steps": 3645, "total_steps": 3703, "loss": 0.2078, "lr": 3.093725562492544e-08, "epoch": 6.890359168241966, "percentage": 98.43, "elapsed_time": "5:35:44", "remaining_time": "0:05:20"}
|
||||
{"current_steps": 3650, "total_steps": 3703, "loss": 0.1712, "lr": 2.591692433511872e-08, "epoch": 6.8998109640831755, "percentage": 98.57, "elapsed_time": "5:36:10", "remaining_time": "0:04:52"}
|
||||
{"current_steps": 3655, "total_steps": 3703, "loss": 0.1968, "lr": 2.1340503952551606e-08, "epoch": 6.909262759924386, "percentage": 98.7, "elapsed_time": "5:36:32", "remaining_time": "0:04:25"}
|
||||
{"current_steps": 3660, "total_steps": 3703, "loss": 0.2057, "lr": 1.720809618516839e-08, "epoch": 6.918714555765596, "percentage": 98.84, "elapsed_time": "5:37:07", "remaining_time": "0:03:57"}
|
||||
{"current_steps": 3665, "total_steps": 3703, "loss": 0.1934, "lr": 1.351979287302463e-08, "epoch": 6.928166351606805, "percentage": 98.97, "elapsed_time": "5:37:38", "remaining_time": "0:03:30"}
|
||||
{"current_steps": 3670, "total_steps": 3703, "loss": 0.2031, "lr": 1.0275675986242128e-08, "epoch": 6.937618147448015, "percentage": 99.11, "elapsed_time": "5:38:02", "remaining_time": "0:03:02"}
|
||||
{"current_steps": 3675, "total_steps": 3703, "loss": 0.2145, "lr": 7.475817623194826e-09, "epoch": 6.947069943289225, "percentage": 99.24, "elapsed_time": "5:38:24", "remaining_time": "0:02:34"}
|
||||
{"current_steps": 3680, "total_steps": 3703, "loss": 0.1931, "lr": 5.120280008901191e-09, "epoch": 6.956521739130435, "percentage": 99.38, "elapsed_time": "5:38:50", "remaining_time": "0:02:07"}
|
||||
{"current_steps": 3685, "total_steps": 3703, "loss": 0.1851, "lr": 3.2091154936386705e-09, "epoch": 6.965973534971645, "percentage": 99.51, "elapsed_time": "5:39:21", "remaining_time": "0:01:39"}
|
||||
{"current_steps": 3690, "total_steps": 3703, "loss": 0.1806, "lr": 1.7423665517868338e-09, "epoch": 6.975425330812854, "percentage": 99.65, "elapsed_time": "5:39:44", "remaining_time": "0:01:11"}
|
||||
{"current_steps": 3695, "total_steps": 3703, "loss": 0.2002, "lr": 7.200657808792422e-10, "epoch": 6.984877126654064, "percentage": 99.78, "elapsed_time": "5:40:15", "remaining_time": "0:00:44"}
|
||||
{"current_steps": 3700, "total_steps": 3703, "loss": 0.1867, "lr": 1.4223590088180416e-10, "epoch": 6.994328922495274, "percentage": 99.92, "elapsed_time": "5:40:42", "remaining_time": "0:00:16"}
|
||||
{"current_steps": 3703, "total_steps": 3703, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "5:41:03", "remaining_time": "0:00:00"}
|
||||
BIN
training_loss.png
Normal file
BIN
training_loss.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 47 KiB |
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user