初始化项目,由ModelHub XC社区提供模型
Model: DCAgent/a1-swesmith Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
60
README.md
Normal file
60
README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
---
|
||||
library_name: transformers
|
||||
license: other
|
||||
base_model: Qwen/Qwen3-8B
|
||||
tags:
|
||||
- llama-factory
|
||||
- full
|
||||
- generated_from_trainer
|
||||
model-index:
|
||||
- name: sft_a1_swesmith__Qwen3-8B
|
||||
results: []
|
||||
---
|
||||
|
||||
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||
should probably proofread and complete it, then remove this comment. -->
|
||||
|
||||
# sft_a1_swesmith__Qwen3-8B
|
||||
|
||||
This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on the `DCAgent/swesmith-sandboxes-with_tests-gpt-5-mini-passed_glm_4.7_traces` dataset, loaded from the locally preprocessed snapshot at `/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--swesmith-sandboxes-with_tests-gpt-5-mini-passed_glm_4.7_traces/snapshots/b9b0e0d113e9c37dd035f03644315478acc04487_thinking_preprocessed`.
|
||||
|
||||
## Model description
|
||||
|
||||
More information needed
|
||||
|
||||
## Intended uses & limitations
|
||||
|
||||
More information needed
|
||||
|
||||
## Training and evaluation data
|
||||
|
||||
More information needed
|
||||
|
||||
## Training procedure
|
||||
|
||||
### Training hyperparameters
|
||||
|
||||
The following hyperparameters were used during training:
|
||||
- learning_rate: 4e-05
|
||||
- train_batch_size: 1
|
||||
- eval_batch_size: 8
|
||||
- seed: 42
|
||||
- distributed_type: multi-GPU
|
||||
- num_devices: 16
|
||||
- total_train_batch_size: 16
|
||||
- total_eval_batch_size: 128
|
||||
- optimizer: ADAMW_TORCH_FUSED (`OptimizerNames.ADAMW_TORCH_FUSED`) with betas=(0.9, 0.98), epsilon=1e-08, and no additional optimizer arguments
|
||||
- lr_scheduler_type: cosine
|
||||
- lr_scheduler_warmup_ratio: 0.1
|
||||
- num_epochs: 7.0
|
||||
|
||||
### Training results
|
||||
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- Transformers 4.57.6
|
||||
- Pytorch 2.9.1+cu130
|
||||
- Datasets 4.7.0
|
||||
- Tokenizers 0.22.2
|
||||
28
added_tokens.json
Normal file
28
added_tokens.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"</think>": 151668,
|
||||
"</tool_call>": 151658,
|
||||
"</tool_response>": 151666,
|
||||
"<think>": 151667,
|
||||
"<tool_call>": 151657,
|
||||
"<tool_response>": 151665,
|
||||
"<|box_end|>": 151649,
|
||||
"<|box_start|>": 151648,
|
||||
"<|endoftext|>": 151643,
|
||||
"<|file_sep|>": 151664,
|
||||
"<|fim_middle|>": 151660,
|
||||
"<|fim_pad|>": 151662,
|
||||
"<|fim_prefix|>": 151659,
|
||||
"<|fim_suffix|>": 151661,
|
||||
"<|im_end|>": 151645,
|
||||
"<|im_start|>": 151644,
|
||||
"<|image_pad|>": 151655,
|
||||
"<|object_ref_end|>": 151647,
|
||||
"<|object_ref_start|>": 151646,
|
||||
"<|quad_end|>": 151651,
|
||||
"<|quad_start|>": 151650,
|
||||
"<|repo_name|>": 151663,
|
||||
"<|video_pad|>": 151656,
|
||||
"<|vision_end|>": 151653,
|
||||
"<|vision_pad|>": 151654,
|
||||
"<|vision_start|>": 151652
|
||||
}
|
||||
16
all_results.json
Normal file
16
all_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.004657515168383943,
|
||||
"achieved_tflops_per_gpu_theoretical": 687.7446262531031,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.14940977096557617,
|
||||
"mfu_percent": 0.0003291530154334942,
|
||||
"mfu_percent_theoretical": 48.603860512586785,
|
||||
"total_flos": 1139548609970176.0,
|
||||
"train_loss": 0.2251253145215901,
|
||||
"train_runtime": 15291.7995,
|
||||
"train_samples_per_second": 3.246,
|
||||
"train_steps_per_second": 0.203,
|
||||
"valid_targets_mean": 3384.2,
|
||||
"valid_targets_min": 497
|
||||
}
|
||||
89
chat_template.jinja
Normal file
89
chat_template.jinja
Normal file
@@ -0,0 +1,89 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- if message.content is string %}
|
||||
{%- set content = message.content %}
|
||||
{%- else %}
|
||||
{%- set content = '' %}
|
||||
{%- endif %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||
{{- '<think>\n\n</think>\n\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
68
config.json
Normal file
68
config.json
Normal file
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 12288,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 40960,
|
||||
"max_window_layers": 36,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 36,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 151643,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"transformers_version": "4.57.6",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
12
generation_config.json
Normal file
12
generation_config.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"temperature": 0.6,
|
||||
"top_k": 20,
|
||||
"top_p": 0.95,
|
||||
"transformers_version": "4.57.6"
|
||||
}
|
||||
151388
merges.txt
Normal file
151388
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00004.safetensors
Normal file
3
model-00001-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ef48e0f1872f32bd625d5d8215121aefac8c2291f15468c5c6ec84f2e921f067
|
||||
size 4902257696
|
||||
3
model-00002-of-00004.safetensors
Normal file
3
model-00002-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:60d95f9e6294725eeb81d9a2c109f3e010cb6776782b9e86b51ff44d9fb18adc
|
||||
size 4915960368
|
||||
3
model-00003-of-00004.safetensors
Normal file
3
model-00003-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4c6ac8212c50dfc042902998573c02cb1138fab1cdcbb2a37398de405058d59a
|
||||
size 4983068496
|
||||
3
model-00004-of-00004.safetensors
Normal file
3
model-00004-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c0c45cb28b81a9a95f0a2c75d4dd6f698e1b75c39247b9dcea5de6c9c4c10f61
|
||||
size 1580230264
|
||||
407
model.safetensors.index.json
Normal file
407
model.safetensors.index.json
Normal file
@@ -0,0 +1,407 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_parameters": 308224,
|
||||
"total_size": 16381470720
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00004-of-00004.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.norm.weight": "model-00004-of-00004.safetensors"
|
||||
}
|
||||
}
|
||||
12
run_summary.json
Normal file
12
run_summary.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"agent_name": "b9b0e0d113e9c37dd035f03644315478acc04487_thinking_preprocessed",
|
||||
"training_start": null,
|
||||
"training_end": null,
|
||||
"created_by": "raoof1",
|
||||
"base_model_name": "Qwen/Qwen3-8B",
|
||||
"dataset_name": "/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--swesmith-sandboxes-with_tests-gpt-5-mini-passed_glm_4.7_traces/snapshots/b9b0e0d113e9c37dd035f03644315478acc04487_thinking_preprocessed",
|
||||
"training_type": "SFT",
|
||||
"training_parameters": "https://huggingface.co/DCAgent/a1-swesmith/blob/main/config.json",
|
||||
"wandb_link": null,
|
||||
"traces_location_s3": null
|
||||
}
|
||||
31
special_tokens_map.json
Normal file
31
special_tokens_map.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"eos_token": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
||||
size 11422654
|
||||
240
tokenizer_config.json
Normal file
240
tokenizer_config.json
Normal file
@@ -0,0 +1,240 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|object_ref_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|object_ref_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151648": {
|
||||
"content": "<|box_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151649": {
|
||||
"content": "<|box_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151665": {
|
||||
"content": "<tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151666": {
|
||||
"content": "</tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151667": {
|
||||
"content": "<think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151668": {
|
||||
"content": "</think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": {},
|
||||
"model_max_length": 32768,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"padding_side": "right",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
16
train_results.json
Normal file
16
train_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.004657515168383943,
|
||||
"achieved_tflops_per_gpu_theoretical": 687.7446262531031,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.14940977096557617,
|
||||
"mfu_percent": 0.0003291530154334942,
|
||||
"mfu_percent_theoretical": 48.603860512586785,
|
||||
"total_flos": 1139548609970176.0,
|
||||
"train_loss": 0.2251253145215901,
|
||||
"train_runtime": 15291.7995,
|
||||
"train_samples_per_second": 3.246,
|
||||
"train_steps_per_second": 0.203,
|
||||
"valid_targets_mean": 3384.2,
|
||||
"valid_targets_min": 497
|
||||
}
|
||||
622
trainer_log.jsonl
Normal file
622
trainer_log.jsonl
Normal file
@@ -0,0 +1,622 @@
|
||||
{"current_steps": 5, "total_steps": 3108, "loss": 0.8639, "lr": 5.144694533762058e-07, "epoch": 0.01126126126126126, "percentage": 0.16, "elapsed_time": "0:00:32", "remaining_time": "5:40:02"}
|
||||
{"current_steps": 10, "total_steps": 3108, "loss": 0.912, "lr": 1.157556270096463e-06, "epoch": 0.02252252252252252, "percentage": 0.32, "elapsed_time": "0:00:57", "remaining_time": "4:57:50"}
|
||||
{"current_steps": 15, "total_steps": 3108, "loss": 0.8262, "lr": 1.8006430868167204e-06, "epoch": 0.033783783783783786, "percentage": 0.48, "elapsed_time": "0:01:24", "remaining_time": "4:51:56"}
|
||||
{"current_steps": 20, "total_steps": 3108, "loss": 0.7415, "lr": 2.4437299035369775e-06, "epoch": 0.04504504504504504, "percentage": 0.64, "elapsed_time": "0:01:53", "remaining_time": "4:53:17"}
|
||||
{"current_steps": 25, "total_steps": 3108, "loss": 0.6473, "lr": 3.0868167202572353e-06, "epoch": 0.05630630630630631, "percentage": 0.8, "elapsed_time": "0:02:19", "remaining_time": "4:46:06"}
|
||||
{"current_steps": 30, "total_steps": 3108, "loss": 0.5883, "lr": 3.7299035369774923e-06, "epoch": 0.06756756756756757, "percentage": 0.97, "elapsed_time": "0:02:47", "remaining_time": "4:47:11"}
|
||||
{"current_steps": 35, "total_steps": 3108, "loss": 0.5426, "lr": 4.37299035369775e-06, "epoch": 0.07882882882882883, "percentage": 1.13, "elapsed_time": "0:03:11", "remaining_time": "4:40:55"}
|
||||
{"current_steps": 40, "total_steps": 3108, "loss": 0.5092, "lr": 5.016077170418007e-06, "epoch": 0.09009009009009009, "percentage": 1.29, "elapsed_time": "0:03:35", "remaining_time": "4:36:02"}
|
||||
{"current_steps": 45, "total_steps": 3108, "loss": 0.4986, "lr": 5.6591639871382644e-06, "epoch": 0.10135135135135136, "percentage": 1.45, "elapsed_time": "0:04:02", "remaining_time": "4:34:55"}
|
||||
{"current_steps": 50, "total_steps": 3108, "loss": 0.4858, "lr": 6.302250803858521e-06, "epoch": 0.11261261261261261, "percentage": 1.61, "elapsed_time": "0:04:26", "remaining_time": "4:31:59"}
|
||||
{"current_steps": 55, "total_steps": 3108, "loss": 0.4593, "lr": 6.945337620578779e-06, "epoch": 0.12387387387387387, "percentage": 1.77, "elapsed_time": "0:04:49", "remaining_time": "4:28:16"}
|
||||
{"current_steps": 60, "total_steps": 3108, "loss": 0.4559, "lr": 7.588424437299035e-06, "epoch": 0.13513513513513514, "percentage": 1.93, "elapsed_time": "0:05:17", "remaining_time": "4:28:37"}
|
||||
{"current_steps": 65, "total_steps": 3108, "loss": 0.47, "lr": 8.231511254019293e-06, "epoch": 0.1463963963963964, "percentage": 2.09, "elapsed_time": "0:05:43", "remaining_time": "4:28:10"}
|
||||
{"current_steps": 70, "total_steps": 3108, "loss": 0.4082, "lr": 8.874598070739551e-06, "epoch": 0.15765765765765766, "percentage": 2.25, "elapsed_time": "0:06:11", "remaining_time": "4:28:25"}
|
||||
{"current_steps": 75, "total_steps": 3108, "loss": 0.4045, "lr": 9.517684887459809e-06, "epoch": 0.16891891891891891, "percentage": 2.41, "elapsed_time": "0:06:37", "remaining_time": "4:27:51"}
|
||||
{"current_steps": 80, "total_steps": 3108, "loss": 0.4013, "lr": 1.0160771704180067e-05, "epoch": 0.18018018018018017, "percentage": 2.57, "elapsed_time": "0:07:00", "remaining_time": "4:25:23"}
|
||||
{"current_steps": 85, "total_steps": 3108, "loss": 0.3841, "lr": 1.0803858520900323e-05, "epoch": 0.19144144144144143, "percentage": 2.73, "elapsed_time": "0:07:27", "remaining_time": "4:25:22"}
|
||||
{"current_steps": 90, "total_steps": 3108, "loss": 0.3923, "lr": 1.144694533762058e-05, "epoch": 0.20270270270270271, "percentage": 2.9, "elapsed_time": "0:07:53", "remaining_time": "4:24:30"}
|
||||
{"current_steps": 95, "total_steps": 3108, "loss": 0.387, "lr": 1.2090032154340837e-05, "epoch": 0.21396396396396397, "percentage": 3.06, "elapsed_time": "0:08:20", "remaining_time": "4:24:32"}
|
||||
{"current_steps": 100, "total_steps": 3108, "loss": 0.3604, "lr": 1.2733118971061094e-05, "epoch": 0.22522522522522523, "percentage": 3.22, "elapsed_time": "0:08:42", "remaining_time": "4:22:04"}
|
||||
{"current_steps": 105, "total_steps": 3108, "loss": 0.3656, "lr": 1.337620578778135e-05, "epoch": 0.23648648648648649, "percentage": 3.38, "elapsed_time": "0:09:10", "remaining_time": "4:22:29"}
|
||||
{"current_steps": 110, "total_steps": 3108, "loss": 0.3646, "lr": 1.4019292604501608e-05, "epoch": 0.24774774774774774, "percentage": 3.54, "elapsed_time": "0:09:36", "remaining_time": "4:21:54"}
|
||||
{"current_steps": 115, "total_steps": 3108, "loss": 0.3427, "lr": 1.4662379421221866e-05, "epoch": 0.25900900900900903, "percentage": 3.7, "elapsed_time": "0:10:01", "remaining_time": "4:20:56"}
|
||||
{"current_steps": 120, "total_steps": 3108, "loss": 0.3619, "lr": 1.5305466237942124e-05, "epoch": 0.2702702702702703, "percentage": 3.86, "elapsed_time": "0:10:25", "remaining_time": "4:19:37"}
|
||||
{"current_steps": 125, "total_steps": 3108, "loss": 0.3303, "lr": 1.594855305466238e-05, "epoch": 0.28153153153153154, "percentage": 4.02, "elapsed_time": "0:10:51", "remaining_time": "4:18:55"}
|
||||
{"current_steps": 130, "total_steps": 3108, "loss": 0.3314, "lr": 1.659163987138264e-05, "epoch": 0.2927927927927928, "percentage": 4.18, "elapsed_time": "0:11:14", "remaining_time": "4:17:22"}
|
||||
{"current_steps": 135, "total_steps": 3108, "loss": 0.3407, "lr": 1.7234726688102896e-05, "epoch": 0.30405405405405406, "percentage": 4.34, "elapsed_time": "0:11:37", "remaining_time": "4:16:11"}
|
||||
{"current_steps": 140, "total_steps": 3108, "loss": 0.3361, "lr": 1.7877813504823152e-05, "epoch": 0.3153153153153153, "percentage": 4.5, "elapsed_time": "0:12:04", "remaining_time": "4:16:01"}
|
||||
{"current_steps": 145, "total_steps": 3108, "loss": 0.3466, "lr": 1.8520900321543408e-05, "epoch": 0.32657657657657657, "percentage": 4.67, "elapsed_time": "0:12:30", "remaining_time": "4:15:44"}
|
||||
{"current_steps": 150, "total_steps": 3108, "loss": 0.3295, "lr": 1.9163987138263667e-05, "epoch": 0.33783783783783783, "percentage": 4.83, "elapsed_time": "0:12:56", "remaining_time": "4:15:11"}
|
||||
{"current_steps": 155, "total_steps": 3108, "loss": 0.3275, "lr": 1.9807073954983923e-05, "epoch": 0.3490990990990991, "percentage": 4.99, "elapsed_time": "0:13:21", "remaining_time": "4:14:34"}
|
||||
{"current_steps": 160, "total_steps": 3108, "loss": 0.3256, "lr": 2.0450160771704183e-05, "epoch": 0.36036036036036034, "percentage": 5.15, "elapsed_time": "0:13:46", "remaining_time": "4:13:39"}
|
||||
{"current_steps": 165, "total_steps": 3108, "loss": 0.3189, "lr": 2.1093247588424436e-05, "epoch": 0.3716216216216216, "percentage": 5.31, "elapsed_time": "0:14:09", "remaining_time": "4:12:25"}
|
||||
{"current_steps": 170, "total_steps": 3108, "loss": 0.3218, "lr": 2.1736334405144695e-05, "epoch": 0.38288288288288286, "percentage": 5.47, "elapsed_time": "0:14:32", "remaining_time": "4:11:20"}
|
||||
{"current_steps": 175, "total_steps": 3108, "loss": 0.32, "lr": 2.237942122186495e-05, "epoch": 0.39414414414414417, "percentage": 5.63, "elapsed_time": "0:14:57", "remaining_time": "4:10:39"}
|
||||
{"current_steps": 180, "total_steps": 3108, "loss": 0.3174, "lr": 2.302250803858521e-05, "epoch": 0.40540540540540543, "percentage": 5.79, "elapsed_time": "0:15:21", "remaining_time": "4:09:55"}
|
||||
{"current_steps": 185, "total_steps": 3108, "loss": 0.3218, "lr": 2.3665594855305467e-05, "epoch": 0.4166666666666667, "percentage": 5.95, "elapsed_time": "0:15:44", "remaining_time": "4:08:49"}
|
||||
{"current_steps": 190, "total_steps": 3108, "loss": 0.3248, "lr": 2.4308681672025726e-05, "epoch": 0.42792792792792794, "percentage": 6.11, "elapsed_time": "0:16:10", "remaining_time": "4:08:20"}
|
||||
{"current_steps": 195, "total_steps": 3108, "loss": 0.3009, "lr": 2.4951768488745982e-05, "epoch": 0.4391891891891892, "percentage": 6.27, "elapsed_time": "0:16:39", "remaining_time": "4:08:54"}
|
||||
{"current_steps": 200, "total_steps": 3108, "loss": 0.2893, "lr": 2.5594855305466242e-05, "epoch": 0.45045045045045046, "percentage": 6.44, "elapsed_time": "0:17:08", "remaining_time": "4:09:10"}
|
||||
{"current_steps": 205, "total_steps": 3108, "loss": 0.3123, "lr": 2.6237942122186498e-05, "epoch": 0.4617117117117117, "percentage": 6.6, "elapsed_time": "0:17:33", "remaining_time": "4:08:36"}
|
||||
{"current_steps": 210, "total_steps": 3108, "loss": 0.3067, "lr": 2.688102893890675e-05, "epoch": 0.47297297297297297, "percentage": 6.76, "elapsed_time": "0:18:00", "remaining_time": "4:08:25"}
|
||||
{"current_steps": 215, "total_steps": 3108, "loss": 0.3119, "lr": 2.7524115755627014e-05, "epoch": 0.48423423423423423, "percentage": 6.92, "elapsed_time": "0:18:30", "remaining_time": "4:09:03"}
|
||||
{"current_steps": 220, "total_steps": 3108, "loss": 0.2961, "lr": 2.8167202572347266e-05, "epoch": 0.4954954954954955, "percentage": 7.08, "elapsed_time": "0:18:54", "remaining_time": "4:08:18"}
|
||||
{"current_steps": 225, "total_steps": 3108, "loss": 0.2998, "lr": 2.8810289389067526e-05, "epoch": 0.5067567567567568, "percentage": 7.24, "elapsed_time": "0:19:23", "remaining_time": "4:08:24"}
|
||||
{"current_steps": 230, "total_steps": 3108, "loss": 0.3041, "lr": 2.9453376205787782e-05, "epoch": 0.5180180180180181, "percentage": 7.4, "elapsed_time": "0:19:49", "remaining_time": "4:08:04"}
|
||||
{"current_steps": 235, "total_steps": 3108, "loss": 0.3076, "lr": 3.009646302250804e-05, "epoch": 0.5292792792792793, "percentage": 7.56, "elapsed_time": "0:20:15", "remaining_time": "4:07:45"}
|
||||
{"current_steps": 240, "total_steps": 3108, "loss": 0.2969, "lr": 3.07395498392283e-05, "epoch": 0.5405405405405406, "percentage": 7.72, "elapsed_time": "0:20:44", "remaining_time": "4:07:47"}
|
||||
{"current_steps": 245, "total_steps": 3108, "loss": 0.2999, "lr": 3.138263665594856e-05, "epoch": 0.5518018018018018, "percentage": 7.88, "elapsed_time": "0:21:10", "remaining_time": "4:07:31"}
|
||||
{"current_steps": 250, "total_steps": 3108, "loss": 0.2792, "lr": 3.202572347266881e-05, "epoch": 0.5630630630630631, "percentage": 8.04, "elapsed_time": "0:21:36", "remaining_time": "4:07:07"}
|
||||
{"current_steps": 255, "total_steps": 3108, "loss": 0.322, "lr": 3.266881028938907e-05, "epoch": 0.5743243243243243, "percentage": 8.2, "elapsed_time": "0:22:01", "remaining_time": "4:06:26"}
|
||||
{"current_steps": 260, "total_steps": 3108, "loss": 0.2997, "lr": 3.331189710610933e-05, "epoch": 0.5855855855855856, "percentage": 8.37, "elapsed_time": "0:22:23", "remaining_time": "4:05:19"}
|
||||
{"current_steps": 265, "total_steps": 3108, "loss": 0.3058, "lr": 3.3954983922829585e-05, "epoch": 0.5968468468468469, "percentage": 8.53, "elapsed_time": "0:22:50", "remaining_time": "4:04:57"}
|
||||
{"current_steps": 270, "total_steps": 3108, "loss": 0.2892, "lr": 3.459807073954984e-05, "epoch": 0.6081081081081081, "percentage": 8.69, "elapsed_time": "0:23:16", "remaining_time": "4:04:37"}
|
||||
{"current_steps": 275, "total_steps": 3108, "loss": 0.2893, "lr": 3.52411575562701e-05, "epoch": 0.6193693693693694, "percentage": 8.85, "elapsed_time": "0:23:39", "remaining_time": "4:03:44"}
|
||||
{"current_steps": 280, "total_steps": 3108, "loss": 0.299, "lr": 3.588424437299036e-05, "epoch": 0.6306306306306306, "percentage": 9.01, "elapsed_time": "0:24:04", "remaining_time": "4:03:04"}
|
||||
{"current_steps": 285, "total_steps": 3108, "loss": 0.3056, "lr": 3.6527331189710616e-05, "epoch": 0.6418918918918919, "percentage": 9.17, "elapsed_time": "0:24:26", "remaining_time": "4:02:02"}
|
||||
{"current_steps": 290, "total_steps": 3108, "loss": 0.3097, "lr": 3.717041800643087e-05, "epoch": 0.6531531531531531, "percentage": 9.33, "elapsed_time": "0:24:52", "remaining_time": "4:01:38"}
|
||||
{"current_steps": 295, "total_steps": 3108, "loss": 0.3005, "lr": 3.781350482315113e-05, "epoch": 0.6644144144144144, "percentage": 9.49, "elapsed_time": "0:25:19", "remaining_time": "4:01:28"}
|
||||
{"current_steps": 300, "total_steps": 3108, "loss": 0.3046, "lr": 3.8456591639871385e-05, "epoch": 0.6756756756756757, "percentage": 9.65, "elapsed_time": "0:25:44", "remaining_time": "4:00:59"}
|
||||
{"current_steps": 305, "total_steps": 3108, "loss": 0.2973, "lr": 3.909967845659164e-05, "epoch": 0.6869369369369369, "percentage": 9.81, "elapsed_time": "0:26:11", "remaining_time": "4:00:41"}
|
||||
{"current_steps": 310, "total_steps": 3108, "loss": 0.2806, "lr": 3.97427652733119e-05, "epoch": 0.6981981981981982, "percentage": 9.97, "elapsed_time": "0:26:35", "remaining_time": "3:59:59"}
|
||||
{"current_steps": 315, "total_steps": 3108, "loss": 0.2891, "lr": 3.999988645790294e-05, "epoch": 0.7094594594594594, "percentage": 10.14, "elapsed_time": "0:26:59", "remaining_time": "3:59:21"}
|
||||
{"current_steps": 320, "total_steps": 3108, "loss": 0.2763, "lr": 3.999919259420062e-05, "epoch": 0.7207207207207207, "percentage": 10.3, "elapsed_time": "0:27:24", "remaining_time": "3:58:50"}
|
||||
{"current_steps": 325, "total_steps": 3108, "loss": 0.3009, "lr": 3.999786796759633e-05, "epoch": 0.7319819819819819, "percentage": 10.46, "elapsed_time": "0:27:51", "remaining_time": "3:58:32"}
|
||||
{"current_steps": 330, "total_steps": 3108, "loss": 0.2997, "lr": 3.999591261986801e-05, "epoch": 0.7432432432432432, "percentage": 10.62, "elapsed_time": "0:28:15", "remaining_time": "3:57:52"}
|
||||
{"current_steps": 335, "total_steps": 3108, "loss": 0.2997, "lr": 3.999332661268621e-05, "epoch": 0.7545045045045045, "percentage": 10.78, "elapsed_time": "0:28:40", "remaining_time": "3:57:21"}
|
||||
{"current_steps": 340, "total_steps": 3108, "loss": 0.2898, "lr": 3.999011002761208e-05, "epoch": 0.7657657657657657, "percentage": 10.94, "elapsed_time": "0:29:06", "remaining_time": "3:56:59"}
|
||||
{"current_steps": 345, "total_steps": 3108, "loss": 0.2976, "lr": 3.9986262966094836e-05, "epoch": 0.777027027027027, "percentage": 11.1, "elapsed_time": "0:29:29", "remaining_time": "3:56:14"}
|
||||
{"current_steps": 350, "total_steps": 3108, "loss": 0.2885, "lr": 3.998178554946858e-05, "epoch": 0.7882882882882883, "percentage": 11.26, "elapsed_time": "0:29:57", "remaining_time": "3:56:06"}
|
||||
{"current_steps": 355, "total_steps": 3108, "loss": 0.2796, "lr": 3.9976677918948406e-05, "epoch": 0.7995495495495496, "percentage": 11.42, "elapsed_time": "0:30:22", "remaining_time": "3:55:32"}
|
||||
{"current_steps": 360, "total_steps": 3108, "loss": 0.2805, "lr": 3.997094023562602e-05, "epoch": 0.8108108108108109, "percentage": 11.58, "elapsed_time": "0:30:49", "remaining_time": "3:55:17"}
|
||||
{"current_steps": 365, "total_steps": 3108, "loss": 0.2953, "lr": 3.9964572680464627e-05, "epoch": 0.8220720720720721, "percentage": 11.74, "elapsed_time": "0:31:14", "remaining_time": "3:54:50"}
|
||||
{"current_steps": 370, "total_steps": 3108, "loss": 0.2712, "lr": 3.9957575454293196e-05, "epoch": 0.8333333333333334, "percentage": 11.9, "elapsed_time": "0:31:43", "remaining_time": "3:54:49"}
|
||||
{"current_steps": 375, "total_steps": 3108, "loss": 0.2835, "lr": 3.994994877780018e-05, "epoch": 0.8445945945945946, "percentage": 12.07, "elapsed_time": "0:32:10", "remaining_time": "3:54:32"}
|
||||
{"current_steps": 380, "total_steps": 3108, "loss": 0.2882, "lr": 3.994169289152652e-05, "epoch": 0.8558558558558559, "percentage": 12.23, "elapsed_time": "0:32:37", "remaining_time": "3:54:11"}
|
||||
{"current_steps": 385, "total_steps": 3108, "loss": 0.276, "lr": 3.9932808055858055e-05, "epoch": 0.8671171171171171, "percentage": 12.39, "elapsed_time": "0:33:00", "remaining_time": "3:53:29"}
|
||||
{"current_steps": 390, "total_steps": 3108, "loss": 0.2758, "lr": 3.992329455101735e-05, "epoch": 0.8783783783783784, "percentage": 12.55, "elapsed_time": "0:33:24", "remaining_time": "3:52:52"}
|
||||
{"current_steps": 395, "total_steps": 3108, "loss": 0.2787, "lr": 3.9913152677054805e-05, "epoch": 0.8896396396396397, "percentage": 12.71, "elapsed_time": "0:33:52", "remaining_time": "3:52:42"}
|
||||
{"current_steps": 400, "total_steps": 3108, "loss": 0.2808, "lr": 3.990238275383923e-05, "epoch": 0.9009009009009009, "percentage": 12.87, "elapsed_time": "0:34:15", "remaining_time": "3:51:52"}
|
||||
{"current_steps": 405, "total_steps": 3108, "loss": 0.2748, "lr": 3.989098512104773e-05, "epoch": 0.9121621621621622, "percentage": 13.03, "elapsed_time": "0:34:38", "remaining_time": "3:51:11"}
|
||||
{"current_steps": 410, "total_steps": 3108, "loss": 0.2573, "lr": 3.987896013815504e-05, "epoch": 0.9234234234234234, "percentage": 13.19, "elapsed_time": "0:35:01", "remaining_time": "3:50:31"}
|
||||
{"current_steps": 415, "total_steps": 3108, "loss": 0.2691, "lr": 3.9866308184422095e-05, "epoch": 0.9346846846846847, "percentage": 13.35, "elapsed_time": "0:35:30", "remaining_time": "3:50:26"}
|
||||
{"current_steps": 420, "total_steps": 3108, "loss": 0.2741, "lr": 3.985302965888417e-05, "epoch": 0.9459459459459459, "percentage": 13.51, "elapsed_time": "0:35:54", "remaining_time": "3:49:50"}
|
||||
{"current_steps": 425, "total_steps": 3108, "loss": 0.2782, "lr": 3.983912498033823e-05, "epoch": 0.9572072072072072, "percentage": 13.67, "elapsed_time": "0:36:19", "remaining_time": "3:49:16"}
|
||||
{"current_steps": 430, "total_steps": 3108, "loss": 0.2679, "lr": 3.982459458732976e-05, "epoch": 0.9684684684684685, "percentage": 13.84, "elapsed_time": "0:36:42", "remaining_time": "3:48:36"}
|
||||
{"current_steps": 435, "total_steps": 3108, "loss": 0.2671, "lr": 3.9809438938138906e-05, "epoch": 0.9797297297297297, "percentage": 14.0, "elapsed_time": "0:37:06", "remaining_time": "3:48:03"}
|
||||
{"current_steps": 440, "total_steps": 3108, "loss": 0.2874, "lr": 3.9793658510766014e-05, "epoch": 0.990990990990991, "percentage": 14.16, "elapsed_time": "0:37:36", "remaining_time": "3:48:01"}
|
||||
{"current_steps": 445, "total_steps": 3108, "loss": 0.2519, "lr": 3.9777253802916586e-05, "epoch": 1.0022522522522523, "percentage": 14.32, "elapsed_time": "0:38:00", "remaining_time": "3:47:24"}
|
||||
{"current_steps": 450, "total_steps": 3108, "loss": 0.2572, "lr": 3.976022533198558e-05, "epoch": 1.0135135135135136, "percentage": 14.48, "elapsed_time": "0:38:22", "remaining_time": "3:46:42"}
|
||||
{"current_steps": 455, "total_steps": 3108, "loss": 0.2558, "lr": 3.974257363504106e-05, "epoch": 1.0247747747747749, "percentage": 14.64, "elapsed_time": "0:38:49", "remaining_time": "3:46:20"}
|
||||
{"current_steps": 460, "total_steps": 3108, "loss": 0.2675, "lr": 3.9724299268807274e-05, "epoch": 1.0360360360360361, "percentage": 14.8, "elapsed_time": "0:39:12", "remaining_time": "3:45:43"}
|
||||
{"current_steps": 465, "total_steps": 3108, "loss": 0.2616, "lr": 3.9705402809647095e-05, "epoch": 1.0472972972972974, "percentage": 14.96, "elapsed_time": "0:39:38", "remaining_time": "3:45:16"}
|
||||
{"current_steps": 470, "total_steps": 3108, "loss": 0.2591, "lr": 3.9685884853543875e-05, "epoch": 1.0585585585585586, "percentage": 15.12, "elapsed_time": "0:40:02", "remaining_time": "3:44:42"}
|
||||
{"current_steps": 475, "total_steps": 3108, "loss": 0.2802, "lr": 3.966574601608259e-05, "epoch": 1.0698198198198199, "percentage": 15.28, "elapsed_time": "0:40:27", "remaining_time": "3:44:18"}
|
||||
{"current_steps": 480, "total_steps": 3108, "loss": 0.2629, "lr": 3.9644986932430456e-05, "epoch": 1.0810810810810811, "percentage": 15.44, "elapsed_time": "0:40:52", "remaining_time": "3:43:45"}
|
||||
{"current_steps": 485, "total_steps": 3108, "loss": 0.2541, "lr": 3.96236082573169e-05, "epoch": 1.0923423423423424, "percentage": 15.6, "elapsed_time": "0:41:12", "remaining_time": "3:42:50"}
|
||||
{"current_steps": 490, "total_steps": 3108, "loss": 0.259, "lr": 3.9601610665012897e-05, "epoch": 1.1036036036036037, "percentage": 15.77, "elapsed_time": "0:41:35", "remaining_time": "3:42:10"}
|
||||
{"current_steps": 495, "total_steps": 3108, "loss": 0.2618, "lr": 3.957899484930971e-05, "epoch": 1.114864864864865, "percentage": 15.93, "elapsed_time": "0:41:55", "remaining_time": "3:41:19"}
|
||||
{"current_steps": 500, "total_steps": 3108, "loss": 0.2695, "lr": 3.9555761523497015e-05, "epoch": 1.1261261261261262, "percentage": 16.09, "elapsed_time": "0:42:18", "remaining_time": "3:40:39"}
|
||||
{"current_steps": 505, "total_steps": 3108, "loss": 0.2603, "lr": 3.9531911420340415e-05, "epoch": 1.1373873873873874, "percentage": 16.25, "elapsed_time": "0:42:40", "remaining_time": "3:39:57"}
|
||||
{"current_steps": 510, "total_steps": 3108, "loss": 0.2582, "lr": 3.950744529205826e-05, "epoch": 1.1486486486486487, "percentage": 16.41, "elapsed_time": "0:43:08", "remaining_time": "3:39:43"}
|
||||
{"current_steps": 515, "total_steps": 3108, "loss": 0.2747, "lr": 3.948236391029801e-05, "epoch": 1.15990990990991, "percentage": 16.57, "elapsed_time": "0:43:33", "remaining_time": "3:39:18"}
|
||||
{"current_steps": 520, "total_steps": 3108, "loss": 0.2558, "lr": 3.9456668066111874e-05, "epoch": 1.1711711711711712, "percentage": 16.73, "elapsed_time": "0:43:59", "remaining_time": "3:38:54"}
|
||||
{"current_steps": 525, "total_steps": 3108, "loss": 0.257, "lr": 3.9430358569931824e-05, "epoch": 1.1824324324324325, "percentage": 16.89, "elapsed_time": "0:44:22", "remaining_time": "3:38:17"}
|
||||
{"current_steps": 530, "total_steps": 3108, "loss": 0.2732, "lr": 3.940343625154407e-05, "epoch": 1.1936936936936937, "percentage": 17.05, "elapsed_time": "0:44:44", "remaining_time": "3:37:36"}
|
||||
{"current_steps": 535, "total_steps": 3108, "loss": 0.2727, "lr": 3.9375901960062866e-05, "epoch": 1.204954954954955, "percentage": 17.21, "elapsed_time": "0:45:14", "remaining_time": "3:37:32"}
|
||||
{"current_steps": 540, "total_steps": 3108, "loss": 0.2541, "lr": 3.934775656390375e-05, "epoch": 1.2162162162162162, "percentage": 17.37, "elapsed_time": "0:45:38", "remaining_time": "3:37:00"}
|
||||
{"current_steps": 545, "total_steps": 3108, "loss": 0.2496, "lr": 3.931900095075616e-05, "epoch": 1.2274774774774775, "percentage": 17.54, "elapsed_time": "0:46:04", "remaining_time": "3:36:40"}
|
||||
{"current_steps": 550, "total_steps": 3108, "loss": 0.2712, "lr": 3.928963602755539e-05, "epoch": 1.2387387387387387, "percentage": 17.7, "elapsed_time": "0:46:29", "remaining_time": "3:36:12"}
|
||||
{"current_steps": 555, "total_steps": 3108, "loss": 0.2631, "lr": 3.9259662720454054e-05, "epoch": 1.25, "percentage": 17.86, "elapsed_time": "0:46:54", "remaining_time": "3:35:45"}
|
||||
{"current_steps": 560, "total_steps": 3108, "loss": 0.2558, "lr": 3.922908197479282e-05, "epoch": 1.2612612612612613, "percentage": 18.02, "elapsed_time": "0:47:18", "remaining_time": "3:35:16"}
|
||||
{"current_steps": 565, "total_steps": 3108, "loss": 0.2507, "lr": 3.919789475507061e-05, "epoch": 1.2725225225225225, "percentage": 18.18, "elapsed_time": "0:47:46", "remaining_time": "3:34:59"}
|
||||
{"current_steps": 570, "total_steps": 3108, "loss": 0.2726, "lr": 3.91661020449142e-05, "epoch": 1.2837837837837838, "percentage": 18.34, "elapsed_time": "0:48:15", "remaining_time": "3:34:52"}
|
||||
{"current_steps": 575, "total_steps": 3108, "loss": 0.2653, "lr": 3.913370484704718e-05, "epoch": 1.295045045045045, "percentage": 18.5, "elapsed_time": "0:48:40", "remaining_time": "3:34:25"}
|
||||
{"current_steps": 580, "total_steps": 3108, "loss": 0.256, "lr": 3.910070418325833e-05, "epoch": 1.3063063063063063, "percentage": 18.66, "elapsed_time": "0:49:05", "remaining_time": "3:34:00"}
|
||||
{"current_steps": 585, "total_steps": 3108, "loss": 0.2672, "lr": 3.9067101094369384e-05, "epoch": 1.3175675675675675, "percentage": 18.82, "elapsed_time": "0:49:34", "remaining_time": "3:33:46"}
|
||||
{"current_steps": 590, "total_steps": 3108, "loss": 0.2758, "lr": 3.903289664020221e-05, "epoch": 1.3288288288288288, "percentage": 18.98, "elapsed_time": "0:49:56", "remaining_time": "3:33:10"}
|
||||
{"current_steps": 595, "total_steps": 3108, "loss": 0.2471, "lr": 3.899809189954542e-05, "epoch": 1.34009009009009, "percentage": 19.14, "elapsed_time": "0:50:20", "remaining_time": "3:32:37"}
|
||||
{"current_steps": 600, "total_steps": 3108, "loss": 0.2482, "lr": 3.896268797012028e-05, "epoch": 1.3513513513513513, "percentage": 19.31, "elapsed_time": "0:50:44", "remaining_time": "3:32:04"}
|
||||
{"current_steps": 605, "total_steps": 3108, "loss": 0.2678, "lr": 3.892668596854614e-05, "epoch": 1.3626126126126126, "percentage": 19.47, "elapsed_time": "0:51:13", "remaining_time": "3:31:57"}
|
||||
{"current_steps": 610, "total_steps": 3108, "loss": 0.2479, "lr": 3.8890087030305165e-05, "epoch": 1.3738738738738738, "percentage": 19.63, "elapsed_time": "0:51:38", "remaining_time": "3:31:27"}
|
||||
{"current_steps": 615, "total_steps": 3108, "loss": 0.25, "lr": 3.88528923097066e-05, "epoch": 1.385135135135135, "percentage": 19.79, "elapsed_time": "0:52:01", "remaining_time": "3:30:54"}
|
||||
{"current_steps": 620, "total_steps": 3108, "loss": 0.2637, "lr": 3.8815102979850294e-05, "epoch": 1.3963963963963963, "percentage": 19.95, "elapsed_time": "0:52:28", "remaining_time": "3:30:35"}
|
||||
{"current_steps": 625, "total_steps": 3108, "loss": 0.2552, "lr": 3.877672023258973e-05, "epoch": 1.4076576576576576, "percentage": 20.11, "elapsed_time": "0:52:53", "remaining_time": "3:30:08"}
|
||||
{"current_steps": 630, "total_steps": 3108, "loss": 0.2502, "lr": 3.873774527849443e-05, "epoch": 1.4189189189189189, "percentage": 20.27, "elapsed_time": "0:53:20", "remaining_time": "3:29:50"}
|
||||
{"current_steps": 635, "total_steps": 3108, "loss": 0.2476, "lr": 3.8698179346811776e-05, "epoch": 1.4301801801801801, "percentage": 20.43, "elapsed_time": "0:53:46", "remaining_time": "3:29:24"}
|
||||
{"current_steps": 640, "total_steps": 3108, "loss": 0.2623, "lr": 3.865802368542825e-05, "epoch": 1.4414414414414414, "percentage": 20.59, "elapsed_time": "0:54:12", "remaining_time": "3:29:03"}
|
||||
{"current_steps": 645, "total_steps": 3108, "loss": 0.2464, "lr": 3.8617279560830045e-05, "epoch": 1.4527027027027026, "percentage": 20.75, "elapsed_time": "0:54:35", "remaining_time": "3:28:27"}
|
||||
{"current_steps": 650, "total_steps": 3108, "loss": 0.269, "lr": 3.857594825806315e-05, "epoch": 1.4639639639639639, "percentage": 20.91, "elapsed_time": "0:54:59", "remaining_time": "3:27:55"}
|
||||
{"current_steps": 655, "total_steps": 3108, "loss": 0.2562, "lr": 3.853403108069282e-05, "epoch": 1.4752252252252251, "percentage": 21.07, "elapsed_time": "0:55:28", "remaining_time": "3:27:46"}
|
||||
{"current_steps": 660, "total_steps": 3108, "loss": 0.2666, "lr": 3.849152935076245e-05, "epoch": 1.4864864864864864, "percentage": 21.24, "elapsed_time": "0:55:54", "remaining_time": "3:27:21"}
|
||||
{"current_steps": 665, "total_steps": 3108, "loss": 0.2561, "lr": 3.8448444408751886e-05, "epoch": 1.4977477477477477, "percentage": 21.4, "elapsed_time": "0:56:21", "remaining_time": "3:27:02"}
|
||||
{"current_steps": 670, "total_steps": 3108, "loss": 0.2591, "lr": 3.840477761353512e-05, "epoch": 1.509009009009009, "percentage": 21.56, "elapsed_time": "0:56:43", "remaining_time": "3:26:25"}
|
||||
{"current_steps": 675, "total_steps": 3108, "loss": 0.2537, "lr": 3.836053034233748e-05, "epoch": 1.5202702702702702, "percentage": 21.72, "elapsed_time": "0:57:07", "remaining_time": "3:25:54"}
|
||||
{"current_steps": 680, "total_steps": 3108, "loss": 0.2574, "lr": 3.831570399069218e-05, "epoch": 1.5315315315315314, "percentage": 21.88, "elapsed_time": "0:57:30", "remaining_time": "3:25:21"}
|
||||
{"current_steps": 685, "total_steps": 3108, "loss": 0.2432, "lr": 3.8270299972396265e-05, "epoch": 1.5427927927927927, "percentage": 22.04, "elapsed_time": "0:57:53", "remaining_time": "3:24:45"}
|
||||
{"current_steps": 690, "total_steps": 3108, "loss": 0.2582, "lr": 3.822431971946607e-05, "epoch": 1.554054054054054, "percentage": 22.2, "elapsed_time": "0:58:15", "remaining_time": "3:24:08"}
|
||||
{"current_steps": 695, "total_steps": 3108, "loss": 0.2465, "lr": 3.817776468209202e-05, "epoch": 1.5653153153153152, "percentage": 22.36, "elapsed_time": "0:58:38", "remaining_time": "3:23:36"}
|
||||
{"current_steps": 700, "total_steps": 3108, "loss": 0.2563, "lr": 3.813063632859291e-05, "epoch": 1.5765765765765765, "percentage": 22.52, "elapsed_time": "0:59:04", "remaining_time": "3:23:11"}
|
||||
{"current_steps": 705, "total_steps": 3108, "loss": 0.2634, "lr": 3.808293614536961e-05, "epoch": 1.5878378378378377, "percentage": 22.68, "elapsed_time": "0:59:27", "remaining_time": "3:22:39"}
|
||||
{"current_steps": 710, "total_steps": 3108, "loss": 0.2359, "lr": 3.8034665636858155e-05, "epoch": 1.599099099099099, "percentage": 22.84, "elapsed_time": "0:59:50", "remaining_time": "3:22:07"}
|
||||
{"current_steps": 715, "total_steps": 3108, "loss": 0.2674, "lr": 3.79858263254823e-05, "epoch": 1.6103603603603602, "percentage": 23.01, "elapsed_time": "1:00:17", "remaining_time": "3:21:47"}
|
||||
{"current_steps": 720, "total_steps": 3108, "loss": 0.2485, "lr": 3.793641975160552e-05, "epoch": 1.6216216216216215, "percentage": 23.17, "elapsed_time": "1:00:40", "remaining_time": "3:21:15"}
|
||||
{"current_steps": 725, "total_steps": 3108, "loss": 0.2474, "lr": 3.788644747348242e-05, "epoch": 1.6328828828828827, "percentage": 23.33, "elapsed_time": "1:01:06", "remaining_time": "3:20:51"}
|
||||
{"current_steps": 730, "total_steps": 3108, "loss": 0.2439, "lr": 3.7835911067209576e-05, "epoch": 1.644144144144144, "percentage": 23.49, "elapsed_time": "1:01:32", "remaining_time": "3:20:27"}
|
||||
{"current_steps": 735, "total_steps": 3108, "loss": 0.2539, "lr": 3.778481212667586e-05, "epoch": 1.6554054054054053, "percentage": 23.65, "elapsed_time": "1:01:57", "remaining_time": "3:20:01"}
|
||||
{"current_steps": 740, "total_steps": 3108, "loss": 0.2484, "lr": 3.773315226351214e-05, "epoch": 1.6666666666666665, "percentage": 23.81, "elapsed_time": "1:02:22", "remaining_time": "3:19:35"}
|
||||
{"current_steps": 745, "total_steps": 3108, "loss": 0.2542, "lr": 3.768093310704043e-05, "epoch": 1.6779279279279278, "percentage": 23.97, "elapsed_time": "1:02:49", "remaining_time": "3:19:16"}
|
||||
{"current_steps": 750, "total_steps": 3108, "loss": 0.2394, "lr": 3.7628156304222576e-05, "epoch": 1.689189189189189, "percentage": 24.13, "elapsed_time": "1:03:12", "remaining_time": "3:18:43"}
|
||||
{"current_steps": 755, "total_steps": 3108, "loss": 0.242, "lr": 3.757482351960822e-05, "epoch": 1.7004504504504503, "percentage": 24.29, "elapsed_time": "1:03:34", "remaining_time": "3:18:07"}
|
||||
{"current_steps": 760, "total_steps": 3108, "loss": 0.2461, "lr": 3.752093643528239e-05, "epoch": 1.7117117117117115, "percentage": 24.45, "elapsed_time": "1:03:56", "remaining_time": "3:17:33"}
|
||||
{"current_steps": 765, "total_steps": 3108, "loss": 0.2554, "lr": 3.746649675081236e-05, "epoch": 1.722972972972973, "percentage": 24.61, "elapsed_time": "1:04:20", "remaining_time": "3:17:05"}
|
||||
{"current_steps": 770, "total_steps": 3108, "loss": 0.2446, "lr": 3.741150618319414e-05, "epoch": 1.7342342342342343, "percentage": 24.77, "elapsed_time": "1:04:49", "remaining_time": "3:16:48"}
|
||||
{"current_steps": 775, "total_steps": 3108, "loss": 0.2608, "lr": 3.7355966466798224e-05, "epoch": 1.7454954954954955, "percentage": 24.94, "elapsed_time": "1:05:11", "remaining_time": "3:16:15"}
|
||||
{"current_steps": 780, "total_steps": 3108, "loss": 0.2483, "lr": 3.729987935331497e-05, "epoch": 1.7567567567567568, "percentage": 25.1, "elapsed_time": "1:05:38", "remaining_time": "3:15:54"}
|
||||
{"current_steps": 785, "total_steps": 3108, "loss": 0.2427, "lr": 3.724324661169931e-05, "epoch": 1.768018018018018, "percentage": 25.26, "elapsed_time": "1:06:03", "remaining_time": "3:15:27"}
|
||||
{"current_steps": 790, "total_steps": 3108, "loss": 0.2384, "lr": 3.7186070028114986e-05, "epoch": 1.7792792792792793, "percentage": 25.42, "elapsed_time": "1:06:29", "remaining_time": "3:15:04"}
|
||||
{"current_steps": 795, "total_steps": 3108, "loss": 0.2446, "lr": 3.7128351405878186e-05, "epoch": 1.7905405405405406, "percentage": 25.58, "elapsed_time": "1:06:55", "remaining_time": "3:14:42"}
|
||||
{"current_steps": 800, "total_steps": 3108, "loss": 0.2424, "lr": 3.707009256540067e-05, "epoch": 1.8018018018018018, "percentage": 25.74, "elapsed_time": "1:07:18", "remaining_time": "3:14:10"}
|
||||
{"current_steps": 805, "total_steps": 3108, "loss": 0.2601, "lr": 3.70112953441324e-05, "epoch": 1.813063063063063, "percentage": 25.9, "elapsed_time": "1:07:47", "remaining_time": "3:13:56"}
|
||||
{"current_steps": 810, "total_steps": 3108, "loss": 0.2465, "lr": 3.695196159650352e-05, "epoch": 1.8243243243243243, "percentage": 26.06, "elapsed_time": "1:08:10", "remaining_time": "3:13:23"}
|
||||
{"current_steps": 815, "total_steps": 3108, "loss": 0.253, "lr": 3.6892093193865926e-05, "epoch": 1.8355855855855856, "percentage": 26.22, "elapsed_time": "1:08:32", "remaining_time": "3:12:50"}
|
||||
{"current_steps": 820, "total_steps": 3108, "loss": 0.2425, "lr": 3.683169202443421e-05, "epoch": 1.8468468468468469, "percentage": 26.38, "elapsed_time": "1:08:56", "remaining_time": "3:12:21"}
|
||||
{"current_steps": 825, "total_steps": 3108, "loss": 0.2495, "lr": 3.677075999322613e-05, "epoch": 1.8581081081081081, "percentage": 26.54, "elapsed_time": "1:09:17", "remaining_time": "3:11:43"}
|
||||
{"current_steps": 830, "total_steps": 3108, "loss": 0.2517, "lr": 3.670929902200251e-05, "epoch": 1.8693693693693694, "percentage": 26.71, "elapsed_time": "1:09:38", "remaining_time": "3:11:09"}
|
||||
{"current_steps": 835, "total_steps": 3108, "loss": 0.2536, "lr": 3.6647311049206625e-05, "epoch": 1.8806306306306306, "percentage": 26.87, "elapsed_time": "1:10:01", "remaining_time": "3:10:36"}
|
||||
{"current_steps": 840, "total_steps": 3108, "loss": 0.2399, "lr": 3.658479802990309e-05, "epoch": 1.8918918918918919, "percentage": 27.03, "elapsed_time": "1:10:24", "remaining_time": "3:10:06"}
|
||||
{"current_steps": 845, "total_steps": 3108, "loss": 0.2487, "lr": 3.6521761935716165e-05, "epoch": 1.9031531531531531, "percentage": 27.19, "elapsed_time": "1:10:48", "remaining_time": "3:09:38"}
|
||||
{"current_steps": 850, "total_steps": 3108, "loss": 0.2546, "lr": 3.645820475476761e-05, "epoch": 1.9144144144144144, "percentage": 27.35, "elapsed_time": "1:11:11", "remaining_time": "3:09:06"}
|
||||
{"current_steps": 855, "total_steps": 3108, "loss": 0.2644, "lr": 3.639412849161394e-05, "epoch": 1.9256756756756757, "percentage": 27.51, "elapsed_time": "1:11:35", "remaining_time": "3:08:39"}
|
||||
{"current_steps": 860, "total_steps": 3108, "loss": 0.2453, "lr": 3.632953516718321e-05, "epoch": 1.936936936936937, "percentage": 27.67, "elapsed_time": "1:12:01", "remaining_time": "3:08:15"}
|
||||
{"current_steps": 865, "total_steps": 3108, "loss": 0.2424, "lr": 3.626442681871132e-05, "epoch": 1.9481981981981982, "percentage": 27.83, "elapsed_time": "1:12:26", "remaining_time": "3:07:50"}
|
||||
{"current_steps": 870, "total_steps": 3108, "loss": 0.2382, "lr": 3.61988054996777e-05, "epoch": 1.9594594594594594, "percentage": 27.99, "elapsed_time": "1:12:53", "remaining_time": "3:07:31"}
|
||||
{"current_steps": 875, "total_steps": 3108, "loss": 0.2443, "lr": 3.613267327974059e-05, "epoch": 1.9707207207207207, "percentage": 28.15, "elapsed_time": "1:13:17", "remaining_time": "3:07:02"}
|
||||
{"current_steps": 880, "total_steps": 3108, "loss": 0.2463, "lr": 3.606603224467174e-05, "epoch": 1.981981981981982, "percentage": 28.31, "elapsed_time": "1:13:39", "remaining_time": "3:06:28"}
|
||||
{"current_steps": 885, "total_steps": 3108, "loss": 0.2372, "lr": 3.599888449629064e-05, "epoch": 1.9932432432432432, "percentage": 28.47, "elapsed_time": "1:14:02", "remaining_time": "3:05:58"}
|
||||
{"current_steps": 890, "total_steps": 3108, "loss": 0.24, "lr": 3.59312321523982e-05, "epoch": 2.0045045045045047, "percentage": 28.64, "elapsed_time": "1:14:28", "remaining_time": "3:05:35"}
|
||||
{"current_steps": 895, "total_steps": 3108, "loss": 0.2229, "lr": 3.5863077346710016e-05, "epoch": 2.015765765765766, "percentage": 28.8, "elapsed_time": "1:14:56", "remaining_time": "3:05:17"}
|
||||
{"current_steps": 900, "total_steps": 3108, "loss": 0.2334, "lr": 3.579442222878901e-05, "epoch": 2.027027027027027, "percentage": 28.96, "elapsed_time": "1:15:18", "remaining_time": "3:04:45"}
|
||||
{"current_steps": 905, "total_steps": 3108, "loss": 0.219, "lr": 3.5725268963977664e-05, "epoch": 2.0382882882882885, "percentage": 29.12, "elapsed_time": "1:15:41", "remaining_time": "3:04:16"}
|
||||
{"current_steps": 910, "total_steps": 3108, "loss": 0.2166, "lr": 3.5655619733329735e-05, "epoch": 2.0495495495495497, "percentage": 29.28, "elapsed_time": "1:16:05", "remaining_time": "3:03:48"}
|
||||
{"current_steps": 915, "total_steps": 3108, "loss": 0.2221, "lr": 3.558547673354144e-05, "epoch": 2.060810810810811, "percentage": 29.44, "elapsed_time": "1:16:32", "remaining_time": "3:03:27"}
|
||||
{"current_steps": 920, "total_steps": 3108, "loss": 0.2321, "lr": 3.5514842176882207e-05, "epoch": 2.0720720720720722, "percentage": 29.6, "elapsed_time": "1:16:57", "remaining_time": "3:03:02"}
|
||||
{"current_steps": 925, "total_steps": 3108, "loss": 0.2246, "lr": 3.544371829112485e-05, "epoch": 2.0833333333333335, "percentage": 29.76, "elapsed_time": "1:17:25", "remaining_time": "3:02:42"}
|
||||
{"current_steps": 930, "total_steps": 3108, "loss": 0.2313, "lr": 3.537210731947537e-05, "epoch": 2.0945945945945947, "percentage": 29.92, "elapsed_time": "1:17:47", "remaining_time": "3:02:11"}
|
||||
{"current_steps": 935, "total_steps": 3108, "loss": 0.2264, "lr": 3.530001152050215e-05, "epoch": 2.105855855855856, "percentage": 30.08, "elapsed_time": "1:18:10", "remaining_time": "3:01:42"}
|
||||
{"current_steps": 940, "total_steps": 3108, "loss": 0.2238, "lr": 3.5227433168064765e-05, "epoch": 2.1171171171171173, "percentage": 30.24, "elapsed_time": "1:18:32", "remaining_time": "3:01:09"}
|
||||
{"current_steps": 945, "total_steps": 3108, "loss": 0.2259, "lr": 3.515437455124223e-05, "epoch": 2.1283783783783785, "percentage": 30.41, "elapsed_time": "1:18:55", "remaining_time": "3:00:39"}
|
||||
{"current_steps": 950, "total_steps": 3108, "loss": 0.2317, "lr": 3.5080837974260826e-05, "epoch": 2.1396396396396398, "percentage": 30.57, "elapsed_time": "1:19:16", "remaining_time": "3:00:04"}
|
||||
{"current_steps": 955, "total_steps": 3108, "loss": 0.2335, "lr": 3.500682575642143e-05, "epoch": 2.150900900900901, "percentage": 30.73, "elapsed_time": "1:19:45", "remaining_time": "2:59:49"}
|
||||
{"current_steps": 960, "total_steps": 3108, "loss": 0.227, "lr": 3.493234023202633e-05, "epoch": 2.1621621621621623, "percentage": 30.89, "elapsed_time": "1:20:14", "remaining_time": "2:59:33"}
|
||||
{"current_steps": 965, "total_steps": 3108, "loss": 0.2302, "lr": 3.485738375030565e-05, "epoch": 2.1734234234234235, "percentage": 31.05, "elapsed_time": "1:20:39", "remaining_time": "2:59:07"}
|
||||
{"current_steps": 970, "total_steps": 3108, "loss": 0.2321, "lr": 3.4781958675343216e-05, "epoch": 2.184684684684685, "percentage": 31.21, "elapsed_time": "1:21:02", "remaining_time": "2:58:36"}
|
||||
{"current_steps": 975, "total_steps": 3108, "loss": 0.2397, "lr": 3.470606738600202e-05, "epoch": 2.195945945945946, "percentage": 31.37, "elapsed_time": "1:21:24", "remaining_time": "2:58:06"}
|
||||
{"current_steps": 980, "total_steps": 3108, "loss": 0.2141, "lr": 3.4629712275849206e-05, "epoch": 2.2072072072072073, "percentage": 31.53, "elapsed_time": "1:21:48", "remaining_time": "2:57:37"}
|
||||
{"current_steps": 985, "total_steps": 3108, "loss": 0.2275, "lr": 3.455289575308051e-05, "epoch": 2.2184684684684686, "percentage": 31.69, "elapsed_time": "1:22:11", "remaining_time": "2:57:08"}
|
||||
{"current_steps": 990, "total_steps": 3108, "loss": 0.2333, "lr": 3.4475620240444404e-05, "epoch": 2.22972972972973, "percentage": 31.85, "elapsed_time": "1:22:35", "remaining_time": "2:56:42"}
|
||||
{"current_steps": 995, "total_steps": 3108, "loss": 0.2199, "lr": 3.439788817516559e-05, "epoch": 2.240990990990991, "percentage": 32.01, "elapsed_time": "1:23:00", "remaining_time": "2:56:16"}
|
||||
{"current_steps": 1000, "total_steps": 3108, "loss": 0.2289, "lr": 3.431970200886821e-05, "epoch": 2.2522522522522523, "percentage": 32.18, "elapsed_time": "1:23:25", "remaining_time": "2:55:50"}
|
||||
{"current_steps": 1005, "total_steps": 3108, "loss": 0.2214, "lr": 3.424106420749847e-05, "epoch": 2.2635135135135136, "percentage": 32.34, "elapsed_time": "1:23:48", "remaining_time": "2:55:22"}
|
||||
{"current_steps": 1010, "total_steps": 3108, "loss": 0.2313, "lr": 3.416197725124687e-05, "epoch": 2.274774774774775, "percentage": 32.5, "elapsed_time": "1:24:11", "remaining_time": "2:54:53"}
|
||||
{"current_steps": 1015, "total_steps": 3108, "loss": 0.2345, "lr": 3.408244363447003e-05, "epoch": 2.286036036036036, "percentage": 32.66, "elapsed_time": "1:24:34", "remaining_time": "2:54:23"}
|
||||
{"current_steps": 1020, "total_steps": 3108, "loss": 0.228, "lr": 3.400246586561195e-05, "epoch": 2.2972972972972974, "percentage": 32.82, "elapsed_time": "1:24:58", "remaining_time": "2:53:56"}
|
||||
{"current_steps": 1025, "total_steps": 3108, "loss": 0.2376, "lr": 3.3922046467124946e-05, "epoch": 2.3085585585585586, "percentage": 32.98, "elapsed_time": "1:25:22", "remaining_time": "2:53:30"}
|
||||
{"current_steps": 1030, "total_steps": 3108, "loss": 0.2326, "lr": 3.3841187975390065e-05, "epoch": 2.31981981981982, "percentage": 33.14, "elapsed_time": "1:25:50", "remaining_time": "2:53:10"}
|
||||
{"current_steps": 1035, "total_steps": 3108, "loss": 0.2288, "lr": 3.375989294063711e-05, "epoch": 2.331081081081081, "percentage": 33.3, "elapsed_time": "1:26:14", "remaining_time": "2:52:44"}
|
||||
{"current_steps": 1040, "total_steps": 3108, "loss": 0.2216, "lr": 3.36781639268642e-05, "epoch": 2.3423423423423424, "percentage": 33.46, "elapsed_time": "1:26:38", "remaining_time": "2:52:17"}
|
||||
{"current_steps": 1045, "total_steps": 3108, "loss": 0.2292, "lr": 3.359600351175688e-05, "epoch": 2.3536036036036037, "percentage": 33.62, "elapsed_time": "1:27:07", "remaining_time": "2:52:00"}
|
||||
{"current_steps": 1050, "total_steps": 3108, "loss": 0.2229, "lr": 3.3513414286606854e-05, "epoch": 2.364864864864865, "percentage": 33.78, "elapsed_time": "1:27:30", "remaining_time": "2:51:30"}
|
||||
{"current_steps": 1055, "total_steps": 3108, "loss": 0.2373, "lr": 3.3430398856230253e-05, "epoch": 2.376126126126126, "percentage": 33.94, "elapsed_time": "1:27:53", "remaining_time": "2:51:01"}
|
||||
{"current_steps": 1060, "total_steps": 3108, "loss": 0.2236, "lr": 3.334695983888547e-05, "epoch": 2.3873873873873874, "percentage": 34.11, "elapsed_time": "1:28:17", "remaining_time": "2:50:34"}
|
||||
{"current_steps": 1065, "total_steps": 3108, "loss": 0.2244, "lr": 3.326309986619056e-05, "epoch": 2.3986486486486487, "percentage": 34.27, "elapsed_time": "1:28:40", "remaining_time": "2:50:06"}
|
||||
{"current_steps": 1070, "total_steps": 3108, "loss": 0.2393, "lr": 3.317882158304031e-05, "epoch": 2.40990990990991, "percentage": 34.43, "elapsed_time": "1:29:08", "remaining_time": "2:49:46"}
|
||||
{"current_steps": 1075, "total_steps": 3108, "loss": 0.2214, "lr": 3.3094127647522725e-05, "epoch": 2.421171171171171, "percentage": 34.59, "elapsed_time": "1:29:34", "remaining_time": "2:49:23"}
|
||||
{"current_steps": 1080, "total_steps": 3108, "loss": 0.2277, "lr": 3.3009020730835284e-05, "epoch": 2.4324324324324325, "percentage": 34.75, "elapsed_time": "1:29:55", "remaining_time": "2:48:51"}
|
||||
{"current_steps": 1085, "total_steps": 3108, "loss": 0.2258, "lr": 3.292350351720064e-05, "epoch": 2.4436936936936937, "percentage": 34.91, "elapsed_time": "1:30:20", "remaining_time": "2:48:25"}
|
||||
{"current_steps": 1090, "total_steps": 3108, "loss": 0.2335, "lr": 3.283757870378197e-05, "epoch": 2.454954954954955, "percentage": 35.07, "elapsed_time": "1:30:44", "remaining_time": "2:47:59"}
|
||||
{"current_steps": 1095, "total_steps": 3108, "loss": 0.2292, "lr": 3.27512490005979e-05, "epoch": 2.4662162162162162, "percentage": 35.23, "elapsed_time": "1:31:08", "remaining_time": "2:47:33"}
|
||||
{"current_steps": 1100, "total_steps": 3108, "loss": 0.2286, "lr": 3.266451713043706e-05, "epoch": 2.4774774774774775, "percentage": 35.39, "elapsed_time": "1:31:34", "remaining_time": "2:47:10"}
|
||||
{"current_steps": 1105, "total_steps": 3108, "loss": 0.2309, "lr": 3.2577385828772194e-05, "epoch": 2.4887387387387387, "percentage": 35.55, "elapsed_time": "1:31:59", "remaining_time": "2:46:44"}
|
||||
{"current_steps": 1110, "total_steps": 3108, "loss": 0.2309, "lr": 3.2489857843673877e-05, "epoch": 2.5, "percentage": 35.71, "elapsed_time": "1:32:25", "remaining_time": "2:46:22"}
|
||||
{"current_steps": 1115, "total_steps": 3108, "loss": 0.223, "lr": 3.240193593572384e-05, "epoch": 2.5112612612612613, "percentage": 35.88, "elapsed_time": "1:32:51", "remaining_time": "2:45:58"}
|
||||
{"current_steps": 1120, "total_steps": 3108, "loss": 0.2327, "lr": 3.231362287792793e-05, "epoch": 2.5225225225225225, "percentage": 36.04, "elapsed_time": "1:33:16", "remaining_time": "2:45:33"}
|
||||
{"current_steps": 1125, "total_steps": 3108, "loss": 0.2353, "lr": 3.2224921455628605e-05, "epoch": 2.5337837837837838, "percentage": 36.2, "elapsed_time": "1:33:39", "remaining_time": "2:45:05"}
|
||||
{"current_steps": 1130, "total_steps": 3108, "loss": 0.2244, "lr": 3.213583446641716e-05, "epoch": 2.545045045045045, "percentage": 36.36, "elapsed_time": "1:34:06", "remaining_time": "2:44:43"}
|
||||
{"current_steps": 1135, "total_steps": 3108, "loss": 0.2234, "lr": 3.2046364720045395e-05, "epoch": 2.5563063063063063, "percentage": 36.52, "elapsed_time": "1:34:28", "remaining_time": "2:44:14"}
|
||||
{"current_steps": 1140, "total_steps": 3108, "loss": 0.221, "lr": 3.195651503833707e-05, "epoch": 2.5675675675675675, "percentage": 36.68, "elapsed_time": "1:34:52", "remaining_time": "2:43:47"}
|
||||
{"current_steps": 1145, "total_steps": 3108, "loss": 0.2195, "lr": 3.1866288255098904e-05, "epoch": 2.578828828828829, "percentage": 36.84, "elapsed_time": "1:35:13", "remaining_time": "2:43:15"}
|
||||
{"current_steps": 1150, "total_steps": 3108, "loss": 0.2176, "lr": 3.177568721603115e-05, "epoch": 2.59009009009009, "percentage": 37.0, "elapsed_time": "1:35:40", "remaining_time": "2:42:53"}
|
||||
{"current_steps": 1155, "total_steps": 3108, "loss": 0.225, "lr": 3.168471477863788e-05, "epoch": 2.6013513513513513, "percentage": 37.16, "elapsed_time": "1:36:03", "remaining_time": "2:42:26"}
|
||||
{"current_steps": 1160, "total_steps": 3108, "loss": 0.2287, "lr": 3.159337381213685e-05, "epoch": 2.6126126126126126, "percentage": 37.32, "elapsed_time": "1:36:26", "remaining_time": "2:41:56"}
|
||||
{"current_steps": 1165, "total_steps": 3108, "loss": 0.2372, "lr": 3.150166719736903e-05, "epoch": 2.623873873873874, "percentage": 37.48, "elapsed_time": "1:36:53", "remaining_time": "2:41:35"}
|
||||
{"current_steps": 1170, "total_steps": 3108, "loss": 0.2349, "lr": 3.1409597826707704e-05, "epoch": 2.635135135135135, "percentage": 37.64, "elapsed_time": "1:37:18", "remaining_time": "2:41:10"}
|
||||
{"current_steps": 1175, "total_steps": 3108, "loss": 0.2238, "lr": 3.1317168603967266e-05, "epoch": 2.6463963963963963, "percentage": 37.81, "elapsed_time": "1:37:41", "remaining_time": "2:40:42"}
|
||||
{"current_steps": 1180, "total_steps": 3108, "loss": 0.2236, "lr": 3.1224382444311644e-05, "epoch": 2.6576576576576576, "percentage": 37.97, "elapsed_time": "1:38:04", "remaining_time": "2:40:14"}
|
||||
{"current_steps": 1185, "total_steps": 3108, "loss": 0.2309, "lr": 3.113124227416234e-05, "epoch": 2.668918918918919, "percentage": 38.13, "elapsed_time": "1:38:29", "remaining_time": "2:39:49"}
|
||||
{"current_steps": 1190, "total_steps": 3108, "loss": 0.2284, "lr": 3.103775103110615e-05, "epoch": 2.68018018018018, "percentage": 38.29, "elapsed_time": "1:38:58", "remaining_time": "2:39:31"}
|
||||
{"current_steps": 1195, "total_steps": 3108, "loss": 0.2274, "lr": 3.094391166380251e-05, "epoch": 2.6914414414414414, "percentage": 38.45, "elapsed_time": "1:39:22", "remaining_time": "2:39:05"}
|
||||
{"current_steps": 1200, "total_steps": 3108, "loss": 0.2179, "lr": 3.084972713189046e-05, "epoch": 2.7027027027027026, "percentage": 38.61, "elapsed_time": "1:39:48", "remaining_time": "2:38:41"}
|
||||
{"current_steps": 1205, "total_steps": 3108, "loss": 0.2235, "lr": 3.075520040589539e-05, "epoch": 2.713963963963964, "percentage": 38.77, "elapsed_time": "1:40:11", "remaining_time": "2:38:14"}
|
||||
{"current_steps": 1210, "total_steps": 3108, "loss": 0.2349, "lr": 3.0660334467135216e-05, "epoch": 2.725225225225225, "percentage": 38.93, "elapsed_time": "1:40:37", "remaining_time": "2:37:49"}
|
||||
{"current_steps": 1215, "total_steps": 3108, "loss": 0.2114, "lr": 3.05651323076265e-05, "epoch": 2.7364864864864864, "percentage": 39.09, "elapsed_time": "1:40:58", "remaining_time": "2:37:19"}
|
||||
{"current_steps": 1220, "total_steps": 3108, "loss": 0.2323, "lr": 3.0469596929989954e-05, "epoch": 2.7477477477477477, "percentage": 39.25, "elapsed_time": "1:41:26", "remaining_time": "2:36:58"}
|
||||
{"current_steps": 1225, "total_steps": 3108, "loss": 0.2304, "lr": 3.0373731347355843e-05, "epoch": 2.759009009009009, "percentage": 39.41, "elapsed_time": "1:41:56", "remaining_time": "2:36:41"}
|
||||
{"current_steps": 1230, "total_steps": 3108, "loss": 0.2296, "lr": 3.0277538583268873e-05, "epoch": 2.77027027027027, "percentage": 39.58, "elapsed_time": "1:42:17", "remaining_time": "2:36:11"}
|
||||
{"current_steps": 1235, "total_steps": 3108, "loss": 0.2269, "lr": 3.0181021671592873e-05, "epoch": 2.7815315315315314, "percentage": 39.74, "elapsed_time": "1:42:42", "remaining_time": "2:35:46"}
|
||||
{"current_steps": 1240, "total_steps": 3108, "loss": 0.2236, "lr": 3.0084183656415104e-05, "epoch": 2.7927927927927927, "percentage": 39.9, "elapsed_time": "1:43:08", "remaining_time": "2:35:22"}
|
||||
{"current_steps": 1245, "total_steps": 3108, "loss": 0.2309, "lr": 2.9987027591950232e-05, "epoch": 2.804054054054054, "percentage": 40.06, "elapsed_time": "1:43:30", "remaining_time": "2:34:53"}
|
||||
{"current_steps": 1250, "total_steps": 3108, "loss": 0.2196, "lr": 2.988955654244403e-05, "epoch": 2.815315315315315, "percentage": 40.22, "elapsed_time": "1:43:53", "remaining_time": "2:34:25"}
|
||||
{"current_steps": 1255, "total_steps": 3108, "loss": 0.2248, "lr": 2.9791773582076692e-05, "epoch": 2.8265765765765765, "percentage": 40.38, "elapsed_time": "1:44:19", "remaining_time": "2:34:01"}
|
||||
{"current_steps": 1260, "total_steps": 3108, "loss": 0.2258, "lr": 2.9693681794865917e-05, "epoch": 2.8378378378378377, "percentage": 40.54, "elapsed_time": "1:44:45", "remaining_time": "2:33:38"}
|
||||
{"current_steps": 1265, "total_steps": 3108, "loss": 0.2232, "lr": 2.9595284274569623e-05, "epoch": 2.849099099099099, "percentage": 40.7, "elapsed_time": "1:45:12", "remaining_time": "2:33:16"}
|
||||
{"current_steps": 1270, "total_steps": 3108, "loss": 0.2204, "lr": 2.949658412458837e-05, "epoch": 2.8603603603603602, "percentage": 40.86, "elapsed_time": "1:45:34", "remaining_time": "2:32:47"}
|
||||
{"current_steps": 1275, "total_steps": 3108, "loss": 0.2258, "lr": 2.9397584457867475e-05, "epoch": 2.8716216216216215, "percentage": 41.02, "elapsed_time": "1:46:01", "remaining_time": "2:32:25"}
|
||||
{"current_steps": 1280, "total_steps": 3108, "loss": 0.229, "lr": 2.9298288396798846e-05, "epoch": 2.8828828828828827, "percentage": 41.18, "elapsed_time": "1:46:27", "remaining_time": "2:32:02"}
|
||||
{"current_steps": 1285, "total_steps": 3108, "loss": 0.233, "lr": 2.919869907312249e-05, "epoch": 2.894144144144144, "percentage": 41.34, "elapsed_time": "1:46:51", "remaining_time": "2:31:36"}
|
||||
{"current_steps": 1290, "total_steps": 3108, "loss": 0.2222, "lr": 2.9098819627827758e-05, "epoch": 2.9054054054054053, "percentage": 41.51, "elapsed_time": "1:47:16", "remaining_time": "2:31:10"}
|
||||
{"current_steps": 1295, "total_steps": 3108, "loss": 0.2165, "lr": 2.8998653211054253e-05, "epoch": 2.9166666666666665, "percentage": 41.67, "elapsed_time": "1:47:38", "remaining_time": "2:30:42"}
|
||||
{"current_steps": 1300, "total_steps": 3108, "loss": 0.2306, "lr": 2.88982029819925e-05, "epoch": 2.9279279279279278, "percentage": 41.83, "elapsed_time": "1:48:01", "remaining_time": "2:30:14"}
|
||||
{"current_steps": 1305, "total_steps": 3108, "loss": 0.2288, "lr": 2.8797472108784296e-05, "epoch": 2.939189189189189, "percentage": 41.99, "elapsed_time": "1:48:28", "remaining_time": "2:29:52"}
|
||||
{"current_steps": 1310, "total_steps": 3108, "loss": 0.2239, "lr": 2.86964637684228e-05, "epoch": 2.9504504504504503, "percentage": 42.15, "elapsed_time": "1:48:54", "remaining_time": "2:29:29"}
|
||||
{"current_steps": 1315, "total_steps": 3108, "loss": 0.2256, "lr": 2.8595181146652305e-05, "epoch": 2.9617117117117115, "percentage": 42.31, "elapsed_time": "1:49:18", "remaining_time": "2:29:02"}
|
||||
{"current_steps": 1320, "total_steps": 3108, "loss": 0.2332, "lr": 2.8493627437867792e-05, "epoch": 2.972972972972973, "percentage": 42.47, "elapsed_time": "1:49:41", "remaining_time": "2:28:34"}
|
||||
{"current_steps": 1325, "total_steps": 3108, "loss": 0.2145, "lr": 2.8391805845014178e-05, "epoch": 2.9842342342342345, "percentage": 42.63, "elapsed_time": "1:50:06", "remaining_time": "2:28:10"}
|
||||
{"current_steps": 1330, "total_steps": 3108, "loss": 0.2302, "lr": 2.8289719579485265e-05, "epoch": 2.9954954954954953, "percentage": 42.79, "elapsed_time": "1:50:29", "remaining_time": "2:27:42"}
|
||||
{"current_steps": 1335, "total_steps": 3108, "loss": 0.2077, "lr": 2.8187371861022504e-05, "epoch": 3.0067567567567566, "percentage": 42.95, "elapsed_time": "1:50:57", "remaining_time": "2:27:21"}
|
||||
{"current_steps": 1340, "total_steps": 3108, "loss": 0.1987, "lr": 2.808476591761339e-05, "epoch": 3.018018018018018, "percentage": 43.11, "elapsed_time": "1:51:22", "remaining_time": "2:26:56"}
|
||||
{"current_steps": 1345, "total_steps": 3108, "loss": 0.2012, "lr": 2.7981904985389713e-05, "epoch": 3.029279279279279, "percentage": 43.28, "elapsed_time": "1:51:45", "remaining_time": "2:26:29"}
|
||||
{"current_steps": 1350, "total_steps": 3108, "loss": 0.1949, "lr": 2.7878792308525432e-05, "epoch": 3.0405405405405403, "percentage": 43.44, "elapsed_time": "1:52:05", "remaining_time": "2:25:58"}
|
||||
{"current_steps": 1355, "total_steps": 3108, "loss": 0.2043, "lr": 2.7775431139134408e-05, "epoch": 3.0518018018018016, "percentage": 43.6, "elapsed_time": "1:52:26", "remaining_time": "2:25:27"}
|
||||
{"current_steps": 1360, "total_steps": 3108, "loss": 0.2163, "lr": 2.7671824737167808e-05, "epoch": 3.063063063063063, "percentage": 43.76, "elapsed_time": "1:52:48", "remaining_time": "2:24:59"}
|
||||
{"current_steps": 1365, "total_steps": 3108, "loss": 0.2035, "lr": 2.7567976370311284e-05, "epoch": 3.074324324324324, "percentage": 43.92, "elapsed_time": "1:53:13", "remaining_time": "2:24:35"}
|
||||
{"current_steps": 1370, "total_steps": 3108, "loss": 0.2189, "lr": 2.7463889313881912e-05, "epoch": 3.0855855855855854, "percentage": 44.08, "elapsed_time": "1:53:36", "remaining_time": "2:24:07"}
|
||||
{"current_steps": 1375, "total_steps": 3108, "loss": 0.2014, "lr": 2.7359566850724925e-05, "epoch": 3.0968468468468466, "percentage": 44.24, "elapsed_time": "1:54:03", "remaining_time": "2:23:44"}
|
||||
{"current_steps": 1380, "total_steps": 3108, "loss": 0.2049, "lr": 2.7255012271110132e-05, "epoch": 3.108108108108108, "percentage": 44.4, "elapsed_time": "1:54:28", "remaining_time": "2:23:20"}
|
||||
{"current_steps": 1385, "total_steps": 3108, "loss": 0.2053, "lr": 2.7150228872628158e-05, "epoch": 3.1193693693693696, "percentage": 44.56, "elapsed_time": "1:54:53", "remaining_time": "2:22:55"}
|
||||
{"current_steps": 1390, "total_steps": 3108, "loss": 0.2137, "lr": 2.704521996008643e-05, "epoch": 3.1306306306306304, "percentage": 44.72, "elapsed_time": "1:55:16", "remaining_time": "2:22:28"}
|
||||
{"current_steps": 1395, "total_steps": 3108, "loss": 0.2169, "lr": 2.6939988845404994e-05, "epoch": 3.141891891891892, "percentage": 44.88, "elapsed_time": "1:55:42", "remaining_time": "2:22:05"}
|
||||
{"current_steps": 1400, "total_steps": 3108, "loss": 0.2165, "lr": 2.6834538847511988e-05, "epoch": 3.153153153153153, "percentage": 45.05, "elapsed_time": "1:56:08", "remaining_time": "2:21:41"}
|
||||
{"current_steps": 1405, "total_steps": 3108, "loss": 0.204, "lr": 2.6728873292239015e-05, "epoch": 3.1644144144144146, "percentage": 45.21, "elapsed_time": "1:56:29", "remaining_time": "2:21:12"}
|
||||
{"current_steps": 1410, "total_steps": 3108, "loss": 0.2065, "lr": 2.6622995512216218e-05, "epoch": 3.175675675675676, "percentage": 45.37, "elapsed_time": "1:56:53", "remaining_time": "2:20:46"}
|
||||
{"current_steps": 1415, "total_steps": 3108, "loss": 0.2134, "lr": 2.6516908846767208e-05, "epoch": 3.186936936936937, "percentage": 45.53, "elapsed_time": "1:57:17", "remaining_time": "2:20:19"}
|
||||
{"current_steps": 1420, "total_steps": 3108, "loss": 0.2208, "lr": 2.641061664180371e-05, "epoch": 3.1981981981981984, "percentage": 45.69, "elapsed_time": "1:57:42", "remaining_time": "2:19:54"}
|
||||
{"current_steps": 1425, "total_steps": 3108, "loss": 0.2104, "lr": 2.630412224972003e-05, "epoch": 3.2094594594594597, "percentage": 45.85, "elapsed_time": "1:58:05", "remaining_time": "2:19:28"}
|
||||
{"current_steps": 1430, "total_steps": 3108, "loss": 0.2056, "lr": 2.619742902928736e-05, "epoch": 3.220720720720721, "percentage": 46.01, "elapsed_time": "1:58:31", "remaining_time": "2:19:04"}
|
||||
{"current_steps": 1435, "total_steps": 3108, "loss": 0.2137, "lr": 2.609054034554782e-05, "epoch": 3.231981981981982, "percentage": 46.17, "elapsed_time": "1:58:52", "remaining_time": "2:18:35"}
|
||||
{"current_steps": 1440, "total_steps": 3108, "loss": 0.2132, "lr": 2.5983459569708313e-05, "epoch": 3.2432432432432434, "percentage": 46.33, "elapsed_time": "1:59:14", "remaining_time": "2:18:07"}
|
||||
{"current_steps": 1445, "total_steps": 3108, "loss": 0.2089, "lr": 2.5876190079034236e-05, "epoch": 3.2545045045045047, "percentage": 46.49, "elapsed_time": "1:59:38", "remaining_time": "2:17:41"}
|
||||
{"current_steps": 1450, "total_steps": 3108, "loss": 0.2001, "lr": 2.576873525674293e-05, "epoch": 3.265765765765766, "percentage": 46.65, "elapsed_time": "2:00:03", "remaining_time": "2:17:16"}
|
||||
{"current_steps": 1455, "total_steps": 3108, "loss": 0.2009, "lr": 2.5661098491896985e-05, "epoch": 3.277027027027027, "percentage": 46.81, "elapsed_time": "2:00:27", "remaining_time": "2:16:51"}
|
||||
{"current_steps": 1460, "total_steps": 3108, "loss": 0.2054, "lr": 2.5553283179297354e-05, "epoch": 3.2882882882882885, "percentage": 46.98, "elapsed_time": "2:00:50", "remaining_time": "2:16:23"}
|
||||
{"current_steps": 1465, "total_steps": 3108, "loss": 0.215, "lr": 2.5445292719376303e-05, "epoch": 3.2995495495495497, "percentage": 47.14, "elapsed_time": "2:01:17", "remaining_time": "2:16:01"}
|
||||
{"current_steps": 1470, "total_steps": 3108, "loss": 0.2036, "lr": 2.533713051809011e-05, "epoch": 3.310810810810811, "percentage": 47.3, "elapsed_time": "2:01:40", "remaining_time": "2:15:35"}
|
||||
{"current_steps": 1475, "total_steps": 3108, "loss": 0.2078, "lr": 2.5228799986811695e-05, "epoch": 3.3220720720720722, "percentage": 47.46, "elapsed_time": "2:02:06", "remaining_time": "2:15:11"}
|
||||
{"current_steps": 1480, "total_steps": 3108, "loss": 0.1962, "lr": 2.5120304542223006e-05, "epoch": 3.3333333333333335, "percentage": 47.62, "elapsed_time": "2:02:30", "remaining_time": "2:14:45"}
|
||||
{"current_steps": 1485, "total_steps": 3108, "loss": 0.2041, "lr": 2.501164760620726e-05, "epoch": 3.3445945945945947, "percentage": 47.78, "elapsed_time": "2:02:54", "remaining_time": "2:14:19"}
|
||||
{"current_steps": 1490, "total_steps": 3108, "loss": 0.2142, "lr": 2.4902832605741022e-05, "epoch": 3.355855855855856, "percentage": 47.94, "elapsed_time": "2:03:17", "remaining_time": "2:13:52"}
|
||||
{"current_steps": 1495, "total_steps": 3108, "loss": 0.2012, "lr": 2.4793862972786104e-05, "epoch": 3.3671171171171173, "percentage": 48.1, "elapsed_time": "2:03:39", "remaining_time": "2:13:25"}
|
||||
{"current_steps": 1500, "total_steps": 3108, "loss": 0.2118, "lr": 2.4684742144181358e-05, "epoch": 3.3783783783783785, "percentage": 48.26, "elapsed_time": "2:04:05", "remaining_time": "2:13:01"}
|
||||
{"current_steps": 1505, "total_steps": 3108, "loss": 0.2041, "lr": 2.457547356153425e-05, "epoch": 3.3896396396396398, "percentage": 48.42, "elapsed_time": "2:04:40", "remaining_time": "2:12:47"}
|
||||
{"current_steps": 1510, "total_steps": 3108, "loss": 0.2143, "lr": 2.4466060671112315e-05, "epoch": 3.400900900900901, "percentage": 48.58, "elapsed_time": "2:05:03", "remaining_time": "2:12:20"}
|
||||
{"current_steps": 1515, "total_steps": 3108, "loss": 0.1992, "lr": 2.435650692373447e-05, "epoch": 3.4121621621621623, "percentage": 48.75, "elapsed_time": "2:05:27", "remaining_time": "2:11:55"}
|
||||
{"current_steps": 1520, "total_steps": 3108, "loss": 0.1983, "lr": 2.4246815774662206e-05, "epoch": 3.4234234234234235, "percentage": 48.91, "elapsed_time": "2:05:53", "remaining_time": "2:11:31"}
|
||||
{"current_steps": 1525, "total_steps": 3108, "loss": 0.2014, "lr": 2.4136990683490545e-05, "epoch": 3.434684684684685, "percentage": 49.07, "elapsed_time": "2:06:17", "remaining_time": "2:11:05"}
|
||||
{"current_steps": 1530, "total_steps": 3108, "loss": 0.2143, "lr": 2.4027035114038988e-05, "epoch": 3.445945945945946, "percentage": 49.23, "elapsed_time": "2:06:43", "remaining_time": "2:10:41"}
|
||||
{"current_steps": 1535, "total_steps": 3108, "loss": 0.1987, "lr": 2.3916952534242247e-05, "epoch": 3.4572072072072073, "percentage": 49.39, "elapsed_time": "2:07:09", "remaining_time": "2:10:18"}
|
||||
{"current_steps": 1540, "total_steps": 3108, "loss": 0.2, "lr": 2.380674641604085e-05, "epoch": 3.4684684684684686, "percentage": 49.55, "elapsed_time": "2:07:35", "remaining_time": "2:09:54"}
|
||||
{"current_steps": 1545, "total_steps": 3108, "loss": 0.1993, "lr": 2.369642023527168e-05, "epoch": 3.47972972972973, "percentage": 49.71, "elapsed_time": "2:07:59", "remaining_time": "2:09:28"}
|
||||
{"current_steps": 1550, "total_steps": 3108, "loss": 0.209, "lr": 2.3585977471558286e-05, "epoch": 3.490990990990991, "percentage": 49.87, "elapsed_time": "2:08:22", "remaining_time": "2:09:02"}
|
||||
{"current_steps": 1555, "total_steps": 3108, "loss": 0.2033, "lr": 2.3475421608201216e-05, "epoch": 3.5022522522522523, "percentage": 50.03, "elapsed_time": "2:08:46", "remaining_time": "2:08:36"}
|
||||
{"current_steps": 1560, "total_steps": 3108, "loss": 0.1909, "lr": 2.3364756132068083e-05, "epoch": 3.5135135135135136, "percentage": 50.19, "elapsed_time": "2:09:08", "remaining_time": "2:08:08"}
|
||||
{"current_steps": 1565, "total_steps": 3108, "loss": 0.2013, "lr": 2.325398453348365e-05, "epoch": 3.524774774774775, "percentage": 50.35, "elapsed_time": "2:09:33", "remaining_time": "2:07:44"}
|
||||
{"current_steps": 1570, "total_steps": 3108, "loss": 0.212, "lr": 2.3143110306119682e-05, "epoch": 3.536036036036036, "percentage": 50.51, "elapsed_time": "2:09:59", "remaining_time": "2:07:20"}
|
||||
{"current_steps": 1575, "total_steps": 3108, "loss": 0.2062, "lr": 2.3032136946884848e-05, "epoch": 3.5472972972972974, "percentage": 50.68, "elapsed_time": "2:10:25", "remaining_time": "2:06:56"}
|
||||
{"current_steps": 1580, "total_steps": 3108, "loss": 0.2082, "lr": 2.2921067955814332e-05, "epoch": 3.5585585585585586, "percentage": 50.84, "elapsed_time": "2:10:46", "remaining_time": "2:06:28"}
|
||||
{"current_steps": 1585, "total_steps": 3108, "loss": 0.2033, "lr": 2.2809906835959523e-05, "epoch": 3.56981981981982, "percentage": 51.0, "elapsed_time": "2:11:14", "remaining_time": "2:06:05"}
|
||||
{"current_steps": 1590, "total_steps": 3108, "loss": 0.1998, "lr": 2.2698657093277476e-05, "epoch": 3.581081081081081, "percentage": 51.16, "elapsed_time": "2:11:35", "remaining_time": "2:05:37"}
|
||||
{"current_steps": 1595, "total_steps": 3108, "loss": 0.1953, "lr": 2.2587322236520367e-05, "epoch": 3.5923423423423424, "percentage": 51.32, "elapsed_time": "2:12:00", "remaining_time": "2:05:13"}
|
||||
{"current_steps": 1600, "total_steps": 3108, "loss": 0.2235, "lr": 2.247590577712483e-05, "epoch": 3.6036036036036037, "percentage": 51.48, "elapsed_time": "2:12:24", "remaining_time": "2:04:47"}
|
||||
{"current_steps": 1605, "total_steps": 3108, "loss": 0.2031, "lr": 2.2364411229101174e-05, "epoch": 3.614864864864865, "percentage": 51.64, "elapsed_time": "2:12:49", "remaining_time": "2:04:22"}
|
||||
{"current_steps": 1610, "total_steps": 3108, "loss": 0.1937, "lr": 2.225284210892261e-05, "epoch": 3.626126126126126, "percentage": 51.8, "elapsed_time": "2:13:09", "remaining_time": "2:03:53"}
|
||||
{"current_steps": 1615, "total_steps": 3108, "loss": 0.1969, "lr": 2.2141201935414286e-05, "epoch": 3.6373873873873874, "percentage": 51.96, "elapsed_time": "2:13:32", "remaining_time": "2:03:27"}
|
||||
{"current_steps": 1620, "total_steps": 3108, "loss": 0.2023, "lr": 2.2029494229642335e-05, "epoch": 3.6486486486486487, "percentage": 52.12, "elapsed_time": "2:13:57", "remaining_time": "2:03:02"}
|
||||
{"current_steps": 1625, "total_steps": 3108, "loss": 0.1975, "lr": 2.1917722514802833e-05, "epoch": 3.65990990990991, "percentage": 52.28, "elapsed_time": "2:14:20", "remaining_time": "2:02:36"}
|
||||
{"current_steps": 1630, "total_steps": 3108, "loss": 0.2054, "lr": 2.180589031611065e-05, "epoch": 3.671171171171171, "percentage": 52.45, "elapsed_time": "2:14:42", "remaining_time": "2:02:09"}
|
||||
{"current_steps": 1635, "total_steps": 3108, "loss": 0.2206, "lr": 2.1694001160688275e-05, "epoch": 3.6824324324324325, "percentage": 52.61, "elapsed_time": "2:15:05", "remaining_time": "2:01:42"}
|
||||
{"current_steps": 1640, "total_steps": 3108, "loss": 0.2104, "lr": 2.158205857745461e-05, "epoch": 3.6936936936936937, "percentage": 52.77, "elapsed_time": "2:15:31", "remaining_time": "2:01:18"}
|
||||
{"current_steps": 1645, "total_steps": 3108, "loss": 0.2128, "lr": 2.1470066097013607e-05, "epoch": 3.704954954954955, "percentage": 52.93, "elapsed_time": "2:15:53", "remaining_time": "2:00:51"}
|
||||
{"current_steps": 1650, "total_steps": 3108, "loss": 0.2017, "lr": 2.1358027251542966e-05, "epoch": 3.7162162162162162, "percentage": 53.09, "elapsed_time": "2:16:21", "remaining_time": "2:00:29"}
|
||||
{"current_steps": 1655, "total_steps": 3108, "loss": 0.2094, "lr": 2.1245945574682696e-05, "epoch": 3.7274774774774775, "percentage": 53.25, "elapsed_time": "2:16:48", "remaining_time": "2:00:06"}
|
||||
{"current_steps": 1660, "total_steps": 3108, "loss": 0.2072, "lr": 2.1133824601423706e-05, "epoch": 3.7387387387387387, "percentage": 53.41, "elapsed_time": "2:17:11", "remaining_time": "1:59:40"}
|
||||
{"current_steps": 1665, "total_steps": 3108, "loss": 0.2006, "lr": 2.102166786799627e-05, "epoch": 3.75, "percentage": 53.57, "elapsed_time": "2:17:38", "remaining_time": "1:59:17"}
|
||||
{"current_steps": 1670, "total_steps": 3108, "loss": 0.2092, "lr": 2.0909478911758526e-05, "epoch": 3.7612612612612613, "percentage": 53.73, "elapsed_time": "2:18:03", "remaining_time": "1:58:52"}
|
||||
{"current_steps": 1675, "total_steps": 3108, "loss": 0.2042, "lr": 2.0797261271084897e-05, "epoch": 3.7725225225225225, "percentage": 53.89, "elapsed_time": "2:18:27", "remaining_time": "1:58:27"}
|
||||
{"current_steps": 1680, "total_steps": 3108, "loss": 0.2066, "lr": 2.0685018485254505e-05, "epoch": 3.7837837837837838, "percentage": 54.05, "elapsed_time": "2:18:54", "remaining_time": "1:58:04"}
|
||||
{"current_steps": 1685, "total_steps": 3108, "loss": 0.2033, "lr": 2.0572754094339526e-05, "epoch": 3.795045045045045, "percentage": 54.21, "elapsed_time": "2:19:18", "remaining_time": "1:57:38"}
|
||||
{"current_steps": 1690, "total_steps": 3108, "loss": 0.2128, "lr": 2.0460471639093544e-05, "epoch": 3.8063063063063063, "percentage": 54.38, "elapsed_time": "2:19:44", "remaining_time": "1:57:14"}
|
||||
{"current_steps": 1695, "total_steps": 3108, "loss": 0.208, "lr": 2.0348174660839905e-05, "epoch": 3.8175675675675675, "percentage": 54.54, "elapsed_time": "2:20:09", "remaining_time": "1:56:50"}
|
||||
{"current_steps": 1700, "total_steps": 3108, "loss": 0.2101, "lr": 2.0235866701359978e-05, "epoch": 3.828828828828829, "percentage": 54.7, "elapsed_time": "2:20:34", "remaining_time": "1:56:25"}
|
||||
{"current_steps": 1705, "total_steps": 3108, "loss": 0.204, "lr": 2.012355130278148e-05, "epoch": 3.84009009009009, "percentage": 54.86, "elapsed_time": "2:20:55", "remaining_time": "1:55:58"}
|
||||
{"current_steps": 1710, "total_steps": 3108, "loss": 0.2076, "lr": 2.001123200746675e-05, "epoch": 3.8513513513513513, "percentage": 55.02, "elapsed_time": "2:21:23", "remaining_time": "1:55:35"}
|
||||
{"current_steps": 1715, "total_steps": 3108, "loss": 0.2076, "lr": 1.989891235790105e-05, "epoch": 3.8626126126126126, "percentage": 55.18, "elapsed_time": "2:21:48", "remaining_time": "1:55:11"}
|
||||
{"current_steps": 1720, "total_steps": 3108, "loss": 0.1981, "lr": 1.978659589658077e-05, "epoch": 3.873873873873874, "percentage": 55.34, "elapsed_time": "2:22:12", "remaining_time": "1:54:45"}
|
||||
{"current_steps": 1725, "total_steps": 3108, "loss": 0.2043, "lr": 1.9674286165901788e-05, "epoch": 3.885135135135135, "percentage": 55.5, "elapsed_time": "2:22:37", "remaining_time": "1:54:20"}
|
||||
{"current_steps": 1730, "total_steps": 3108, "loss": 0.205, "lr": 1.956198670804769e-05, "epoch": 3.8963963963963963, "percentage": 55.66, "elapsed_time": "2:23:00", "remaining_time": "1:53:54"}
|
||||
{"current_steps": 1735, "total_steps": 3108, "loss": 0.2043, "lr": 1.9449701064878052e-05, "epoch": 3.9076576576576576, "percentage": 55.82, "elapsed_time": "2:23:26", "remaining_time": "1:53:31"}
|
||||
{"current_steps": 1740, "total_steps": 3108, "loss": 0.2093, "lr": 1.9337432777816742e-05, "epoch": 3.918918918918919, "percentage": 55.98, "elapsed_time": "2:23:52", "remaining_time": "1:53:06"}
|
||||
{"current_steps": 1745, "total_steps": 3108, "loss": 0.1991, "lr": 1.9225185387740244e-05, "epoch": 3.93018018018018, "percentage": 56.15, "elapsed_time": "2:24:17", "remaining_time": "1:52:41"}
|
||||
{"current_steps": 1750, "total_steps": 3108, "loss": 0.1967, "lr": 1.9112962434865943e-05, "epoch": 3.9414414414414414, "percentage": 56.31, "elapsed_time": "2:24:43", "remaining_time": "1:52:18"}
|
||||
{"current_steps": 1755, "total_steps": 3108, "loss": 0.2032, "lr": 1.9000767458640513e-05, "epoch": 3.9527027027027026, "percentage": 56.47, "elapsed_time": "2:25:03", "remaining_time": "1:51:50"}
|
||||
{"current_steps": 1760, "total_steps": 3108, "loss": 0.2064, "lr": 1.8888603997628235e-05, "epoch": 3.963963963963964, "percentage": 56.63, "elapsed_time": "2:25:26", "remaining_time": "1:51:24"}
|
||||
{"current_steps": 1765, "total_steps": 3108, "loss": 0.2121, "lr": 1.8776475589399434e-05, "epoch": 3.975225225225225, "percentage": 56.79, "elapsed_time": "2:25:49", "remaining_time": "1:50:57"}
|
||||
{"current_steps": 1770, "total_steps": 3108, "loss": 0.2089, "lr": 1.866438577041888e-05, "epoch": 3.9864864864864864, "percentage": 56.95, "elapsed_time": "2:26:11", "remaining_time": "1:50:30"}
|
||||
{"current_steps": 1775, "total_steps": 3108, "loss": 0.2021, "lr": 1.8552338075934284e-05, "epoch": 3.9977477477477477, "percentage": 57.11, "elapsed_time": "2:26:36", "remaining_time": "1:50:06"}
|
||||
{"current_steps": 1780, "total_steps": 3108, "loss": 0.1918, "lr": 1.8440336039864734e-05, "epoch": 4.009009009009009, "percentage": 57.27, "elapsed_time": "2:27:02", "remaining_time": "1:49:41"}
|
||||
{"current_steps": 1785, "total_steps": 3108, "loss": 0.1934, "lr": 1.8328383194689326e-05, "epoch": 4.02027027027027, "percentage": 57.43, "elapsed_time": "2:27:23", "remaining_time": "1:49:14"}
|
||||
{"current_steps": 1790, "total_steps": 3108, "loss": 0.1928, "lr": 1.8216483071335662e-05, "epoch": 4.031531531531532, "percentage": 57.59, "elapsed_time": "2:27:45", "remaining_time": "1:48:48"}
|
||||
{"current_steps": 1795, "total_steps": 3108, "loss": 0.1915, "lr": 1.810463919906855e-05, "epoch": 4.042792792792793, "percentage": 57.75, "elapsed_time": "2:28:10", "remaining_time": "1:48:22"}
|
||||
{"current_steps": 1800, "total_steps": 3108, "loss": 0.1875, "lr": 1.799285510537868e-05, "epoch": 4.054054054054054, "percentage": 57.92, "elapsed_time": "2:28:33", "remaining_time": "1:47:57"}
|
||||
{"current_steps": 1805, "total_steps": 3108, "loss": 0.1857, "lr": 1.7881134315871332e-05, "epoch": 4.065315315315315, "percentage": 58.08, "elapsed_time": "2:28:59", "remaining_time": "1:47:33"}
|
||||
{"current_steps": 1810, "total_steps": 3108, "loss": 0.1834, "lr": 1.7769480354155228e-05, "epoch": 4.076576576576577, "percentage": 58.24, "elapsed_time": "2:29:25", "remaining_time": "1:47:09"}
|
||||
{"current_steps": 1815, "total_steps": 3108, "loss": 0.1805, "lr": 1.765789674173139e-05, "epoch": 4.087837837837838, "percentage": 58.4, "elapsed_time": "2:29:51", "remaining_time": "1:46:45"}
|
||||
{"current_steps": 1820, "total_steps": 3108, "loss": 0.1909, "lr": 1.7546386997882038e-05, "epoch": 4.099099099099099, "percentage": 58.56, "elapsed_time": "2:30:16", "remaining_time": "1:46:20"}
|
||||
{"current_steps": 1825, "total_steps": 3108, "loss": 0.1893, "lr": 1.743495463955964e-05, "epoch": 4.11036036036036, "percentage": 58.72, "elapsed_time": "2:30:42", "remaining_time": "1:45:56"}
|
||||
{"current_steps": 1830, "total_steps": 3108, "loss": 0.1866, "lr": 1.7323603181275967e-05, "epoch": 4.121621621621622, "percentage": 58.88, "elapsed_time": "2:31:07", "remaining_time": "1:45:32"}
|
||||
{"current_steps": 1835, "total_steps": 3108, "loss": 0.1848, "lr": 1.7212336134991234e-05, "epoch": 4.132882882882883, "percentage": 59.04, "elapsed_time": "2:31:28", "remaining_time": "1:45:05"}
|
||||
{"current_steps": 1840, "total_steps": 3108, "loss": 0.1829, "lr": 1.710115701000337e-05, "epoch": 4.1441441441441444, "percentage": 59.2, "elapsed_time": "2:31:53", "remaining_time": "1:44:40"}
|
||||
{"current_steps": 1845, "total_steps": 3108, "loss": 0.1841, "lr": 1.6990069312837315e-05, "epoch": 4.155405405405405, "percentage": 59.36, "elapsed_time": "2:32:17", "remaining_time": "1:44:14"}
|
||||
{"current_steps": 1850, "total_steps": 3108, "loss": 0.1865, "lr": 1.6879076547134424e-05, "epoch": 4.166666666666667, "percentage": 59.52, "elapsed_time": "2:32:39", "remaining_time": "1:43:48"}
|
||||
{"current_steps": 1855, "total_steps": 3108, "loss": 0.1811, "lr": 1.676818221354196e-05, "epoch": 4.177927927927928, "percentage": 59.68, "elapsed_time": "2:33:03", "remaining_time": "1:43:23"}
|
||||
{"current_steps": 1860, "total_steps": 3108, "loss": 0.1916, "lr": 1.66573898096027e-05, "epoch": 4.1891891891891895, "percentage": 59.85, "elapsed_time": "2:33:27", "remaining_time": "1:42:58"}
|
||||
{"current_steps": 1865, "total_steps": 3108, "loss": 0.1889, "lr": 1.6546702829644636e-05, "epoch": 4.20045045045045, "percentage": 60.01, "elapsed_time": "2:33:48", "remaining_time": "1:42:30"}
|
||||
{"current_steps": 1870, "total_steps": 3108, "loss": 0.1813, "lr": 1.6436124764670732e-05, "epoch": 4.211711711711712, "percentage": 60.17, "elapsed_time": "2:34:10", "remaining_time": "1:42:04"}
|
||||
{"current_steps": 1875, "total_steps": 3108, "loss": 0.1835, "lr": 1.6325659102248833e-05, "epoch": 4.222972972972973, "percentage": 60.33, "elapsed_time": "2:34:37", "remaining_time": "1:41:40"}
|
||||
{"current_steps": 1880, "total_steps": 3108, "loss": 0.1853, "lr": 1.621530932640168e-05, "epoch": 4.2342342342342345, "percentage": 60.49, "elapsed_time": "2:34:58", "remaining_time": "1:41:13"}
|
||||
{"current_steps": 1885, "total_steps": 3108, "loss": 0.1857, "lr": 1.610507891749703e-05, "epoch": 4.245495495495495, "percentage": 60.65, "elapsed_time": "2:35:21", "remaining_time": "1:40:47"}
|
||||
{"current_steps": 1890, "total_steps": 3108, "loss": 0.1889, "lr": 1.5994971352137857e-05, "epoch": 4.256756756756757, "percentage": 60.81, "elapsed_time": "2:35:47", "remaining_time": "1:40:24"}
|
||||
{"current_steps": 1895, "total_steps": 3108, "loss": 0.1887, "lr": 1.5884990103052726e-05, "epoch": 4.268018018018018, "percentage": 60.97, "elapsed_time": "2:36:09", "remaining_time": "1:39:57"}
|
||||
{"current_steps": 1900, "total_steps": 3108, "loss": 0.1829, "lr": 1.5775138638986268e-05, "epoch": 4.2792792792792795, "percentage": 61.13, "elapsed_time": "2:36:32", "remaining_time": "1:39:31"}
|
||||
{"current_steps": 1905, "total_steps": 3108, "loss": 0.1906, "lr": 1.566542042458976e-05, "epoch": 4.29054054054054, "percentage": 61.29, "elapsed_time": "2:36:57", "remaining_time": "1:39:07"}
|
||||
{"current_steps": 1910, "total_steps": 3108, "loss": 0.1909, "lr": 1.555583892031188e-05, "epoch": 4.301801801801802, "percentage": 61.45, "elapsed_time": "2:37:22", "remaining_time": "1:38:42"}
|
||||
{"current_steps": 1915, "total_steps": 3108, "loss": 0.1829, "lr": 1.5446397582289517e-05, "epoch": 4.313063063063063, "percentage": 61.62, "elapsed_time": "2:37:46", "remaining_time": "1:38:17"}
|
||||
{"current_steps": 1920, "total_steps": 3108, "loss": 0.188, "lr": 1.533709986223882e-05, "epoch": 4.324324324324325, "percentage": 61.78, "elapsed_time": "2:38:10", "remaining_time": "1:37:52"}
|
||||
{"current_steps": 1925, "total_steps": 3108, "loss": 0.2012, "lr": 1.5227949207346324e-05, "epoch": 4.335585585585585, "percentage": 61.94, "elapsed_time": "2:38:37", "remaining_time": "1:37:28"}
|
||||
{"current_steps": 1930, "total_steps": 3108, "loss": 0.1908, "lr": 1.5118949060160177e-05, "epoch": 4.346846846846847, "percentage": 62.1, "elapsed_time": "2:39:01", "remaining_time": "1:37:03"}
|
||||
{"current_steps": 1935, "total_steps": 3108, "loss": 0.2009, "lr": 1.5010102858481629e-05, "epoch": 4.358108108108108, "percentage": 62.26, "elapsed_time": "2:39:28", "remaining_time": "1:36:40"}
|
||||
{"current_steps": 1940, "total_steps": 3108, "loss": 0.1925, "lr": 1.4901414035256573e-05, "epoch": 4.36936936936937, "percentage": 62.42, "elapsed_time": "2:39:53", "remaining_time": "1:36:15"}
|
||||
{"current_steps": 1945, "total_steps": 3108, "loss": 0.1893, "lr": 1.4792886018467277e-05, "epoch": 4.38063063063063, "percentage": 62.58, "elapsed_time": "2:40:16", "remaining_time": "1:35:50"}
|
||||
{"current_steps": 1950, "total_steps": 3108, "loss": 0.1837, "lr": 1.4684522231024254e-05, "epoch": 4.391891891891892, "percentage": 62.74, "elapsed_time": "2:40:40", "remaining_time": "1:35:25"}
|
||||
{"current_steps": 1955, "total_steps": 3108, "loss": 0.1844, "lr": 1.4576326090658345e-05, "epoch": 4.403153153153153, "percentage": 62.9, "elapsed_time": "2:41:04", "remaining_time": "1:34:59"}
|
||||
{"current_steps": 1960, "total_steps": 3108, "loss": 0.1882, "lr": 1.4468301009812878e-05, "epoch": 4.414414414414415, "percentage": 63.06, "elapsed_time": "2:41:28", "remaining_time": "1:34:34"}
|
||||
{"current_steps": 1965, "total_steps": 3108, "loss": 0.2042, "lr": 1.4360450395536072e-05, "epoch": 4.425675675675675, "percentage": 63.22, "elapsed_time": "2:41:55", "remaining_time": "1:34:11"}
|
||||
{"current_steps": 1970, "total_steps": 3108, "loss": 0.1912, "lr": 1.4252777649373571e-05, "epoch": 4.436936936936937, "percentage": 63.38, "elapsed_time": "2:42:21", "remaining_time": "1:33:47"}
|
||||
{"current_steps": 1975, "total_steps": 3108, "loss": 0.1883, "lr": 1.4145286167261158e-05, "epoch": 4.448198198198198, "percentage": 63.55, "elapsed_time": "2:42:44", "remaining_time": "1:33:21"}
|
||||
{"current_steps": 1980, "total_steps": 3108, "loss": 0.1934, "lr": 1.4037979339417667e-05, "epoch": 4.45945945945946, "percentage": 63.71, "elapsed_time": "2:43:13", "remaining_time": "1:32:59"}
|
||||
{"current_steps": 1985, "total_steps": 3108, "loss": 0.1875, "lr": 1.3930860550238016e-05, "epoch": 4.4707207207207205, "percentage": 63.87, "elapsed_time": "2:43:34", "remaining_time": "1:32:32"}
|
||||
{"current_steps": 1990, "total_steps": 3108, "loss": 0.1832, "lr": 1.3823933178186523e-05, "epoch": 4.481981981981982, "percentage": 64.03, "elapsed_time": "2:43:58", "remaining_time": "1:32:07"}
|
||||
{"current_steps": 1995, "total_steps": 3108, "loss": 0.1881, "lr": 1.3717200595690314e-05, "epoch": 4.493243243243243, "percentage": 64.19, "elapsed_time": "2:44:24", "remaining_time": "1:31:43"}
|
||||
{"current_steps": 2000, "total_steps": 3108, "loss": 0.1915, "lr": 1.3610666169032945e-05, "epoch": 4.504504504504505, "percentage": 64.35, "elapsed_time": "2:44:55", "remaining_time": "1:31:21"}
|
||||
{"current_steps": 2005, "total_steps": 3108, "loss": 0.1905, "lr": 1.3504333258248274e-05, "epoch": 4.5157657657657655, "percentage": 64.51, "elapsed_time": "2:45:17", "remaining_time": "1:30:55"}
|
||||
{"current_steps": 2010, "total_steps": 3108, "loss": 0.1847, "lr": 1.3398205217014471e-05, "epoch": 4.527027027027027, "percentage": 64.67, "elapsed_time": "2:45:42", "remaining_time": "1:30:31"}
|
||||
{"current_steps": 2015, "total_steps": 3108, "loss": 0.1907, "lr": 1.3292285392548206e-05, "epoch": 4.538288288288288, "percentage": 64.83, "elapsed_time": "2:46:08", "remaining_time": "1:30:07"}
|
||||
{"current_steps": 2020, "total_steps": 3108, "loss": 0.1891, "lr": 1.3186577125499145e-05, "epoch": 4.54954954954955, "percentage": 64.99, "elapsed_time": "2:46:33", "remaining_time": "1:29:42"}
|
||||
{"current_steps": 2025, "total_steps": 3108, "loss": 0.1932, "lr": 1.3081083749844554e-05, "epoch": 4.5608108108108105, "percentage": 65.15, "elapsed_time": "2:46:56", "remaining_time": "1:29:16"}
|
||||
{"current_steps": 2030, "total_steps": 3108, "loss": 0.1922, "lr": 1.297580859278413e-05, "epoch": 4.572072072072072, "percentage": 65.32, "elapsed_time": "2:47:20", "remaining_time": "1:28:51"}
|
||||
{"current_steps": 2035, "total_steps": 3108, "loss": 0.1955, "lr": 1.2870754974635094e-05, "epoch": 4.583333333333333, "percentage": 65.48, "elapsed_time": "2:47:46", "remaining_time": "1:28:27"}
|
||||
{"current_steps": 2040, "total_steps": 3108, "loss": 0.1871, "lr": 1.2765926208727472e-05, "epoch": 4.594594594594595, "percentage": 65.64, "elapsed_time": "2:48:09", "remaining_time": "1:28:01"}
|
||||
{"current_steps": 2045, "total_steps": 3108, "loss": 0.185, "lr": 1.266132560129955e-05, "epoch": 4.6058558558558556, "percentage": 65.8, "elapsed_time": "2:48:32", "remaining_time": "1:27:36"}
|
||||
{"current_steps": 2050, "total_steps": 3108, "loss": 0.1918, "lr": 1.2556956451393658e-05, "epoch": 4.617117117117117, "percentage": 65.96, "elapsed_time": "2:48:54", "remaining_time": "1:27:10"}
|
||||
{"current_steps": 2055, "total_steps": 3108, "loss": 0.1952, "lr": 1.2452822050752067e-05, "epoch": 4.628378378378378, "percentage": 66.12, "elapsed_time": "2:49:19", "remaining_time": "1:26:45"}
|
||||
{"current_steps": 2060, "total_steps": 3108, "loss": 0.1854, "lr": 1.2348925683713218e-05, "epoch": 4.63963963963964, "percentage": 66.28, "elapsed_time": "2:49:42", "remaining_time": "1:26:20"}
|
||||
{"current_steps": 2065, "total_steps": 3108, "loss": 0.1887, "lr": 1.2245270627108087e-05, "epoch": 4.650900900900901, "percentage": 66.44, "elapsed_time": "2:50:05", "remaining_time": "1:25:54"}
|
||||
{"current_steps": 2070, "total_steps": 3108, "loss": 0.1851, "lr": 1.2141860150156868e-05, "epoch": 4.662162162162162, "percentage": 66.6, "elapsed_time": "2:50:29", "remaining_time": "1:25:29"}
|
||||
{"current_steps": 2075, "total_steps": 3108, "loss": 0.1919, "lr": 1.2038697514365855e-05, "epoch": 4.673423423423423, "percentage": 66.76, "elapsed_time": "2:50:54", "remaining_time": "1:25:05"}
|
||||
{"current_steps": 2080, "total_steps": 3108, "loss": 0.1885, "lr": 1.1935785973424592e-05, "epoch": 4.684684684684685, "percentage": 66.92, "elapsed_time": "2:51:14", "remaining_time": "1:24:38"}
|
||||
{"current_steps": 2085, "total_steps": 3108, "loss": 0.1939, "lr": 1.18331287731032e-05, "epoch": 4.695945945945946, "percentage": 67.08, "elapsed_time": "2:51:40", "remaining_time": "1:24:13"}
|
||||
{"current_steps": 2090, "total_steps": 3108, "loss": 0.1839, "lr": 1.173072915115008e-05, "epoch": 4.707207207207207, "percentage": 67.25, "elapsed_time": "2:52:04", "remaining_time": "1:23:48"}
|
||||
{"current_steps": 2095, "total_steps": 3108, "loss": 0.1935, "lr": 1.1628590337189754e-05, "epoch": 4.718468468468468, "percentage": 67.41, "elapsed_time": "2:52:28", "remaining_time": "1:23:24"}
|
||||
{"current_steps": 2100, "total_steps": 3108, "loss": 0.1788, "lr": 1.1526715552620987e-05, "epoch": 4.72972972972973, "percentage": 67.57, "elapsed_time": "2:52:55", "remaining_time": "1:23:00"}
|
||||
{"current_steps": 2105, "total_steps": 3108, "loss": 0.1927, "lr": 1.1425108010515249e-05, "epoch": 4.740990990990991, "percentage": 67.73, "elapsed_time": "2:53:18", "remaining_time": "1:22:34"}
|
||||
{"current_steps": 2110, "total_steps": 3108, "loss": 0.1894, "lr": 1.13237709155153e-05, "epoch": 4.752252252252252, "percentage": 67.89, "elapsed_time": "2:53:41", "remaining_time": "1:22:09"}
|
||||
{"current_steps": 2115, "total_steps": 3108, "loss": 0.1968, "lr": 1.1222707463734182e-05, "epoch": 4.763513513513513, "percentage": 68.05, "elapsed_time": "2:54:07", "remaining_time": "1:21:45"}
|
||||
{"current_steps": 2120, "total_steps": 3108, "loss": 0.1967, "lr": 1.112192084265439e-05, "epoch": 4.774774774774775, "percentage": 68.21, "elapsed_time": "2:54:32", "remaining_time": "1:21:20"}
|
||||
{"current_steps": 2125, "total_steps": 3108, "loss": 0.1884, "lr": 1.1021414231027316e-05, "epoch": 4.786036036036036, "percentage": 68.37, "elapsed_time": "2:54:57", "remaining_time": "1:20:56"}
|
||||
{"current_steps": 2130, "total_steps": 3108, "loss": 0.1919, "lr": 1.092119079877304e-05, "epoch": 4.797297297297297, "percentage": 68.53, "elapsed_time": "2:55:21", "remaining_time": "1:20:30"}
|
||||
{"current_steps": 2135, "total_steps": 3108, "loss": 0.1852, "lr": 1.0821253706880324e-05, "epoch": 4.808558558558558, "percentage": 68.69, "elapsed_time": "2:55:44", "remaining_time": "1:20:05"}
|
||||
{"current_steps": 2140, "total_steps": 3108, "loss": 0.1909, "lr": 1.0721606107306908e-05, "epoch": 4.81981981981982, "percentage": 68.85, "elapsed_time": "2:56:07", "remaining_time": "1:19:40"}
|
||||
{"current_steps": 2145, "total_steps": 3108, "loss": 0.1797, "lr": 1.0622251142880127e-05, "epoch": 4.831081081081081, "percentage": 69.02, "elapsed_time": "2:56:31", "remaining_time": "1:19:14"}
|
||||
{"current_steps": 2150, "total_steps": 3108, "loss": 0.1891, "lr": 1.052319194719778e-05, "epoch": 4.842342342342342, "percentage": 69.18, "elapsed_time": "2:56:59", "remaining_time": "1:18:51"}
|
||||
{"current_steps": 2155, "total_steps": 3108, "loss": 0.1823, "lr": 1.042443164452926e-05, "epoch": 4.853603603603604, "percentage": 69.34, "elapsed_time": "2:57:21", "remaining_time": "1:18:25"}
|
||||
{"current_steps": 2160, "total_steps": 3108, "loss": 0.178, "lr": 1.0325973349717087e-05, "epoch": 4.864864864864865, "percentage": 69.5, "elapsed_time": "2:57:44", "remaining_time": "1:18:00"}
|
||||
{"current_steps": 2165, "total_steps": 3108, "loss": 0.1913, "lr": 1.022782016807861e-05, "epoch": 4.876126126126126, "percentage": 69.66, "elapsed_time": "2:58:04", "remaining_time": "1:17:33"}
|
||||
{"current_steps": 2170, "total_steps": 3108, "loss": 0.1859, "lr": 1.0129975195308077e-05, "epoch": 4.887387387387387, "percentage": 69.82, "elapsed_time": "2:58:28", "remaining_time": "1:17:08"}
|
||||
{"current_steps": 2175, "total_steps": 3108, "loss": 0.1832, "lr": 1.0032441517379037e-05, "epoch": 4.898648648648649, "percentage": 69.98, "elapsed_time": "2:58:54", "remaining_time": "1:16:44"}
|
||||
{"current_steps": 2180, "total_steps": 3108, "loss": 0.1918, "lr": 9.935222210446938e-06, "epoch": 4.90990990990991, "percentage": 70.14, "elapsed_time": "2:59:19", "remaining_time": "1:16:20"}
|
||||
{"current_steps": 2185, "total_steps": 3108, "loss": 0.1779, "lr": 9.838320340752185e-06, "epoch": 4.921171171171171, "percentage": 70.3, "elapsed_time": "2:59:43", "remaining_time": "1:15:55"}
|
||||
{"current_steps": 2190, "total_steps": 3108, "loss": 0.1885, "lr": 9.741738964523396e-06, "epoch": 4.9324324324324325, "percentage": 70.46, "elapsed_time": "3:00:10", "remaining_time": "1:15:31"}
|
||||
{"current_steps": 2195, "total_steps": 3108, "loss": 0.1903, "lr": 9.64548112788098e-06, "epoch": 4.943693693693694, "percentage": 70.62, "elapsed_time": "3:00:34", "remaining_time": "1:15:06"}
|
||||
{"current_steps": 2200, "total_steps": 3108, "loss": 0.1899, "lr": 9.54954986674113e-06, "epoch": 4.954954954954955, "percentage": 70.79, "elapsed_time": "3:00:57", "remaining_time": "1:14:41"}
|
||||
{"current_steps": 2205, "total_steps": 3108, "loss": 0.1872, "lr": 9.453948206720023e-06, "epoch": 4.966216216216216, "percentage": 70.95, "elapsed_time": "3:01:20", "remaining_time": "1:14:15"}
|
||||
{"current_steps": 2210, "total_steps": 3108, "loss": 0.2004, "lr": 9.358679163038413e-06, "epoch": 4.9774774774774775, "percentage": 71.11, "elapsed_time": "3:01:44", "remaining_time": "1:13:50"}
|
||||
{"current_steps": 2215, "total_steps": 3108, "loss": 0.1841, "lr": 9.263745740426514e-06, "epoch": 4.988738738738739, "percentage": 71.27, "elapsed_time": "3:02:06", "remaining_time": "1:13:25"}
|
||||
{"current_steps": 2220, "total_steps": 3108, "loss": 0.1846, "lr": 9.169150933029267e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "3:02:33", "remaining_time": "1:13:01"}
|
||||
{"current_steps": 2225, "total_steps": 3108, "loss": 0.1785, "lr": 9.074897724311872e-06, "epoch": 5.011261261261262, "percentage": 71.59, "elapsed_time": "3:03:01", "remaining_time": "1:12:37"}
|
||||
{"current_steps": 2230, "total_steps": 3108, "loss": 0.1804, "lr": 8.980989086965719e-06, "epoch": 5.0225225225225225, "percentage": 71.75, "elapsed_time": "3:03:25", "remaining_time": "1:12:13"}
|
||||
{"current_steps": 2235, "total_steps": 3108, "loss": 0.1793, "lr": 8.887427982814593e-06, "epoch": 5.033783783783784, "percentage": 71.91, "elapsed_time": "3:03:50", "remaining_time": "1:11:48"}
|
||||
{"current_steps": 2240, "total_steps": 3108, "loss": 0.1837, "lr": 8.794217362721308e-06, "epoch": 5.045045045045045, "percentage": 72.07, "elapsed_time": "3:04:15", "remaining_time": "1:11:23"}
|
||||
{"current_steps": 2245, "total_steps": 3108, "loss": 0.1762, "lr": 8.701360166494614e-06, "epoch": 5.056306306306307, "percentage": 72.23, "elapsed_time": "3:04:36", "remaining_time": "1:10:58"}
|
||||
{"current_steps": 2250, "total_steps": 3108, "loss": 0.1776, "lr": 8.608859322796454e-06, "epoch": 5.0675675675675675, "percentage": 72.39, "elapsed_time": "3:05:01", "remaining_time": "1:10:33"}
|
||||
{"current_steps": 2255, "total_steps": 3108, "loss": 0.176, "lr": 8.516717749049644e-06, "epoch": 5.078828828828829, "percentage": 72.55, "elapsed_time": "3:05:25", "remaining_time": "1:10:08"}
|
||||
{"current_steps": 2260, "total_steps": 3108, "loss": 0.1834, "lr": 8.424938351345831e-06, "epoch": 5.09009009009009, "percentage": 72.72, "elapsed_time": "3:05:51", "remaining_time": "1:09:44"}
|
||||
{"current_steps": 2265, "total_steps": 3108, "loss": 0.1705, "lr": 8.333524024353823e-06, "epoch": 5.101351351351352, "percentage": 72.88, "elapsed_time": "3:06:14", "remaining_time": "1:09:19"}
|
||||
{"current_steps": 2270, "total_steps": 3108, "loss": 0.1664, "lr": 8.242477651228332e-06, "epoch": 5.112612612612613, "percentage": 73.04, "elapsed_time": "3:06:43", "remaining_time": "1:08:55"}
|
||||
{"current_steps": 2275, "total_steps": 3108, "loss": 0.1702, "lr": 8.151802103519011e-06, "epoch": 5.123873873873874, "percentage": 73.2, "elapsed_time": "3:07:05", "remaining_time": "1:08:30"}
|
||||
{"current_steps": 2280, "total_steps": 3108, "loss": 0.1778, "lr": 8.061500241079882e-06, "epoch": 5.135135135135135, "percentage": 73.36, "elapsed_time": "3:07:28", "remaining_time": "1:08:05"}
|
||||
{"current_steps": 2285, "total_steps": 3108, "loss": 0.1708, "lr": 7.971574911979174e-06, "epoch": 5.146396396396397, "percentage": 73.52, "elapsed_time": "3:07:53", "remaining_time": "1:07:40"}
|
||||
{"current_steps": 2290, "total_steps": 3108, "loss": 0.1809, "lr": 7.882028952409463e-06, "epoch": 5.157657657657658, "percentage": 73.68, "elapsed_time": "3:08:14", "remaining_time": "1:07:14"}
|
||||
{"current_steps": 2295, "total_steps": 3108, "loss": 0.1691, "lr": 7.79286518659822e-06, "epoch": 5.168918918918919, "percentage": 73.84, "elapsed_time": "3:08:35", "remaining_time": "1:06:48"}
|
||||
{"current_steps": 2300, "total_steps": 3108, "loss": 0.1752, "lr": 7.70408642671877e-06, "epoch": 5.18018018018018, "percentage": 74.0, "elapsed_time": "3:08:59", "remaining_time": "1:06:23"}
|
||||
{"current_steps": 2305, "total_steps": 3108, "loss": 0.1809, "lr": 7.615695472801545e-06, "epoch": 5.191441441441442, "percentage": 74.16, "elapsed_time": "3:09:23", "remaining_time": "1:05:58"}
|
||||
{"current_steps": 2310, "total_steps": 3108, "loss": 0.1788, "lr": 7.52769511264583e-06, "epoch": 5.202702702702703, "percentage": 74.32, "elapsed_time": "3:09:46", "remaining_time": "1:05:33"}
|
||||
{"current_steps": 2315, "total_steps": 3108, "loss": 0.1804, "lr": 7.440088121731803e-06, "epoch": 5.213963963963964, "percentage": 74.49, "elapsed_time": "3:10:12", "remaining_time": "1:05:09"}
|
||||
{"current_steps": 2320, "total_steps": 3108, "loss": 0.1754, "lr": 7.3528772631329915e-06, "epoch": 5.225225225225225, "percentage": 74.65, "elapsed_time": "3:10:36", "remaining_time": "1:04:44"}
|
||||
{"current_steps": 2325, "total_steps": 3108, "loss": 0.1641, "lr": 7.2660652874291515e-06, "epoch": 5.236486486486487, "percentage": 74.81, "elapsed_time": "3:10:58", "remaining_time": "1:04:18"}
|
||||
{"current_steps": 2330, "total_steps": 3108, "loss": 0.1783, "lr": 7.179654932619513e-06, "epoch": 5.247747747747748, "percentage": 74.97, "elapsed_time": "3:11:24", "remaining_time": "1:03:54"}
|
||||
{"current_steps": 2335, "total_steps": 3108, "loss": 0.1766, "lr": 7.093648924036394e-06, "epoch": 5.259009009009009, "percentage": 75.13, "elapsed_time": "3:11:47", "remaining_time": "1:03:29"}
|
||||
{"current_steps": 2340, "total_steps": 3108, "loss": 0.1732, "lr": 7.0080499742592834e-06, "epoch": 5.27027027027027, "percentage": 75.29, "elapsed_time": "3:12:12", "remaining_time": "1:03:05"}
|
||||
{"current_steps": 2345, "total_steps": 3108, "loss": 0.1682, "lr": 6.922860783029279e-06, "epoch": 5.281531531531532, "percentage": 75.45, "elapsed_time": "3:12:34", "remaining_time": "1:02:39"}
|
||||
{"current_steps": 2350, "total_steps": 3108, "loss": 0.1854, "lr": 6.838084037163908e-06, "epoch": 5.292792792792793, "percentage": 75.61, "elapsed_time": "3:13:00", "remaining_time": "1:02:15"}
|
||||
{"current_steps": 2355, "total_steps": 3108, "loss": 0.1675, "lr": 6.75372241047243e-06, "epoch": 5.304054054054054, "percentage": 75.77, "elapsed_time": "3:13:22", "remaining_time": "1:01:49"}
|
||||
{"current_steps": 2360, "total_steps": 3108, "loss": 0.1726, "lr": 6.66977856367149e-06, "epoch": 5.315315315315315, "percentage": 75.93, "elapsed_time": "3:13:49", "remaining_time": "1:01:25"}
|
||||
{"current_steps": 2365, "total_steps": 3108, "loss": 0.18, "lr": 6.586255144301174e-06, "epoch": 5.326576576576577, "percentage": 76.09, "elapsed_time": "3:14:12", "remaining_time": "1:01:00"}
|
||||
{"current_steps": 2370, "total_steps": 3108, "loss": 0.1745, "lr": 6.503154786641568e-06, "epoch": 5.337837837837838, "percentage": 76.25, "elapsed_time": "3:14:36", "remaining_time": "1:00:35"}
|
||||
{"current_steps": 2375, "total_steps": 3108, "loss": 0.175, "lr": 6.4204801116296014e-06, "epoch": 5.349099099099099, "percentage": 76.42, "elapsed_time": "3:15:00", "remaining_time": "1:00:11"}
|
||||
{"current_steps": 2380, "total_steps": 3108, "loss": 0.1806, "lr": 6.338233726776439e-06, "epoch": 5.36036036036036, "percentage": 76.58, "elapsed_time": "3:15:24", "remaining_time": "0:59:46"}
|
||||
{"current_steps": 2385, "total_steps": 3108, "loss": 0.1683, "lr": 6.2564182260852325e-06, "epoch": 5.371621621621622, "percentage": 76.74, "elapsed_time": "3:15:48", "remaining_time": "0:59:21"}
|
||||
{"current_steps": 2390, "total_steps": 3108, "loss": 0.1782, "lr": 6.1750361899692726e-06, "epoch": 5.382882882882883, "percentage": 76.9, "elapsed_time": "3:16:15", "remaining_time": "0:58:57"}
|
||||
{"current_steps": 2395, "total_steps": 3108, "loss": 0.1805, "lr": 6.094090185170647e-06, "epoch": 5.3941441441441444, "percentage": 77.06, "elapsed_time": "3:16:38", "remaining_time": "0:58:32"}
|
||||
{"current_steps": 2400, "total_steps": 3108, "loss": 0.181, "lr": 6.013582764679273e-06, "epoch": 5.405405405405405, "percentage": 77.22, "elapsed_time": "3:17:03", "remaining_time": "0:58:07"}
|
||||
{"current_steps": 2405, "total_steps": 3108, "loss": 0.1768, "lr": 5.933516467652354e-06, "epoch": 5.416666666666667, "percentage": 77.38, "elapsed_time": "3:17:27", "remaining_time": "0:57:43"}
|
||||
{"current_steps": 2410, "total_steps": 3108, "loss": 0.1808, "lr": 5.853893819334331e-06, "epoch": 5.427927927927928, "percentage": 77.54, "elapsed_time": "3:17:51", "remaining_time": "0:57:18"}
|
||||
{"current_steps": 2415, "total_steps": 3108, "loss": 0.178, "lr": 5.774717330977224e-06, "epoch": 5.4391891891891895, "percentage": 77.7, "elapsed_time": "3:18:15", "remaining_time": "0:56:53"}
|
||||
{"current_steps": 2420, "total_steps": 3108, "loss": 0.1765, "lr": 5.695989499761427e-06, "epoch": 5.45045045045045, "percentage": 77.86, "elapsed_time": "3:18:37", "remaining_time": "0:56:28"}
|
||||
{"current_steps": 2425, "total_steps": 3108, "loss": 0.1721, "lr": 5.617712808716933e-06, "epoch": 5.461711711711712, "percentage": 78.02, "elapsed_time": "3:19:03", "remaining_time": "0:56:03"}
|
||||
{"current_steps": 2430, "total_steps": 3108, "loss": 0.1758, "lr": 5.539889726645056e-06, "epoch": 5.472972972972973, "percentage": 78.19, "elapsed_time": "3:19:28", "remaining_time": "0:55:39"}
|
||||
{"current_steps": 2435, "total_steps": 3108, "loss": 0.1616, "lr": 5.462522708040534e-06, "epoch": 5.4842342342342345, "percentage": 78.35, "elapsed_time": "3:19:51", "remaining_time": "0:55:14"}
|
||||
{"current_steps": 2440, "total_steps": 3108, "loss": 0.1856, "lr": 5.385614193014146e-06, "epoch": 5.495495495495495, "percentage": 78.51, "elapsed_time": "3:20:15", "remaining_time": "0:54:49"}
|
||||
{"current_steps": 2445, "total_steps": 3108, "loss": 0.1739, "lr": 5.309166607215706e-06, "epoch": 5.506756756756757, "percentage": 78.67, "elapsed_time": "3:20:38", "remaining_time": "0:54:24"}
|
||||
{"current_steps": 2450, "total_steps": 3108, "loss": 0.1803, "lr": 5.233182361757612e-06, "epoch": 5.518018018018018, "percentage": 78.83, "elapsed_time": "3:21:01", "remaining_time": "0:53:59"}
|
||||
{"current_steps": 2455, "total_steps": 3108, "loss": 0.1717, "lr": 5.157663853138777e-06, "epoch": 5.5292792792792795, "percentage": 78.99, "elapsed_time": "3:21:27", "remaining_time": "0:53:34"}
|
||||
{"current_steps": 2460, "total_steps": 3108, "loss": 0.1717, "lr": 5.082613463169024e-06, "epoch": 5.54054054054054, "percentage": 79.15, "elapsed_time": "3:21:49", "remaining_time": "0:53:09"}
|
||||
{"current_steps": 2465, "total_steps": 3108, "loss": 0.1795, "lr": 5.008033558894005e-06, "epoch": 5.551801801801802, "percentage": 79.31, "elapsed_time": "3:22:14", "remaining_time": "0:52:45"}
|
||||
{"current_steps": 2470, "total_steps": 3108, "loss": 0.1775, "lr": 4.933926492520533e-06, "epoch": 5.563063063063063, "percentage": 79.47, "elapsed_time": "3:22:38", "remaining_time": "0:52:20"}
|
||||
{"current_steps": 2475, "total_steps": 3108, "loss": 0.1748, "lr": 4.860294601342359e-06, "epoch": 5.574324324324325, "percentage": 79.63, "elapsed_time": "3:23:00", "remaining_time": "0:51:55"}
|
||||
{"current_steps": 2480, "total_steps": 3108, "loss": 0.1779, "lr": 4.787140207666512e-06, "epoch": 5.585585585585585, "percentage": 79.79, "elapsed_time": "3:23:25", "remaining_time": "0:51:30"}
|
||||
{"current_steps": 2485, "total_steps": 3108, "loss": 0.1756, "lr": 4.714465618740025e-06, "epoch": 5.596846846846847, "percentage": 79.95, "elapsed_time": "3:23:50", "remaining_time": "0:51:06"}
|
||||
{"current_steps": 2490, "total_steps": 3108, "loss": 0.1769, "lr": 4.642273126677148e-06, "epoch": 5.608108108108108, "percentage": 80.12, "elapsed_time": "3:24:14", "remaining_time": "0:50:41"}
|
||||
{"current_steps": 2495, "total_steps": 3108, "loss": 0.1764, "lr": 4.570565008387104e-06, "epoch": 5.61936936936937, "percentage": 80.28, "elapsed_time": "3:24:37", "remaining_time": "0:50:16"}
|
||||
{"current_steps": 2500, "total_steps": 3108, "loss": 0.1826, "lr": 4.499343525502224e-06, "epoch": 5.63063063063063, "percentage": 80.44, "elapsed_time": "3:25:01", "remaining_time": "0:49:51"}
|
||||
{"current_steps": 2505, "total_steps": 3108, "loss": 0.178, "lr": 4.428610924306658e-06, "epoch": 5.641891891891892, "percentage": 80.6, "elapsed_time": "3:25:26", "remaining_time": "0:49:27"}
|
||||
{"current_steps": 2510, "total_steps": 3108, "loss": 0.1757, "lr": 4.3583694356655085e-06, "epoch": 5.653153153153153, "percentage": 80.76, "elapsed_time": "3:25:50", "remaining_time": "0:49:02"}
|
||||
{"current_steps": 2515, "total_steps": 3108, "loss": 0.1649, "lr": 4.288621274954463e-06, "epoch": 5.664414414414415, "percentage": 80.92, "elapsed_time": "3:26:14", "remaining_time": "0:48:37"}
|
||||
{"current_steps": 2520, "total_steps": 3108, "loss": 0.1793, "lr": 4.219368641989947e-06, "epoch": 5.675675675675675, "percentage": 81.08, "elapsed_time": "3:26:43", "remaining_time": "0:48:14"}
|
||||
{"current_steps": 2525, "total_steps": 3108, "loss": 0.179, "lr": 4.150613720959731e-06, "epoch": 5.686936936936937, "percentage": 81.24, "elapsed_time": "3:27:08", "remaining_time": "0:47:49"}
|
||||
{"current_steps": 2530, "total_steps": 3108, "loss": 0.1788, "lr": 4.082358680354019e-06, "epoch": 5.698198198198198, "percentage": 81.4, "elapsed_time": "3:27:39", "remaining_time": "0:47:26"}
|
||||
{"current_steps": 2535, "total_steps": 3108, "loss": 0.1773, "lr": 4.014605672897101e-06, "epoch": 5.70945945945946, "percentage": 81.56, "elapsed_time": "3:28:01", "remaining_time": "0:47:01"}
|
||||
{"current_steps": 2540, "total_steps": 3108, "loss": 0.1747, "lr": 3.947356835479426e-06, "epoch": 5.7207207207207205, "percentage": 81.72, "elapsed_time": "3:28:27", "remaining_time": "0:46:36"}
|
||||
{"current_steps": 2545, "total_steps": 3108, "loss": 0.1738, "lr": 3.880614289090199e-06, "epoch": 5.731981981981982, "percentage": 81.89, "elapsed_time": "3:28:48", "remaining_time": "0:46:11"}
|
||||
{"current_steps": 2550, "total_steps": 3108, "loss": 0.1778, "lr": 3.814380138750522e-06, "epoch": 5.743243243243243, "percentage": 82.05, "elapsed_time": "3:29:11", "remaining_time": "0:45:46"}
|
||||
{"current_steps": 2555, "total_steps": 3108, "loss": 0.1628, "lr": 3.7486564734469744e-06, "epoch": 5.754504504504505, "percentage": 82.21, "elapsed_time": "3:29:35", "remaining_time": "0:45:21"}
|
||||
{"current_steps": 2560, "total_steps": 3108, "loss": 0.1712, "lr": 3.683445366065723e-06, "epoch": 5.7657657657657655, "percentage": 82.37, "elapsed_time": "3:29:57", "remaining_time": "0:44:56"}
|
||||
{"current_steps": 2565, "total_steps": 3108, "loss": 0.1836, "lr": 3.6187488733271757e-06, "epoch": 5.777027027027027, "percentage": 82.53, "elapsed_time": "3:30:19", "remaining_time": "0:44:31"}
|
||||
{"current_steps": 2570, "total_steps": 3108, "loss": 0.1724, "lr": 3.5545690357210718e-06, "epoch": 5.788288288288288, "percentage": 82.69, "elapsed_time": "3:30:43", "remaining_time": "0:44:06"}
|
||||
{"current_steps": 2575, "total_steps": 3108, "loss": 0.1691, "lr": 3.490907877442162e-06, "epoch": 5.79954954954955, "percentage": 82.85, "elapsed_time": "3:31:08", "remaining_time": "0:43:42"}
|
||||
{"current_steps": 2580, "total_steps": 3108, "loss": 0.1688, "lr": 3.427767406326359e-06, "epoch": 5.8108108108108105, "percentage": 83.01, "elapsed_time": "3:31:33", "remaining_time": "0:43:17"}
|
||||
{"current_steps": 2585, "total_steps": 3108, "loss": 0.1725, "lr": 3.3651496137873862e-06, "epoch": 5.822072072072072, "percentage": 83.17, "elapsed_time": "3:31:58", "remaining_time": "0:42:53"}
|
||||
{"current_steps": 2590, "total_steps": 3108, "loss": 0.1812, "lr": 3.303056474754003e-06, "epoch": 5.833333333333333, "percentage": 83.33, "elapsed_time": "3:32:23", "remaining_time": "0:42:28"}
|
||||
{"current_steps": 2595, "total_steps": 3108, "loss": 0.1763, "lr": 3.241489947607701e-06, "epoch": 5.844594594594595, "percentage": 83.49, "elapsed_time": "3:32:49", "remaining_time": "0:42:04"}
|
||||
{"current_steps": 2600, "total_steps": 3108, "loss": 0.1686, "lr": 3.1804519741209283e-06, "epoch": 5.8558558558558556, "percentage": 83.66, "elapsed_time": "3:33:13", "remaining_time": "0:41:39"}
|
||||
{"current_steps": 2605, "total_steps": 3108, "loss": 0.1739, "lr": 3.119944479395873e-06, "epoch": 5.867117117117117, "percentage": 83.82, "elapsed_time": "3:33:39", "remaining_time": "0:41:15"}
|
||||
{"current_steps": 2610, "total_steps": 3108, "loss": 0.1798, "lr": 3.0599693718037283e-06, "epoch": 5.878378378378378, "percentage": 83.98, "elapsed_time": "3:34:06", "remaining_time": "0:40:51"}
|
||||
{"current_steps": 2615, "total_steps": 3108, "loss": 0.1715, "lr": 3.0005285429244923e-06, "epoch": 5.88963963963964, "percentage": 84.14, "elapsed_time": "3:34:32", "remaining_time": "0:40:26"}
|
||||
{"current_steps": 2620, "total_steps": 3108, "loss": 0.1892, "lr": 2.941623867487342e-06, "epoch": 5.900900900900901, "percentage": 84.3, "elapsed_time": "3:34:59", "remaining_time": "0:40:02"}
|
||||
{"current_steps": 2625, "total_steps": 3108, "loss": 0.1781, "lr": 2.883257203311476e-06, "epoch": 5.912162162162162, "percentage": 84.46, "elapsed_time": "3:35:25", "remaining_time": "0:39:38"}
|
||||
{"current_steps": 2630, "total_steps": 3108, "loss": 0.1779, "lr": 2.825430391247541e-06, "epoch": 5.923423423423423, "percentage": 84.62, "elapsed_time": "3:35:49", "remaining_time": "0:39:13"}
|
||||
{"current_steps": 2635, "total_steps": 3108, "loss": 0.1779, "lr": 2.768145255119543e-06, "epoch": 5.934684684684685, "percentage": 84.78, "elapsed_time": "3:36:10", "remaining_time": "0:38:48"}
|
||||
{"current_steps": 2640, "total_steps": 3108, "loss": 0.1807, "lr": 2.711403601667362e-06, "epoch": 5.945945945945946, "percentage": 84.94, "elapsed_time": "3:36:36", "remaining_time": "0:38:23"}
|
||||
{"current_steps": 2645, "total_steps": 3108, "loss": 0.1684, "lr": 2.655207220489744e-06, "epoch": 5.957207207207207, "percentage": 85.1, "elapsed_time": "3:37:00", "remaining_time": "0:37:59"}
|
||||
{"current_steps": 2650, "total_steps": 3108, "loss": 0.172, "lr": 2.599557883987871e-06, "epoch": 5.968468468468468, "percentage": 85.26, "elapsed_time": "3:37:24", "remaining_time": "0:37:34"}
|
||||
{"current_steps": 2655, "total_steps": 3108, "loss": 0.1829, "lr": 2.5444573473094346e-06, "epoch": 5.97972972972973, "percentage": 85.42, "elapsed_time": "3:37:50", "remaining_time": "0:37:10"}
|
||||
{"current_steps": 2660, "total_steps": 3108, "loss": 0.1754, "lr": 2.489907348293321e-06, "epoch": 5.990990990990991, "percentage": 85.59, "elapsed_time": "3:38:12", "remaining_time": "0:36:45"}
|
||||
{"current_steps": 2665, "total_steps": 3108, "loss": 0.1697, "lr": 2.4359096074147747e-06, "epoch": 6.002252252252252, "percentage": 85.75, "elapsed_time": "3:38:37", "remaining_time": "0:36:20"}
|
||||
{"current_steps": 2670, "total_steps": 3108, "loss": 0.1706, "lr": 2.3824658277311265e-06, "epoch": 6.013513513513513, "percentage": 85.91, "elapsed_time": "3:39:00", "remaining_time": "0:35:55"}
|
||||
{"current_steps": 2675, "total_steps": 3108, "loss": 0.1722, "lr": 2.3295776948281046e-06, "epoch": 6.024774774774775, "percentage": 86.07, "elapsed_time": "3:39:23", "remaining_time": "0:35:30"}
|
||||
{"current_steps": 2680, "total_steps": 3108, "loss": 0.166, "lr": 2.2772468767666633e-06, "epoch": 6.036036036036036, "percentage": 86.23, "elapsed_time": "3:39:50", "remaining_time": "0:35:06"}
|
||||
{"current_steps": 2685, "total_steps": 3108, "loss": 0.1682, "lr": 2.225475024030361e-06, "epoch": 6.047297297297297, "percentage": 86.39, "elapsed_time": "3:40:15", "remaining_time": "0:34:42"}
|
||||
{"current_steps": 2690, "total_steps": 3108, "loss": 0.1582, "lr": 2.1742637694733235e-06, "epoch": 6.058558558558558, "percentage": 86.55, "elapsed_time": "3:40:41", "remaining_time": "0:34:17"}
|
||||
{"current_steps": 2695, "total_steps": 3108, "loss": 0.1678, "lr": 2.1236147282687305e-06, "epoch": 6.06981981981982, "percentage": 86.71, "elapsed_time": "3:41:05", "remaining_time": "0:33:52"}
|
||||
{"current_steps": 2700, "total_steps": 3108, "loss": 0.1767, "lr": 2.0735294978578804e-06, "epoch": 6.081081081081081, "percentage": 86.87, "elapsed_time": "3:41:30", "remaining_time": "0:33:28"}
|
||||
{"current_steps": 2705, "total_steps": 3108, "loss": 0.1583, "lr": 2.0240096578998147e-06, "epoch": 6.092342342342342, "percentage": 87.03, "elapsed_time": "3:41:53", "remaining_time": "0:33:03"}
|
||||
{"current_steps": 2710, "total_steps": 3108, "loss": 0.1714, "lr": 1.975056770221475e-06, "epoch": 6.103603603603603, "percentage": 87.19, "elapsed_time": "3:42:19", "remaining_time": "0:32:39"}
|
||||
{"current_steps": 2715, "total_steps": 3108, "loss": 0.1683, "lr": 1.926672378768468e-06, "epoch": 6.114864864864865, "percentage": 87.36, "elapsed_time": "3:42:44", "remaining_time": "0:32:14"}
|
||||
{"current_steps": 2720, "total_steps": 3108, "loss": 0.1622, "lr": 1.8788580095563636e-06, "epoch": 6.126126126126126, "percentage": 87.52, "elapsed_time": "3:43:09", "remaining_time": "0:31:50"}
|
||||
{"current_steps": 2725, "total_steps": 3108, "loss": 0.1769, "lr": 1.8316151706225471e-06, "epoch": 6.137387387387387, "percentage": 87.68, "elapsed_time": "3:43:33", "remaining_time": "0:31:25"}
|
||||
{"current_steps": 2730, "total_steps": 3108, "loss": 0.1684, "lr": 1.784945351978682e-06, "epoch": 6.148648648648648, "percentage": 87.84, "elapsed_time": "3:43:57", "remaining_time": "0:31:00"}
|
||||
{"current_steps": 2735, "total_steps": 3108, "loss": 0.1746, "lr": 1.7388500255637098e-06, "epoch": 6.15990990990991, "percentage": 88.0, "elapsed_time": "3:44:21", "remaining_time": "0:30:35"}
|
||||
{"current_steps": 2740, "total_steps": 3108, "loss": 0.1676, "lr": 1.693330645197404e-06, "epoch": 6.171171171171171, "percentage": 88.16, "elapsed_time": "3:44:47", "remaining_time": "0:30:11"}
|
||||
{"current_steps": 2745, "total_steps": 3108, "loss": 0.1671, "lr": 1.6483886465345511e-06, "epoch": 6.1824324324324325, "percentage": 88.32, "elapsed_time": "3:45:13", "remaining_time": "0:29:47"}
|
||||
{"current_steps": 2750, "total_steps": 3108, "loss": 0.1703, "lr": 1.6040254470196504e-06, "epoch": 6.193693693693693, "percentage": 88.48, "elapsed_time": "3:45:38", "remaining_time": "0:29:22"}
|
||||
{"current_steps": 2755, "total_steps": 3108, "loss": 0.1617, "lr": 1.5602424458422062e-06, "epoch": 6.204954954954955, "percentage": 88.64, "elapsed_time": "3:46:02", "remaining_time": "0:28:57"}
|
||||
{"current_steps": 2760, "total_steps": 3108, "loss": 0.1681, "lr": 1.5170410238926136e-06, "epoch": 6.216216216216216, "percentage": 88.8, "elapsed_time": "3:46:25", "remaining_time": "0:28:32"}
|
||||
{"current_steps": 2765, "total_steps": 3108, "loss": 0.1697, "lr": 1.4744225437185899e-06, "epoch": 6.2274774774774775, "percentage": 88.96, "elapsed_time": "3:46:51", "remaining_time": "0:28:08"}
|
||||
{"current_steps": 2770, "total_steps": 3108, "loss": 0.1704, "lr": 1.43238834948221e-06, "epoch": 6.238738738738739, "percentage": 89.12, "elapsed_time": "3:47:14", "remaining_time": "0:27:43"}
|
||||
{"current_steps": 2775, "total_steps": 3108, "loss": 0.1643, "lr": 1.3909397669175162e-06, "epoch": 6.25, "percentage": 89.29, "elapsed_time": "3:47:38", "remaining_time": "0:27:19"}
|
||||
{"current_steps": 2780, "total_steps": 3108, "loss": 0.1589, "lr": 1.35007810328869e-06, "epoch": 6.261261261261261, "percentage": 89.45, "elapsed_time": "3:48:03", "remaining_time": "0:26:54"}
|
||||
{"current_steps": 2785, "total_steps": 3108, "loss": 0.1657, "lr": 1.3098046473488335e-06, "epoch": 6.2725225225225225, "percentage": 89.61, "elapsed_time": "3:48:25", "remaining_time": "0:26:29"}
|
||||
{"current_steps": 2790, "total_steps": 3108, "loss": 0.1683, "lr": 1.2701206692993307e-06, "epoch": 6.283783783783784, "percentage": 89.77, "elapsed_time": "3:48:52", "remaining_time": "0:26:05"}
|
||||
{"current_steps": 2795, "total_steps": 3108, "loss": 0.1701, "lr": 1.2310274207497575e-06, "epoch": 6.295045045045045, "percentage": 89.93, "elapsed_time": "3:49:14", "remaining_time": "0:25:40"}
|
||||
{"current_steps": 2800, "total_steps": 3108, "loss": 0.1695, "lr": 1.1925261346784378e-06, "epoch": 6.306306306306306, "percentage": 90.09, "elapsed_time": "3:49:37", "remaining_time": "0:25:15"}
|
||||
{"current_steps": 2805, "total_steps": 3108, "loss": 0.168, "lr": 1.1546180253935436e-06, "epoch": 6.3175675675675675, "percentage": 90.25, "elapsed_time": "3:50:01", "remaining_time": "0:24:50"}
|
||||
{"current_steps": 2810, "total_steps": 3108, "loss": 0.1683, "lr": 1.1173042884947894e-06, "epoch": 6.328828828828829, "percentage": 90.41, "elapsed_time": "3:50:29", "remaining_time": "0:24:26"}
|
||||
{"current_steps": 2815, "total_steps": 3108, "loss": 0.1708, "lr": 1.0805861008357344e-06, "epoch": 6.34009009009009, "percentage": 90.57, "elapsed_time": "3:50:53", "remaining_time": "0:24:01"}
|
||||
{"current_steps": 2820, "total_steps": 3108, "loss": 0.1702, "lr": 1.0444646204866603e-06, "epoch": 6.351351351351352, "percentage": 90.73, "elapsed_time": "3:51:18", "remaining_time": "0:23:37"}
|
||||
{"current_steps": 2825, "total_steps": 3108, "loss": 0.1701, "lr": 1.0089409866980481e-06, "epoch": 6.362612612612613, "percentage": 90.89, "elapsed_time": "3:51:46", "remaining_time": "0:23:13"}
|
||||
{"current_steps": 2830, "total_steps": 3108, "loss": 0.1851, "lr": 9.740163198646435e-07, "epoch": 6.373873873873874, "percentage": 91.06, "elapsed_time": "3:52:14", "remaining_time": "0:22:48"}
|
||||
{"current_steps": 2835, "total_steps": 3108, "loss": 0.1666, "lr": 9.396917214901258e-07, "epoch": 6.385135135135135, "percentage": 91.22, "elapsed_time": "3:52:37", "remaining_time": "0:22:24"}
|
||||
{"current_steps": 2840, "total_steps": 3108, "loss": 0.166, "lr": 9.059682741523646e-07, "epoch": 6.396396396396397, "percentage": 91.38, "elapsed_time": "3:53:04", "remaining_time": "0:21:59"}
|
||||
{"current_steps": 2845, "total_steps": 3108, "loss": 0.1722, "lr": 8.728470414692714e-07, "epoch": 6.407657657657658, "percentage": 91.54, "elapsed_time": "3:53:30", "remaining_time": "0:21:35"}
|
||||
{"current_steps": 2850, "total_steps": 3108, "loss": 0.1724, "lr": 8.403290680652598e-07, "epoch": 6.418918918918919, "percentage": 91.7, "elapsed_time": "3:53:55", "remaining_time": "0:21:10"}
|
||||
{"current_steps": 2855, "total_steps": 3108, "loss": 0.1579, "lr": 8.084153795382987e-07, "epoch": 6.43018018018018, "percentage": 91.86, "elapsed_time": "3:54:20", "remaining_time": "0:20:45"}
|
||||
{"current_steps": 2860, "total_steps": 3108, "loss": 0.1643, "lr": 7.771069824275623e-07, "epoch": 6.441441441441442, "percentage": 92.02, "elapsed_time": "3:54:43", "remaining_time": "0:20:21"}
|
||||
{"current_steps": 2865, "total_steps": 3108, "loss": 0.172, "lr": 7.464048641816846e-07, "epoch": 6.452702702702703, "percentage": 92.18, "elapsed_time": "3:55:09", "remaining_time": "0:19:56"}
|
||||
{"current_steps": 2870, "total_steps": 3108, "loss": 0.1648, "lr": 7.163099931276152e-07, "epoch": 6.463963963963964, "percentage": 92.34, "elapsed_time": "3:55:33", "remaining_time": "0:19:32"}
|
||||
{"current_steps": 2875, "total_steps": 3108, "loss": 0.1634, "lr": 6.868233184400864e-07, "epoch": 6.475225225225225, "percentage": 92.5, "elapsed_time": "3:55:54", "remaining_time": "0:19:07"}
|
||||
{"current_steps": 2880, "total_steps": 3108, "loss": 0.1699, "lr": 6.57945770111661e-07, "epoch": 6.486486486486487, "percentage": 92.66, "elapsed_time": "3:56:19", "remaining_time": "0:18:42"}
|
||||
{"current_steps": 2885, "total_steps": 3108, "loss": 0.1715, "lr": 6.296782589234251e-07, "epoch": 6.497747747747748, "percentage": 92.82, "elapsed_time": "3:56:42", "remaining_time": "0:18:17"}
|
||||
{"current_steps": 2890, "total_steps": 3108, "loss": 0.1607, "lr": 6.020216764162335e-07, "epoch": 6.509009009009009, "percentage": 92.99, "elapsed_time": "3:57:05", "remaining_time": "0:17:53"}
|
||||
{"current_steps": 2895, "total_steps": 3108, "loss": 0.1815, "lr": 5.749768948626133e-07, "epoch": 6.52027027027027, "percentage": 93.15, "elapsed_time": "3:57:25", "remaining_time": "0:17:28"}
|
||||
{"current_steps": 2900, "total_steps": 3108, "loss": 0.1561, "lr": 5.485447672392474e-07, "epoch": 6.531531531531532, "percentage": 93.31, "elapsed_time": "3:57:50", "remaining_time": "0:17:03"}
|
||||
{"current_steps": 2905, "total_steps": 3108, "loss": 0.1731, "lr": 5.227261272000617e-07, "epoch": 6.542792792792793, "percentage": 93.47, "elapsed_time": "3:58:17", "remaining_time": "0:16:39"}
|
||||
{"current_steps": 2910, "total_steps": 3108, "loss": 0.165, "lr": 4.975217890499484e-07, "epoch": 6.554054054054054, "percentage": 93.63, "elapsed_time": "3:58:39", "remaining_time": "0:16:14"}
|
||||
{"current_steps": 2915, "total_steps": 3108, "loss": 0.1641, "lr": 4.7293254771907604e-07, "epoch": 6.565315315315315, "percentage": 93.79, "elapsed_time": "3:59:01", "remaining_time": "0:15:49"}
|
||||
{"current_steps": 2920, "total_steps": 3108, "loss": 0.174, "lr": 4.489591787378067e-07, "epoch": 6.576576576576577, "percentage": 93.95, "elapsed_time": "3:59:27", "remaining_time": "0:15:25"}
|
||||
{"current_steps": 2925, "total_steps": 3108, "loss": 0.1812, "lr": 4.256024382122603e-07, "epoch": 6.587837837837838, "percentage": 94.11, "elapsed_time": "3:59:54", "remaining_time": "0:15:00"}
|
||||
{"current_steps": 2930, "total_steps": 3108, "loss": 0.1637, "lr": 4.028630628004471e-07, "epoch": 6.599099099099099, "percentage": 94.27, "elapsed_time": "4:00:17", "remaining_time": "0:14:35"}
|
||||
{"current_steps": 2935, "total_steps": 3108, "loss": 0.1695, "lr": 3.8074176968903697e-07, "epoch": 6.61036036036036, "percentage": 94.43, "elapsed_time": "4:00:41", "remaining_time": "0:14:11"}
|
||||
{"current_steps": 2940, "total_steps": 3108, "loss": 0.1723, "lr": 3.5923925657075143e-07, "epoch": 6.621621621621622, "percentage": 94.59, "elapsed_time": "4:01:04", "remaining_time": "0:13:46"}
|
||||
{"current_steps": 2945, "total_steps": 3108, "loss": 0.1767, "lr": 3.383562016223474e-07, "epoch": 6.632882882882883, "percentage": 94.76, "elapsed_time": "4:01:28", "remaining_time": "0:13:21"}
|
||||
{"current_steps": 2950, "total_steps": 3108, "loss": 0.1693, "lr": 3.180932634832279e-07, "epoch": 6.6441441441441444, "percentage": 94.92, "elapsed_time": "4:01:53", "remaining_time": "0:12:57"}
|
||||
{"current_steps": 2955, "total_steps": 3108, "loss": 0.1692, "lr": 2.98451081234683e-07, "epoch": 6.655405405405405, "percentage": 95.08, "elapsed_time": "4:02:16", "remaining_time": "0:12:32"}
|
||||
{"current_steps": 2960, "total_steps": 3108, "loss": 0.1702, "lr": 2.794302743797084e-07, "epoch": 6.666666666666667, "percentage": 95.24, "elapsed_time": "4:02:38", "remaining_time": "0:12:07"}
|
||||
{"current_steps": 2965, "total_steps": 3108, "loss": 0.1712, "lr": 2.610314428234939e-07, "epoch": 6.677927927927928, "percentage": 95.4, "elapsed_time": "4:03:00", "remaining_time": "0:11:43"}
|
||||
{"current_steps": 2970, "total_steps": 3108, "loss": 0.1752, "lr": 2.4325516685448356e-07, "epoch": 6.6891891891891895, "percentage": 95.56, "elapsed_time": "4:03:26", "remaining_time": "0:11:18"}
|
||||
{"current_steps": 2975, "total_steps": 3108, "loss": 0.1685, "lr": 2.261020071260789e-07, "epoch": 6.70045045045045, "percentage": 95.72, "elapsed_time": "4:03:48", "remaining_time": "0:10:53"}
|
||||
{"current_steps": 2980, "total_steps": 3108, "loss": 0.1676, "lr": 2.0957250463896629e-07, "epoch": 6.711711711711712, "percentage": 95.88, "elapsed_time": "4:04:11", "remaining_time": "0:10:29"}
|
||||
{"current_steps": 2985, "total_steps": 3108, "loss": 0.1742, "lr": 1.9366718072403757e-07, "epoch": 6.722972972972973, "percentage": 96.04, "elapsed_time": "4:04:34", "remaining_time": "0:10:04"}
|
||||
{"current_steps": 2990, "total_steps": 3108, "loss": 0.1715, "lr": 1.7838653702595633e-07, "epoch": 6.7342342342342345, "percentage": 96.2, "elapsed_time": "4:04:59", "remaining_time": "0:09:40"}
|
||||
{"current_steps": 2995, "total_steps": 3108, "loss": 0.1802, "lr": 1.6373105548733947e-07, "epoch": 6.745495495495495, "percentage": 96.36, "elapsed_time": "4:05:24", "remaining_time": "0:09:15"}
|
||||
{"current_steps": 3000, "total_steps": 3108, "loss": 0.1723, "lr": 1.4970119833354723e-07, "epoch": 6.756756756756757, "percentage": 96.53, "elapsed_time": "4:05:46", "remaining_time": "0:08:50"}
|
||||
{"current_steps": 3005, "total_steps": 3108, "loss": 0.1752, "lr": 1.3629740805811699e-07, "epoch": 6.768018018018018, "percentage": 96.69, "elapsed_time": "4:06:21", "remaining_time": "0:08:26"}
|
||||
{"current_steps": 3010, "total_steps": 3108, "loss": 0.1656, "lr": 1.2352010740879438e-07, "epoch": 6.7792792792792795, "percentage": 96.85, "elapsed_time": "4:06:45", "remaining_time": "0:08:02"}
|
||||
{"current_steps": 3015, "total_steps": 3108, "loss": 0.1659, "lr": 1.1136969937421305e-07, "epoch": 6.79054054054054, "percentage": 97.01, "elapsed_time": "4:07:12", "remaining_time": "0:07:37"}
|
||||
{"current_steps": 3020, "total_steps": 3108, "loss": 0.1742, "lr": 9.984656717117347e-08, "epoch": 6.801801801801802, "percentage": 97.17, "elapsed_time": "4:07:39", "remaining_time": "0:07:12"}
|
||||
{"current_steps": 3025, "total_steps": 3108, "loss": 0.1725, "lr": 8.895107423256611e-08, "epoch": 6.813063063063063, "percentage": 97.33, "elapsed_time": "4:08:07", "remaining_time": "0:06:48"}
|
||||
{"current_steps": 3030, "total_steps": 3108, "loss": 0.1734, "lr": 7.868356419589606e-08, "epoch": 6.824324324324325, "percentage": 97.49, "elapsed_time": "4:08:28", "remaining_time": "0:06:23"}
|
||||
{"current_steps": 3035, "total_steps": 3108, "loss": 0.1711, "lr": 6.904436089246069e-08, "epoch": 6.835585585585585, "percentage": 97.65, "elapsed_time": "4:08:54", "remaining_time": "0:05:59"}
|
||||
{"current_steps": 3040, "total_steps": 3108, "loss": 0.1752, "lr": 6.003376833712438e-08, "epoch": 6.846846846846847, "percentage": 97.81, "elapsed_time": "4:09:18", "remaining_time": "0:05:34"}
|
||||
{"current_steps": 3045, "total_steps": 3108, "loss": 0.1689, "lr": 5.165207071873735e-08, "epoch": 6.858108108108108, "percentage": 97.97, "elapsed_time": "4:09:42", "remaining_time": "0:05:09"}
|
||||
{"current_steps": 3050, "total_steps": 3108, "loss": 0.1647, "lr": 4.389953239116507e-08, "epoch": 6.86936936936937, "percentage": 98.13, "elapsed_time": "4:10:04", "remaining_time": "0:04:45"}
|
||||
{"current_steps": 3055, "total_steps": 3108, "loss": 0.1739, "lr": 3.6776397864957126e-08, "epoch": 6.88063063063063, "percentage": 98.29, "elapsed_time": "4:10:26", "remaining_time": "0:04:20"}
|
||||
{"current_steps": 3060, "total_steps": 3108, "loss": 0.1707, "lr": 3.028289179963562e-08, "epoch": 6.891891891891892, "percentage": 98.46, "elapsed_time": "4:10:50", "remaining_time": "0:03:56"}
|
||||
{"current_steps": 3065, "total_steps": 3108, "loss": 0.1689, "lr": 2.4419218996603046e-08, "epoch": 6.903153153153153, "percentage": 98.62, "elapsed_time": "4:11:16", "remaining_time": "0:03:31"}
|
||||
{"current_steps": 3070, "total_steps": 3108, "loss": 0.1757, "lr": 1.9185564392689703e-08, "epoch": 6.914414414414415, "percentage": 98.78, "elapsed_time": "4:11:39", "remaining_time": "0:03:06"}
|
||||
{"current_steps": 3075, "total_steps": 3108, "loss": 0.1648, "lr": 1.4582093054318346e-08, "epoch": 6.925675675675675, "percentage": 98.94, "elapsed_time": "4:12:01", "remaining_time": "0:02:42"}
|
||||
{"current_steps": 3080, "total_steps": 3108, "loss": 0.1721, "lr": 1.0608950172295018e-08, "epoch": 6.936936936936937, "percentage": 99.1, "elapsed_time": "4:12:25", "remaining_time": "0:02:17"}
|
||||
{"current_steps": 3085, "total_steps": 3108, "loss": 0.1601, "lr": 7.266261057239376e-09, "epoch": 6.948198198198198, "percentage": 99.26, "elapsed_time": "4:12:49", "remaining_time": "0:01:53"}
|
||||
{"current_steps": 3090, "total_steps": 3108, "loss": 0.1722, "lr": 4.554131135616757e-09, "epoch": 6.95945945945946, "percentage": 99.42, "elapsed_time": "4:13:13", "remaining_time": "0:01:28"}
|
||||
{"current_steps": 3095, "total_steps": 3108, "loss": 0.1613, "lr": 2.4726459464319284e-09, "epoch": 6.9707207207207205, "percentage": 99.58, "elapsed_time": "4:13:38", "remaining_time": "0:01:03"}
|
||||
{"current_steps": 3100, "total_steps": 3108, "loss": 0.1797, "lr": 1.0218711385134861e-09, "epoch": 6.981981981981982, "percentage": 99.74, "elapsed_time": "4:14:03", "remaining_time": "0:00:39"}
|
||||
{"current_steps": 3105, "total_steps": 3108, "loss": 0.1736, "lr": 2.01852468455499e-10, "epoch": 6.993243243243243, "percentage": 99.9, "elapsed_time": "4:14:26", "remaining_time": "0:00:14"}
|
||||
{"current_steps": 3108, "total_steps": 3108, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "4:14:48", "remaining_time": "0:00:00"}
|
||||
9562
trainer_state.json
Normal file
9562
trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:19f81aae2f308d8613f07aedbd21021a2f1509da2c5513b9ed6c8b0c40de82fa
|
||||
size 8529
|
||||
BIN
training_loss.png
Normal file
BIN
training_loss.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 40 KiB |
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user