初始化项目,由ModelHub XC社区提供模型
Model: DCAgent/a1-freelancer Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
60
README.md
Normal file
60
README.md
Normal file
@@ -0,0 +1,60 @@
|
||||
---
|
||||
library_name: transformers
|
||||
license: other
|
||||
base_model: Qwen/Qwen3-8B
|
||||
tags:
|
||||
- llama-factory
|
||||
- full
|
||||
- generated_from_trainer
|
||||
model-index:
|
||||
- name: sft_a1_freelancer__Qwen3-8B
|
||||
results: []
|
||||
---
|
||||
|
||||
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||
should probably proofread and complete it, then remove this comment. -->
|
||||
|
||||
# sft_a1_freelancer__Qwen3-8B
|
||||
|
||||
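This model is a fine-tuned version of [Qwen/Qwen3-8B](https://huggingface.co/Qwen/Qwen3-8B) on a locally preprocessed (`_thinking_preprocessed`) snapshot of the DCAgent/perturbed-docker-exp-freelancer-tasks_glm_4.7_traces dataset (snapshot 678a5760f0b5306a6ab1f04d6276204b2e4f91f6; local training path: /e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--perturbed-docker-exp-freelancer-tasks_glm_4.7_traces/snapshots/678a5760f0b5306a6ab1f04d6276204b2e4f91f6_thinking_preprocessed).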
|
||||
|
||||
## Model description
|
||||
|
||||
More information needed
|
||||
|
||||
## Intended uses & limitations
|
||||
|
||||
More information needed
|
||||
|
||||
## Training and evaluation data
|
||||
|
||||
More information needed
|
||||
|
||||
## Training procedure
|
||||
|
||||
### Training hyperparameters
|
||||
|
||||
The following hyperparameters were used during training:
|
||||
- learning_rate: 4e-05
|
||||
- train_batch_size: 1
|
||||
- eval_batch_size: 8
|
||||
- seed: 42
|
||||
- distributed_type: multi-GPU
|
||||
- num_devices: 16
|
||||
- total_train_batch_size: 16
|
||||
- total_eval_batch_size: 128
|
||||
- optimizer: OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9, 0.98), epsilon=1e-08, and no additional optimizer arguments
|
||||
- lr_scheduler_type: cosine
|
||||
- lr_scheduler_warmup_ratio: 0.1
|
||||
- num_epochs: 7.0
|
||||
|
||||
### Training results
|
||||
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- Transformers 4.57.6
|
||||
- PyTorch 2.9.1+cu130
|
||||
- Datasets 4.7.0
|
||||
- Tokenizers 0.22.2
|
||||
28
added_tokens.json
Normal file
28
added_tokens.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"</think>": 151668,
|
||||
"</tool_call>": 151658,
|
||||
"</tool_response>": 151666,
|
||||
"<think>": 151667,
|
||||
"<tool_call>": 151657,
|
||||
"<tool_response>": 151665,
|
||||
"<|box_end|>": 151649,
|
||||
"<|box_start|>": 151648,
|
||||
"<|endoftext|>": 151643,
|
||||
"<|file_sep|>": 151664,
|
||||
"<|fim_middle|>": 151660,
|
||||
"<|fim_pad|>": 151662,
|
||||
"<|fim_prefix|>": 151659,
|
||||
"<|fim_suffix|>": 151661,
|
||||
"<|im_end|>": 151645,
|
||||
"<|im_start|>": 151644,
|
||||
"<|image_pad|>": 151655,
|
||||
"<|object_ref_end|>": 151647,
|
||||
"<|object_ref_start|>": 151646,
|
||||
"<|quad_end|>": 151651,
|
||||
"<|quad_start|>": 151650,
|
||||
"<|repo_name|>": 151663,
|
||||
"<|video_pad|>": 151656,
|
||||
"<|vision_end|>": 151653,
|
||||
"<|vision_pad|>": 151654,
|
||||
"<|vision_start|>": 151652
|
||||
}
|
||||
16
all_results.json
Normal file
16
all_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.0028009995286067996,
|
||||
"achieved_tflops_per_gpu_theoretical": 619.9865349966398,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.2468055635690689,
|
||||
"mfu_percent": 0.00019795049672132858,
|
||||
"mfu_percent_theoretical": 43.81530282661765,
|
||||
"total_flos": 491779805085696.0,
|
||||
"train_loss": 0.32948466579139146,
|
||||
"train_runtime": 10973.3106,
|
||||
"train_samples_per_second": 2.924,
|
||||
"train_steps_per_second": 0.183,
|
||||
"valid_targets_mean": 2558.1,
|
||||
"valid_targets_min": 187
|
||||
}
|
||||
89
chat_template.jinja
Normal file
89
chat_template.jinja
Normal file
@@ -0,0 +1,89 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- if message.content is string %}
|
||||
{%- set content = message.content %}
|
||||
{%- else %}
|
||||
{%- set content = '' %}
|
||||
{%- endif %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||
{{- '<think>\n\n</think>\n\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
68
config.json
Normal file
68
config.json
Normal file
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 12288,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 40960,
|
||||
"max_window_layers": 36,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 36,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 151643,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"transformers_version": "4.57.6",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
12
generation_config.json
Normal file
12
generation_config.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"temperature": 0.6,
|
||||
"top_k": 20,
|
||||
"top_p": 0.95,
|
||||
"transformers_version": "4.57.6"
|
||||
}
|
||||
151388
merges.txt
Normal file
151388
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00004.safetensors
Normal file
3
model-00001-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7ffd1f03742de0871fa3a583f1cf636a83069ef401370a78036998b502d8df3a
|
||||
size 4902257696
|
||||
3
model-00002-of-00004.safetensors
Normal file
3
model-00002-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d91ec3d380b41c6aba2c096d2598bf71087150abd4208fa94eb2a45b232d0534
|
||||
size 4915960368
|
||||
3
model-00003-of-00004.safetensors
Normal file
3
model-00003-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b85107a5ad97f946e76ab435017eaad2558f27517203458035572fefa6ea5ce9
|
||||
size 4983068496
|
||||
3
model-00004-of-00004.safetensors
Normal file
3
model-00004-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:491aed2282075e9e0477b69c54466095d13b150671d7cec3eb0b769e85767c33
|
||||
size 1580230264
|
||||
407
model.safetensors.index.json
Normal file
407
model.safetensors.index.json
Normal file
@@ -0,0 +1,407 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_parameters": 308224,
|
||||
"total_size": 16381470720
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00004-of-00004.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.32.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.33.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_norm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.34.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_norm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.35.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.norm.weight": "model-00004-of-00004.safetensors"
|
||||
}
|
||||
}
|
||||
12
run_summary.json
Normal file
12
run_summary.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"agent_name": "678a5760f0b5306a6ab1f04d6276204b2e4f91f6_thinking_preprocessed",
|
||||
"training_start": null,
|
||||
"training_end": null,
|
||||
"created_by": "raoof1",
|
||||
"base_model_name": "Qwen/Qwen3-8B",
|
||||
"dataset_name": "/e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--perturbed-docker-exp-freelancer-tasks_glm_4.7_traces/snapshots/678a5760f0b5306a6ab1f04d6276204b2e4f91f6_thinking_preprocessed",
|
||||
"training_type": "SFT",
|
||||
"training_parameters": "https://huggingface.co/DCAgent/a1-freelancer/blob/main/config.json",
|
||||
"wandb_link": null,
|
||||
"traces_location_s3": null
|
||||
}
|
||||
31
special_tokens_map.json
Normal file
31
special_tokens_map.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"eos_token": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
|
||||
size 11422654
|
||||
240
tokenizer_config.json
Normal file
240
tokenizer_config.json
Normal file
@@ -0,0 +1,240 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|object_ref_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|object_ref_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151648": {
|
||||
"content": "<|box_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151649": {
|
||||
"content": "<|box_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151665": {
|
||||
"content": "<tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151666": {
|
||||
"content": "</tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151667": {
|
||||
"content": "<think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151668": {
|
||||
"content": "</think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": {},
|
||||
"model_max_length": 32768,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"padding_side": "right",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
16
train_results.json
Normal file
16
train_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"achieved_tflops_per_gpu": 0.0028009995286067996,
|
||||
"achieved_tflops_per_gpu_theoretical": 619.9865349966398,
|
||||
"epoch": 7.0,
|
||||
"loss_nan_ranks": 0,
|
||||
"loss_rank_avg": 0.2468055635690689,
|
||||
"mfu_percent": 0.00019795049672132858,
|
||||
"mfu_percent_theoretical": 43.81530282661765,
|
||||
"total_flos": 491779805085696.0,
|
||||
"train_loss": 0.32948466579139146,
|
||||
"train_runtime": 10973.3106,
|
||||
"train_samples_per_second": 2.924,
|
||||
"train_steps_per_second": 0.183,
|
||||
"valid_targets_mean": 2558.1,
|
||||
"valid_targets_min": 187
|
||||
}
|
||||
402
trainer_log.jsonl
Normal file
402
trainer_log.jsonl
Normal file
@@ -0,0 +1,402 @@
|
||||
{"current_steps": 5, "total_steps": 2009, "loss": 1.0291, "lr": 7.960199004975125e-07, "epoch": 0.017421602787456445, "percentage": 0.25, "elapsed_time": "0:00:34", "remaining_time": "3:49:09"}
|
||||
{"current_steps": 10, "total_steps": 2009, "loss": 1.0492, "lr": 1.791044776119403e-06, "epoch": 0.03484320557491289, "percentage": 0.5, "elapsed_time": "0:00:59", "remaining_time": "3:19:12"}
|
||||
{"current_steps": 15, "total_steps": 2009, "loss": 0.9669, "lr": 2.786069651741294e-06, "epoch": 0.05226480836236934, "percentage": 0.75, "elapsed_time": "0:01:19", "remaining_time": "2:56:31"}
|
||||
{"current_steps": 20, "total_steps": 2009, "loss": 0.8687, "lr": 3.7810945273631843e-06, "epoch": 0.06968641114982578, "percentage": 1.0, "elapsed_time": "0:01:42", "remaining_time": "2:49:25"}
|
||||
{"current_steps": 25, "total_steps": 2009, "loss": 0.7415, "lr": 4.7761194029850745e-06, "epoch": 0.08710801393728224, "percentage": 1.24, "elapsed_time": "0:02:18", "remaining_time": "3:03:27"}
|
||||
{"current_steps": 30, "total_steps": 2009, "loss": 0.7184, "lr": 5.771144278606966e-06, "epoch": 0.10452961672473868, "percentage": 1.49, "elapsed_time": "0:02:42", "remaining_time": "2:58:54"}
|
||||
{"current_steps": 35, "total_steps": 2009, "loss": 0.6932, "lr": 6.766169154228857e-06, "epoch": 0.12195121951219512, "percentage": 1.74, "elapsed_time": "0:03:10", "remaining_time": "2:58:59"}
|
||||
{"current_steps": 40, "total_steps": 2009, "loss": 0.6469, "lr": 7.761194029850747e-06, "epoch": 0.13937282229965156, "percentage": 1.99, "elapsed_time": "0:03:34", "remaining_time": "2:56:07"}
|
||||
{"current_steps": 45, "total_steps": 2009, "loss": 0.6325, "lr": 8.756218905472637e-06, "epoch": 0.156794425087108, "percentage": 2.24, "elapsed_time": "0:04:03", "remaining_time": "2:57:28"}
|
||||
{"current_steps": 50, "total_steps": 2009, "loss": 0.6417, "lr": 9.751243781094527e-06, "epoch": 0.17421602787456447, "percentage": 2.49, "elapsed_time": "0:04:29", "remaining_time": "2:56:00"}
|
||||
{"current_steps": 55, "total_steps": 2009, "loss": 0.5886, "lr": 1.074626865671642e-05, "epoch": 0.1916376306620209, "percentage": 2.74, "elapsed_time": "0:04:53", "remaining_time": "2:54:04"}
|
||||
{"current_steps": 60, "total_steps": 2009, "loss": 0.5979, "lr": 1.174129353233831e-05, "epoch": 0.20905923344947736, "percentage": 2.99, "elapsed_time": "0:05:18", "remaining_time": "2:52:30"}
|
||||
{"current_steps": 65, "total_steps": 2009, "loss": 0.5626, "lr": 1.27363184079602e-05, "epoch": 0.2264808362369338, "percentage": 3.24, "elapsed_time": "0:05:45", "remaining_time": "2:52:05"}
|
||||
{"current_steps": 70, "total_steps": 2009, "loss": 0.5543, "lr": 1.373134328358209e-05, "epoch": 0.24390243902439024, "percentage": 3.48, "elapsed_time": "0:06:08", "remaining_time": "2:50:05"}
|
||||
{"current_steps": 75, "total_steps": 2009, "loss": 0.5463, "lr": 1.472636815920398e-05, "epoch": 0.2613240418118467, "percentage": 3.73, "elapsed_time": "0:06:36", "remaining_time": "2:50:19"}
|
||||
{"current_steps": 80, "total_steps": 2009, "loss": 0.5041, "lr": 1.572139303482587e-05, "epoch": 0.2787456445993031, "percentage": 3.98, "elapsed_time": "0:07:03", "remaining_time": "2:50:19"}
|
||||
{"current_steps": 85, "total_steps": 2009, "loss": 0.5189, "lr": 1.671641791044776e-05, "epoch": 0.2961672473867596, "percentage": 4.23, "elapsed_time": "0:07:33", "remaining_time": "2:50:58"}
|
||||
{"current_steps": 90, "total_steps": 2009, "loss": 0.486, "lr": 1.771144278606965e-05, "epoch": 0.313588850174216, "percentage": 4.48, "elapsed_time": "0:08:06", "remaining_time": "2:52:55"}
|
||||
{"current_steps": 95, "total_steps": 2009, "loss": 0.5135, "lr": 1.8706467661691545e-05, "epoch": 0.3310104529616725, "percentage": 4.73, "elapsed_time": "0:08:35", "remaining_time": "2:53:09"}
|
||||
{"current_steps": 100, "total_steps": 2009, "loss": 0.4705, "lr": 1.9701492537313435e-05, "epoch": 0.34843205574912894, "percentage": 4.98, "elapsed_time": "0:09:01", "remaining_time": "2:52:11"}
|
||||
{"current_steps": 105, "total_steps": 2009, "loss": 0.4873, "lr": 2.0696517412935325e-05, "epoch": 0.36585365853658536, "percentage": 5.23, "elapsed_time": "0:09:35", "remaining_time": "2:53:54"}
|
||||
{"current_steps": 110, "total_steps": 2009, "loss": 0.488, "lr": 2.1691542288557215e-05, "epoch": 0.3832752613240418, "percentage": 5.48, "elapsed_time": "0:10:09", "remaining_time": "2:55:27"}
|
||||
{"current_steps": 115, "total_steps": 2009, "loss": 0.4869, "lr": 2.2686567164179106e-05, "epoch": 0.40069686411149824, "percentage": 5.72, "elapsed_time": "0:10:35", "remaining_time": "2:54:23"}
|
||||
{"current_steps": 120, "total_steps": 2009, "loss": 0.455, "lr": 2.3681592039800996e-05, "epoch": 0.4181184668989547, "percentage": 5.97, "elapsed_time": "0:11:01", "remaining_time": "2:53:37"}
|
||||
{"current_steps": 125, "total_steps": 2009, "loss": 0.4655, "lr": 2.4676616915422886e-05, "epoch": 0.4355400696864111, "percentage": 6.22, "elapsed_time": "0:11:30", "remaining_time": "2:53:26"}
|
||||
{"current_steps": 130, "total_steps": 2009, "loss": 0.458, "lr": 2.5671641791044776e-05, "epoch": 0.4529616724738676, "percentage": 6.47, "elapsed_time": "0:11:57", "remaining_time": "2:52:56"}
|
||||
{"current_steps": 135, "total_steps": 2009, "loss": 0.4544, "lr": 2.6666666666666667e-05, "epoch": 0.47038327526132406, "percentage": 6.72, "elapsed_time": "0:12:28", "remaining_time": "2:53:08"}
|
||||
{"current_steps": 140, "total_steps": 2009, "loss": 0.4496, "lr": 2.7661691542288557e-05, "epoch": 0.4878048780487805, "percentage": 6.97, "elapsed_time": "0:12:57", "remaining_time": "2:52:54"}
|
||||
{"current_steps": 145, "total_steps": 2009, "loss": 0.4412, "lr": 2.8656716417910447e-05, "epoch": 0.5052264808362369, "percentage": 7.22, "elapsed_time": "0:13:34", "remaining_time": "2:54:36"}
|
||||
{"current_steps": 150, "total_steps": 2009, "loss": 0.4584, "lr": 2.9651741293532337e-05, "epoch": 0.5226480836236934, "percentage": 7.47, "elapsed_time": "0:14:03", "remaining_time": "2:54:10"}
|
||||
{"current_steps": 155, "total_steps": 2009, "loss": 0.4483, "lr": 3.064676616915423e-05, "epoch": 0.5400696864111498, "percentage": 7.72, "elapsed_time": "0:14:28", "remaining_time": "2:53:03"}
|
||||
{"current_steps": 160, "total_steps": 2009, "loss": 0.4589, "lr": 3.164179104477612e-05, "epoch": 0.5574912891986062, "percentage": 7.96, "elapsed_time": "0:14:59", "remaining_time": "2:53:12"}
|
||||
{"current_steps": 165, "total_steps": 2009, "loss": 0.4514, "lr": 3.263681592039801e-05, "epoch": 0.5749128919860628, "percentage": 8.21, "elapsed_time": "0:15:28", "remaining_time": "2:52:59"}
|
||||
{"current_steps": 170, "total_steps": 2009, "loss": 0.4216, "lr": 3.36318407960199e-05, "epoch": 0.5923344947735192, "percentage": 8.46, "elapsed_time": "0:15:58", "remaining_time": "2:52:44"}
|
||||
{"current_steps": 175, "total_steps": 2009, "loss": 0.4312, "lr": 3.462686567164179e-05, "epoch": 0.6097560975609756, "percentage": 8.71, "elapsed_time": "0:16:26", "remaining_time": "2:52:20"}
|
||||
{"current_steps": 180, "total_steps": 2009, "loss": 0.4272, "lr": 3.562189054726369e-05, "epoch": 0.627177700348432, "percentage": 8.96, "elapsed_time": "0:16:52", "remaining_time": "2:51:31"}
|
||||
{"current_steps": 185, "total_steps": 2009, "loss": 0.4523, "lr": 3.6616915422885576e-05, "epoch": 0.6445993031358885, "percentage": 9.21, "elapsed_time": "0:17:20", "remaining_time": "2:50:57"}
|
||||
{"current_steps": 190, "total_steps": 2009, "loss": 0.405, "lr": 3.761194029850747e-05, "epoch": 0.662020905923345, "percentage": 9.46, "elapsed_time": "0:17:50", "remaining_time": "2:50:53"}
|
||||
{"current_steps": 195, "total_steps": 2009, "loss": 0.432, "lr": 3.8606965174129356e-05, "epoch": 0.6794425087108014, "percentage": 9.71, "elapsed_time": "0:18:12", "remaining_time": "2:49:25"}
|
||||
{"current_steps": 200, "total_steps": 2009, "loss": 0.4092, "lr": 3.960199004975125e-05, "epoch": 0.6968641114982579, "percentage": 9.96, "elapsed_time": "0:18:39", "remaining_time": "2:48:50"}
|
||||
{"current_steps": 205, "total_steps": 2009, "loss": 0.4342, "lr": 3.9999728265726364e-05, "epoch": 0.7142857142857143, "percentage": 10.2, "elapsed_time": "0:19:13", "remaining_time": "2:49:13"}
|
||||
{"current_steps": 210, "total_steps": 2009, "loss": 0.4078, "lr": 3.9998067694127626e-05, "epoch": 0.7317073170731707, "percentage": 10.45, "elapsed_time": "0:19:39", "remaining_time": "2:48:24"}
|
||||
{"current_steps": 215, "total_steps": 2009, "loss": 0.4176, "lr": 3.99948976396059e-05, "epoch": 0.7491289198606271, "percentage": 10.7, "elapsed_time": "0:20:17", "remaining_time": "2:49:18"}
|
||||
{"current_steps": 220, "total_steps": 2009, "loss": 0.4038, "lr": 3.999021834144146e-05, "epoch": 0.7665505226480837, "percentage": 10.95, "elapsed_time": "0:20:45", "remaining_time": "2:48:46"}
|
||||
{"current_steps": 225, "total_steps": 2009, "loss": 0.4386, "lr": 3.998403015283447e-05, "epoch": 0.7839721254355401, "percentage": 11.2, "elapsed_time": "0:21:15", "remaining_time": "2:48:36"}
|
||||
{"current_steps": 230, "total_steps": 2009, "loss": 0.4432, "lr": 3.997633354087831e-05, "epoch": 0.8013937282229965, "percentage": 11.45, "elapsed_time": "0:21:44", "remaining_time": "2:48:12"}
|
||||
{"current_steps": 235, "total_steps": 2009, "loss": 0.4198, "lr": 3.996712908652431e-05, "epoch": 0.818815331010453, "percentage": 11.7, "elapsed_time": "0:22:12", "remaining_time": "2:47:42"}
|
||||
{"current_steps": 240, "total_steps": 2009, "loss": 0.4257, "lr": 3.9956417484537956e-05, "epoch": 0.8362369337979094, "percentage": 11.95, "elapsed_time": "0:22:48", "remaining_time": "2:48:04"}
|
||||
{"current_steps": 245, "total_steps": 2009, "loss": 0.4322, "lr": 3.994419954344635e-05, "epoch": 0.8536585365853658, "percentage": 12.2, "elapsed_time": "0:23:19", "remaining_time": "2:47:59"}
|
||||
{"current_steps": 250, "total_steps": 2009, "loss": 0.3865, "lr": 3.9930476185477286e-05, "epoch": 0.8710801393728222, "percentage": 12.44, "elapsed_time": "0:23:48", "remaining_time": "2:47:33"}
|
||||
{"current_steps": 255, "total_steps": 2009, "loss": 0.419, "lr": 3.9915248446489564e-05, "epoch": 0.8885017421602788, "percentage": 12.69, "elapsed_time": "0:24:15", "remaining_time": "2:46:53"}
|
||||
{"current_steps": 260, "total_steps": 2009, "loss": 0.4263, "lr": 3.989851747589482e-05, "epoch": 0.9059233449477352, "percentage": 12.94, "elapsed_time": "0:24:41", "remaining_time": "2:46:07"}
|
||||
{"current_steps": 265, "total_steps": 2009, "loss": 0.4121, "lr": 3.9880284536570796e-05, "epoch": 0.9233449477351916, "percentage": 13.19, "elapsed_time": "0:25:07", "remaining_time": "2:45:22"}
|
||||
{"current_steps": 270, "total_steps": 2009, "loss": 0.4327, "lr": 3.9860551004765985e-05, "epoch": 0.9407665505226481, "percentage": 13.44, "elapsed_time": "0:25:34", "remaining_time": "2:44:44"}
|
||||
{"current_steps": 275, "total_steps": 2009, "loss": 0.4212, "lr": 3.983931836999576e-05, "epoch": 0.9581881533101045, "percentage": 13.69, "elapsed_time": "0:26:02", "remaining_time": "2:44:14"}
|
||||
{"current_steps": 280, "total_steps": 2009, "loss": 0.4256, "lr": 3.981658823492995e-05, "epoch": 0.975609756097561, "percentage": 13.94, "elapsed_time": "0:26:29", "remaining_time": "2:43:34"}
|
||||
{"current_steps": 285, "total_steps": 2009, "loss": 0.4126, "lr": 3.979236231527185e-05, "epoch": 0.9930313588850174, "percentage": 14.19, "elapsed_time": "0:26:57", "remaining_time": "2:43:05"}
|
||||
{"current_steps": 290, "total_steps": 2009, "loss": 0.4157, "lr": 3.976664243962872e-05, "epoch": 1.0104529616724738, "percentage": 14.44, "elapsed_time": "0:27:20", "remaining_time": "2:42:02"}
|
||||
{"current_steps": 295, "total_steps": 2009, "loss": 0.379, "lr": 3.9739430549373796e-05, "epoch": 1.0278745644599303, "percentage": 14.68, "elapsed_time": "0:27:51", "remaining_time": "2:41:52"}
|
||||
{"current_steps": 300, "total_steps": 2009, "loss": 0.3806, "lr": 3.97107286984997e-05, "epoch": 1.0452961672473868, "percentage": 14.93, "elapsed_time": "0:28:17", "remaining_time": "2:41:12"}
|
||||
{"current_steps": 305, "total_steps": 2009, "loss": 0.3972, "lr": 3.968053905346343e-05, "epoch": 1.0627177700348431, "percentage": 15.18, "elapsed_time": "0:28:41", "remaining_time": "2:40:16"}
|
||||
{"current_steps": 310, "total_steps": 2009, "loss": 0.3738, "lr": 3.964886389302284e-05, "epoch": 1.0801393728222997, "percentage": 15.43, "elapsed_time": "0:29:10", "remaining_time": "2:39:54"}
|
||||
{"current_steps": 315, "total_steps": 2009, "loss": 0.3954, "lr": 3.961570560806461e-05, "epoch": 1.0975609756097562, "percentage": 15.68, "elapsed_time": "0:29:40", "remaining_time": "2:39:34"}
|
||||
{"current_steps": 320, "total_steps": 2009, "loss": 0.3862, "lr": 3.9581066701423796e-05, "epoch": 1.1149825783972125, "percentage": 15.93, "elapsed_time": "0:30:12", "remaining_time": "2:39:25"}
|
||||
{"current_steps": 325, "total_steps": 2009, "loss": 0.3896, "lr": 3.954494978769491e-05, "epoch": 1.132404181184669, "percentage": 16.18, "elapsed_time": "0:30:40", "remaining_time": "2:38:55"}
|
||||
{"current_steps": 330, "total_steps": 2009, "loss": 0.3783, "lr": 3.950735759303456e-05, "epoch": 1.1498257839721253, "percentage": 16.43, "elapsed_time": "0:31:15", "remaining_time": "2:39:03"}
|
||||
{"current_steps": 335, "total_steps": 2009, "loss": 0.3969, "lr": 3.94682929549557e-05, "epoch": 1.1672473867595818, "percentage": 16.67, "elapsed_time": "0:31:41", "remaining_time": "2:38:23"}
|
||||
{"current_steps": 340, "total_steps": 2009, "loss": 0.3918, "lr": 3.942775882211341e-05, "epoch": 1.1846689895470384, "percentage": 16.92, "elapsed_time": "0:32:05", "remaining_time": "2:37:29"}
|
||||
{"current_steps": 345, "total_steps": 2009, "loss": 0.3854, "lr": 3.938575825408236e-05, "epoch": 1.202090592334495, "percentage": 17.17, "elapsed_time": "0:32:37", "remaining_time": "2:37:20"}
|
||||
{"current_steps": 350, "total_steps": 2009, "loss": 0.4158, "lr": 3.934229442112585e-05, "epoch": 1.2195121951219512, "percentage": 17.42, "elapsed_time": "0:33:05", "remaining_time": "2:36:49"}
|
||||
{"current_steps": 355, "total_steps": 2009, "loss": 0.4017, "lr": 3.929737060395655e-05, "epoch": 1.2369337979094077, "percentage": 17.67, "elapsed_time": "0:33:34", "remaining_time": "2:36:25"}
|
||||
{"current_steps": 360, "total_steps": 2009, "loss": 0.3924, "lr": 3.925099019348878e-05, "epoch": 1.254355400696864, "percentage": 17.92, "elapsed_time": "0:34:03", "remaining_time": "2:36:01"}
|
||||
{"current_steps": 365, "total_steps": 2009, "loss": 0.3682, "lr": 3.920315669058268e-05, "epoch": 1.2717770034843205, "percentage": 18.17, "elapsed_time": "0:34:29", "remaining_time": "2:35:20"}
|
||||
{"current_steps": 370, "total_steps": 2009, "loss": 0.3838, "lr": 3.9153873705779874e-05, "epoch": 1.289198606271777, "percentage": 18.42, "elapsed_time": "0:35:01", "remaining_time": "2:35:07"}
|
||||
{"current_steps": 375, "total_steps": 2009, "loss": 0.3922, "lr": 3.910314495903095e-05, "epoch": 1.3066202090592334, "percentage": 18.67, "elapsed_time": "0:35:31", "remaining_time": "2:34:49"}
|
||||
{"current_steps": 380, "total_steps": 2009, "loss": 0.3723, "lr": 3.9050974279414714e-05, "epoch": 1.32404181184669, "percentage": 18.91, "elapsed_time": "0:36:04", "remaining_time": "2:34:37"}
|
||||
{"current_steps": 385, "total_steps": 2009, "loss": 0.3802, "lr": 3.899736560484912e-05, "epoch": 1.3414634146341464, "percentage": 19.16, "elapsed_time": "0:36:30", "remaining_time": "2:33:59"}
|
||||
{"current_steps": 390, "total_steps": 2009, "loss": 0.3709, "lr": 3.8942322981794055e-05, "epoch": 1.3588850174216027, "percentage": 19.41, "elapsed_time": "0:36:55", "remaining_time": "2:33:15"}
|
||||
{"current_steps": 395, "total_steps": 2009, "loss": 0.3765, "lr": 3.8885850564945914e-05, "epoch": 1.3763066202090593, "percentage": 19.66, "elapsed_time": "0:37:19", "remaining_time": "2:32:30"}
|
||||
{"current_steps": 400, "total_steps": 2009, "loss": 0.3801, "lr": 3.882795261692397e-05, "epoch": 1.3937282229965158, "percentage": 19.91, "elapsed_time": "0:37:51", "remaining_time": "2:32:18"}
|
||||
{"current_steps": 405, "total_steps": 2009, "loss": 0.3663, "lr": 3.876863350794863e-05, "epoch": 1.411149825783972, "percentage": 20.16, "elapsed_time": "0:38:17", "remaining_time": "2:31:37"}
|
||||
{"current_steps": 410, "total_steps": 2009, "loss": 0.3848, "lr": 3.87078977155116e-05, "epoch": 1.4285714285714286, "percentage": 20.41, "elapsed_time": "0:38:41", "remaining_time": "2:30:54"}
|
||||
{"current_steps": 415, "total_steps": 2009, "loss": 0.369, "lr": 3.864574982403789e-05, "epoch": 1.445993031358885, "percentage": 20.66, "elapsed_time": "0:39:08", "remaining_time": "2:30:22"}
|
||||
{"current_steps": 420, "total_steps": 2009, "loss": 0.3719, "lr": 3.858219452453975e-05, "epoch": 1.4634146341463414, "percentage": 20.91, "elapsed_time": "0:39:32", "remaining_time": "2:29:37"}
|
||||
{"current_steps": 425, "total_steps": 2009, "loss": 0.3516, "lr": 3.851723661426264e-05, "epoch": 1.480836236933798, "percentage": 21.15, "elapsed_time": "0:40:02", "remaining_time": "2:29:14"}
|
||||
{"current_steps": 430, "total_steps": 2009, "loss": 0.3911, "lr": 3.845088099632309e-05, "epoch": 1.4982578397212545, "percentage": 21.4, "elapsed_time": "0:40:33", "remaining_time": "2:28:56"}
|
||||
{"current_steps": 435, "total_steps": 2009, "loss": 0.3801, "lr": 3.838313267933861e-05, "epoch": 1.5156794425087108, "percentage": 21.65, "elapsed_time": "0:41:04", "remaining_time": "2:28:39"}
|
||||
{"current_steps": 440, "total_steps": 2009, "loss": 0.381, "lr": 3.8313996777049644e-05, "epoch": 1.533101045296167, "percentage": 21.9, "elapsed_time": "0:41:32", "remaining_time": "2:28:06"}
|
||||
{"current_steps": 445, "total_steps": 2009, "loss": 0.3624, "lr": 3.824347850793357e-05, "epoch": 1.5505226480836236, "percentage": 22.15, "elapsed_time": "0:41:55", "remaining_time": "2:27:21"}
|
||||
{"current_steps": 450, "total_steps": 2009, "loss": 0.3666, "lr": 3.817158319481081e-05, "epoch": 1.5679442508710801, "percentage": 22.4, "elapsed_time": "0:42:22", "remaining_time": "2:26:47"}
|
||||
{"current_steps": 455, "total_steps": 2009, "loss": 0.3862, "lr": 3.8098316264443033e-05, "epoch": 1.5853658536585367, "percentage": 22.65, "elapsed_time": "0:42:47", "remaining_time": "2:26:10"}
|
||||
{"current_steps": 460, "total_steps": 2009, "loss": 0.3663, "lr": 3.802368324712357e-05, "epoch": 1.6027874564459932, "percentage": 22.9, "elapsed_time": "0:43:08", "remaining_time": "2:25:14"}
|
||||
{"current_steps": 465, "total_steps": 2009, "loss": 0.3602, "lr": 3.794768977625994e-05, "epoch": 1.6202090592334495, "percentage": 23.15, "elapsed_time": "0:43:32", "remaining_time": "2:24:34"}
|
||||
{"current_steps": 470, "total_steps": 2009, "loss": 0.3997, "lr": 3.787034158794867e-05, "epoch": 1.6376306620209058, "percentage": 23.39, "elapsed_time": "0:43:53", "remaining_time": "2:23:44"}
|
||||
{"current_steps": 475, "total_steps": 2009, "loss": 0.378, "lr": 3.779164452054231e-05, "epoch": 1.6550522648083623, "percentage": 23.64, "elapsed_time": "0:44:19", "remaining_time": "2:23:08"}
|
||||
{"current_steps": 480, "total_steps": 2009, "loss": 0.3874, "lr": 3.771160451420873e-05, "epoch": 1.6724738675958188, "percentage": 23.89, "elapsed_time": "0:44:46", "remaining_time": "2:22:37"}
|
||||
{"current_steps": 485, "total_steps": 2009, "loss": 0.3648, "lr": 3.763022761048276e-05, "epoch": 1.6898954703832754, "percentage": 24.14, "elapsed_time": "0:45:13", "remaining_time": "2:22:05"}
|
||||
{"current_steps": 490, "total_steps": 2009, "loss": 0.3817, "lr": 3.7547519951810196e-05, "epoch": 1.7073170731707317, "percentage": 24.39, "elapsed_time": "0:45:39", "remaining_time": "2:21:32"}
|
||||
{"current_steps": 495, "total_steps": 2009, "loss": 0.3681, "lr": 3.7463487781084116e-05, "epoch": 1.7247386759581882, "percentage": 24.64, "elapsed_time": "0:46:06", "remaining_time": "2:21:02"}
|
||||
{"current_steps": 500, "total_steps": 2009, "loss": 0.3869, "lr": 3.737813744117366e-05, "epoch": 1.7421602787456445, "percentage": 24.89, "elapsed_time": "0:46:39", "remaining_time": "2:20:49"}
|
||||
{"current_steps": 505, "total_steps": 2009, "loss": 0.3715, "lr": 3.72914753744453e-05, "epoch": 1.759581881533101, "percentage": 25.14, "elapsed_time": "0:47:07", "remaining_time": "2:20:22"}
|
||||
{"current_steps": 510, "total_steps": 2009, "loss": 0.3628, "lr": 3.7203508122276517e-05, "epoch": 1.7770034843205575, "percentage": 25.39, "elapsed_time": "0:47:33", "remaining_time": "2:19:45"}
|
||||
{"current_steps": 515, "total_steps": 2009, "loss": 0.3702, "lr": 3.7114242324562066e-05, "epoch": 1.794425087108014, "percentage": 25.63, "elapsed_time": "0:47:59", "remaining_time": "2:19:14"}
|
||||
{"current_steps": 520, "total_steps": 2009, "loss": 0.3548, "lr": 3.702368471921281e-05, "epoch": 1.8118466898954704, "percentage": 25.88, "elapsed_time": "0:48:21", "remaining_time": "2:18:27"}
|
||||
{"current_steps": 525, "total_steps": 2009, "loss": 0.3753, "lr": 3.693184214164708e-05, "epoch": 1.8292682926829267, "percentage": 26.13, "elapsed_time": "0:48:47", "remaining_time": "2:17:54"}
|
||||
{"current_steps": 530, "total_steps": 2009, "loss": 0.351, "lr": 3.683872152427479e-05, "epoch": 1.8466898954703832, "percentage": 26.38, "elapsed_time": "0:49:12", "remaining_time": "2:17:18"}
|
||||
{"current_steps": 535, "total_steps": 2009, "loss": 0.3938, "lr": 3.674432989597411e-05, "epoch": 1.8641114982578397, "percentage": 26.63, "elapsed_time": "0:49:38", "remaining_time": "2:16:45"}
|
||||
{"current_steps": 540, "total_steps": 2009, "loss": 0.379, "lr": 3.664867438156096e-05, "epoch": 1.8815331010452963, "percentage": 26.88, "elapsed_time": "0:50:06", "remaining_time": "2:16:19"}
|
||||
{"current_steps": 545, "total_steps": 2009, "loss": 0.3539, "lr": 3.65517622012512e-05, "epoch": 1.8989547038327528, "percentage": 27.13, "elapsed_time": "0:50:38", "remaining_time": "2:16:02"}
|
||||
{"current_steps": 550, "total_steps": 2009, "loss": 0.3655, "lr": 3.645360067011564e-05, "epoch": 1.916376306620209, "percentage": 27.38, "elapsed_time": "0:51:02", "remaining_time": "2:15:24"}
|
||||
{"current_steps": 555, "total_steps": 2009, "loss": 0.3849, "lr": 3.6354197197527884e-05, "epoch": 1.9337979094076654, "percentage": 27.63, "elapsed_time": "0:51:35", "remaining_time": "2:15:08"}
|
||||
{"current_steps": 560, "total_steps": 2009, "loss": 0.3643, "lr": 3.625355928660506e-05, "epoch": 1.951219512195122, "percentage": 27.87, "elapsed_time": "0:52:06", "remaining_time": "2:14:49"}
|
||||
{"current_steps": 565, "total_steps": 2009, "loss": 0.379, "lr": 3.6151694533641496e-05, "epoch": 1.9686411149825784, "percentage": 28.12, "elapsed_time": "0:52:31", "remaining_time": "2:14:15"}
|
||||
{"current_steps": 570, "total_steps": 2009, "loss": 0.3805, "lr": 3.6048610627535296e-05, "epoch": 1.986062717770035, "percentage": 28.37, "elapsed_time": "0:53:05", "remaining_time": "2:14:02"}
|
||||
{"current_steps": 575, "total_steps": 2009, "loss": 0.3636, "lr": 3.5944315349208016e-05, "epoch": 2.0034843205574915, "percentage": 28.62, "elapsed_time": "0:53:39", "remaining_time": "2:13:49"}
|
||||
{"current_steps": 580, "total_steps": 2009, "loss": 0.3174, "lr": 3.5838816571017324e-05, "epoch": 2.0209059233449476, "percentage": 28.87, "elapsed_time": "0:54:06", "remaining_time": "2:13:17"}
|
||||
{"current_steps": 585, "total_steps": 2009, "loss": 0.3089, "lr": 3.57321222561628e-05, "epoch": 2.038327526132404, "percentage": 29.12, "elapsed_time": "0:54:34", "remaining_time": "2:12:51"}
|
||||
{"current_steps": 590, "total_steps": 2009, "loss": 0.3227, "lr": 3.562424045808483e-05, "epoch": 2.0557491289198606, "percentage": 29.37, "elapsed_time": "0:55:01", "remaining_time": "2:12:21"}
|
||||
{"current_steps": 595, "total_steps": 2009, "loss": 0.3452, "lr": 3.551517931985676e-05, "epoch": 2.073170731707317, "percentage": 29.62, "elapsed_time": "0:55:29", "remaining_time": "2:11:52"}
|
||||
{"current_steps": 600, "total_steps": 2009, "loss": 0.3259, "lr": 3.5404947073570225e-05, "epoch": 2.0905923344947737, "percentage": 29.87, "elapsed_time": "0:55:53", "remaining_time": "2:11:14"}
|
||||
{"current_steps": 605, "total_steps": 2009, "loss": 0.3434, "lr": 3.529355203971377e-05, "epoch": 2.10801393728223, "percentage": 30.11, "elapsed_time": "0:56:20", "remaining_time": "2:10:45"}
|
||||
{"current_steps": 610, "total_steps": 2009, "loss": 0.3301, "lr": 3.518100262654483e-05, "epoch": 2.1254355400696863, "percentage": 30.36, "elapsed_time": "0:56:50", "remaining_time": "2:10:22"}
|
||||
{"current_steps": 615, "total_steps": 2009, "loss": 0.3288, "lr": 3.506730732945506e-05, "epoch": 2.142857142857143, "percentage": 30.61, "elapsed_time": "0:57:11", "remaining_time": "2:09:37"}
|
||||
{"current_steps": 620, "total_steps": 2009, "loss": 0.3532, "lr": 3.495247473032905e-05, "epoch": 2.1602787456445993, "percentage": 30.86, "elapsed_time": "0:57:47", "remaining_time": "2:09:29"}
|
||||
{"current_steps": 625, "total_steps": 2009, "loss": 0.348, "lr": 3.483651349689661e-05, "epoch": 2.177700348432056, "percentage": 31.11, "elapsed_time": "0:58:18", "remaining_time": "2:09:06"}
|
||||
{"current_steps": 630, "total_steps": 2009, "loss": 0.3206, "lr": 3.4719432382078496e-05, "epoch": 2.1951219512195124, "percentage": 31.36, "elapsed_time": "0:58:44", "remaining_time": "2:08:34"}
|
||||
{"current_steps": 635, "total_steps": 2009, "loss": 0.3286, "lr": 3.4601240223325704e-05, "epoch": 2.2125435540069684, "percentage": 31.61, "elapsed_time": "0:59:15", "remaining_time": "2:08:14"}
|
||||
{"current_steps": 640, "total_steps": 2009, "loss": 0.343, "lr": 3.448194594195244e-05, "epoch": 2.229965156794425, "percentage": 31.86, "elapsed_time": "0:59:40", "remaining_time": "2:07:39"}
|
||||
{"current_steps": 645, "total_steps": 2009, "loss": 0.3471, "lr": 3.436155854246269e-05, "epoch": 2.2473867595818815, "percentage": 32.11, "elapsed_time": "1:00:06", "remaining_time": "2:07:06"}
|
||||
{"current_steps": 650, "total_steps": 2009, "loss": 0.343, "lr": 3.42400871118706e-05, "epoch": 2.264808362369338, "percentage": 32.35, "elapsed_time": "1:00:33", "remaining_time": "2:06:37"}
|
||||
{"current_steps": 655, "total_steps": 2009, "loss": 0.341, "lr": 3.411754081901451e-05, "epoch": 2.2822299651567945, "percentage": 32.6, "elapsed_time": "1:01:02", "remaining_time": "2:06:10"}
|
||||
{"current_steps": 660, "total_steps": 2009, "loss": 0.3287, "lr": 3.3993928913864934e-05, "epoch": 2.2996515679442506, "percentage": 32.85, "elapsed_time": "1:01:30", "remaining_time": "2:05:43"}
|
||||
{"current_steps": 665, "total_steps": 2009, "loss": 0.3447, "lr": 3.386926072682632e-05, "epoch": 2.317073170731707, "percentage": 33.1, "elapsed_time": "1:01:58", "remaining_time": "2:05:16"}
|
||||
{"current_steps": 670, "total_steps": 2009, "loss": 0.3452, "lr": 3.374354566803281e-05, "epoch": 2.3344947735191637, "percentage": 33.35, "elapsed_time": "1:02:22", "remaining_time": "2:04:39"}
|
||||
{"current_steps": 675, "total_steps": 2009, "loss": 0.3393, "lr": 3.361679322663792e-05, "epoch": 2.35191637630662, "percentage": 33.6, "elapsed_time": "1:02:46", "remaining_time": "2:04:03"}
|
||||
{"current_steps": 680, "total_steps": 2009, "loss": 0.3496, "lr": 3.348901297009829e-05, "epoch": 2.3693379790940767, "percentage": 33.85, "elapsed_time": "1:03:12", "remaining_time": "2:03:31"}
|
||||
{"current_steps": 685, "total_steps": 2009, "loss": 0.3213, "lr": 3.3360214543451546e-05, "epoch": 2.3867595818815333, "percentage": 34.1, "elapsed_time": "1:03:40", "remaining_time": "2:03:03"}
|
||||
{"current_steps": 690, "total_steps": 2009, "loss": 0.3274, "lr": 3.323040766858824e-05, "epoch": 2.40418118466899, "percentage": 34.35, "elapsed_time": "1:04:07", "remaining_time": "2:02:35"}
|
||||
{"current_steps": 695, "total_steps": 2009, "loss": 0.3391, "lr": 3.3099602143518054e-05, "epoch": 2.421602787456446, "percentage": 34.59, "elapsed_time": "1:04:35", "remaining_time": "2:02:08"}
|
||||
{"current_steps": 700, "total_steps": 2009, "loss": 0.3306, "lr": 3.2967807841630224e-05, "epoch": 2.4390243902439024, "percentage": 34.84, "elapsed_time": "1:05:07", "remaining_time": "2:01:47"}
|
||||
{"current_steps": 705, "total_steps": 2009, "loss": 0.3408, "lr": 3.2835034710948295e-05, "epoch": 2.456445993031359, "percentage": 35.09, "elapsed_time": "1:05:36", "remaining_time": "2:01:21"}
|
||||
{"current_steps": 710, "total_steps": 2009, "loss": 0.3473, "lr": 3.270129277337919e-05, "epoch": 2.4738675958188154, "percentage": 35.34, "elapsed_time": "1:06:09", "remaining_time": "2:01:02"}
|
||||
{"current_steps": 715, "total_steps": 2009, "loss": 0.3285, "lr": 3.25665921239568e-05, "epoch": 2.491289198606272, "percentage": 35.59, "elapsed_time": "1:06:31", "remaining_time": "2:00:24"}
|
||||
{"current_steps": 720, "total_steps": 2009, "loss": 0.3419, "lr": 3.243094293007995e-05, "epoch": 2.508710801393728, "percentage": 35.84, "elapsed_time": "1:06:57", "remaining_time": "1:59:53"}
|
||||
{"current_steps": 725, "total_steps": 2009, "loss": 0.3303, "lr": 3.2294355430744955e-05, "epoch": 2.5261324041811846, "percentage": 36.09, "elapsed_time": "1:07:29", "remaining_time": "1:59:31"}
|
||||
{"current_steps": 730, "total_steps": 2009, "loss": 0.341, "lr": 3.2156839935772805e-05, "epoch": 2.543554006968641, "percentage": 36.34, "elapsed_time": "1:08:00", "remaining_time": "1:59:09"}
|
||||
{"current_steps": 735, "total_steps": 2009, "loss": 0.3334, "lr": 3.201840682503091e-05, "epoch": 2.5609756097560976, "percentage": 36.59, "elapsed_time": "1:08:29", "remaining_time": "1:58:42"}
|
||||
{"current_steps": 740, "total_steps": 2009, "loss": 0.3263, "lr": 3.1879066547649645e-05, "epoch": 2.578397212543554, "percentage": 36.83, "elapsed_time": "1:08:53", "remaining_time": "1:58:08"}
|
||||
{"current_steps": 745, "total_steps": 2009, "loss": 0.3464, "lr": 3.173882962123364e-05, "epoch": 2.59581881533101, "percentage": 37.08, "elapsed_time": "1:09:23", "remaining_time": "1:57:44"}
|
||||
{"current_steps": 750, "total_steps": 2009, "loss": 0.3255, "lr": 3.1597706631067864e-05, "epoch": 2.6132404181184667, "percentage": 37.33, "elapsed_time": "1:09:55", "remaining_time": "1:57:23"}
|
||||
{"current_steps": 755, "total_steps": 2009, "loss": 0.3331, "lr": 3.145570822931868e-05, "epoch": 2.6306620209059233, "percentage": 37.58, "elapsed_time": "1:10:19", "remaining_time": "1:56:48"}
|
||||
{"current_steps": 760, "total_steps": 2009, "loss": 0.3358, "lr": 3.1312845134229744e-05, "epoch": 2.64808362369338, "percentage": 37.83, "elapsed_time": "1:10:43", "remaining_time": "1:56:13"}
|
||||
{"current_steps": 765, "total_steps": 2009, "loss": 0.3319, "lr": 3.116912812931304e-05, "epoch": 2.6655052264808363, "percentage": 38.08, "elapsed_time": "1:11:06", "remaining_time": "1:55:38"}
|
||||
{"current_steps": 770, "total_steps": 2009, "loss": 0.3202, "lr": 3.102456806253488e-05, "epoch": 2.682926829268293, "percentage": 38.33, "elapsed_time": "1:11:37", "remaining_time": "1:55:15"}
|
||||
{"current_steps": 775, "total_steps": 2009, "loss": 0.3403, "lr": 3.087917584549708e-05, "epoch": 2.7003484320557494, "percentage": 38.58, "elapsed_time": "1:11:59", "remaining_time": "1:54:38"}
|
||||
{"current_steps": 780, "total_steps": 2009, "loss": 0.325, "lr": 3.0732962452613385e-05, "epoch": 2.7177700348432055, "percentage": 38.83, "elapsed_time": "1:12:30", "remaining_time": "1:54:15"}
|
||||
{"current_steps": 785, "total_steps": 2009, "loss": 0.335, "lr": 3.0585938920281075e-05, "epoch": 2.735191637630662, "percentage": 39.07, "elapsed_time": "1:12:49", "remaining_time": "1:53:33"}
|
||||
{"current_steps": 790, "total_steps": 2009, "loss": 0.3257, "lr": 3.0438116346047897e-05, "epoch": 2.7526132404181185, "percentage": 39.32, "elapsed_time": "1:13:21", "remaining_time": "1:53:11"}
|
||||
{"current_steps": 795, "total_steps": 2009, "loss": 0.3281, "lr": 3.0289505887774445e-05, "epoch": 2.770034843205575, "percentage": 39.57, "elapsed_time": "1:13:43", "remaining_time": "1:52:34"}
|
||||
{"current_steps": 800, "total_steps": 2009, "loss": 0.331, "lr": 3.0140118762791935e-05, "epoch": 2.7874564459930316, "percentage": 39.82, "elapsed_time": "1:14:11", "remaining_time": "1:52:07"}
|
||||
{"current_steps": 805, "total_steps": 2009, "loss": 0.3265, "lr": 2.99899662470555e-05, "epoch": 2.8048780487804876, "percentage": 40.07, "elapsed_time": "1:14:38", "remaining_time": "1:51:37"}
|
||||
{"current_steps": 810, "total_steps": 2009, "loss": 0.3379, "lr": 2.9839059674293058e-05, "epoch": 2.822299651567944, "percentage": 40.32, "elapsed_time": "1:15:02", "remaining_time": "1:51:05"}
|
||||
{"current_steps": 815, "total_steps": 2009, "loss": 0.3355, "lr": 2.9687410435149865e-05, "epoch": 2.8397212543554007, "percentage": 40.57, "elapsed_time": "1:15:30", "remaining_time": "1:50:37"}
|
||||
{"current_steps": 820, "total_steps": 2009, "loss": 0.3431, "lr": 2.9535029976328675e-05, "epoch": 2.857142857142857, "percentage": 40.82, "elapsed_time": "1:15:55", "remaining_time": "1:50:04"}
|
||||
{"current_steps": 825, "total_steps": 2009, "loss": 0.3322, "lr": 2.9381929799725764e-05, "epoch": 2.8745644599303137, "percentage": 41.07, "elapsed_time": "1:16:20", "remaining_time": "1:49:33"}
|
||||
{"current_steps": 830, "total_steps": 2009, "loss": 0.338, "lr": 2.9228121461562754e-05, "epoch": 2.89198606271777, "percentage": 41.31, "elapsed_time": "1:16:53", "remaining_time": "1:49:13"}
|
||||
{"current_steps": 835, "total_steps": 2009, "loss": 0.3241, "lr": 2.90736165715143e-05, "epoch": 2.9094076655052263, "percentage": 41.56, "elapsed_time": "1:17:20", "remaining_time": "1:48:44"}
|
||||
{"current_steps": 840, "total_steps": 2009, "loss": 0.3113, "lr": 2.8918426791831815e-05, "epoch": 2.926829268292683, "percentage": 41.81, "elapsed_time": "1:17:45", "remaining_time": "1:48:12"}
|
||||
{"current_steps": 845, "total_steps": 2009, "loss": 0.3354, "lr": 2.8762563836463155e-05, "epoch": 2.9442508710801394, "percentage": 42.06, "elapsed_time": "1:18:14", "remaining_time": "1:47:46"}
|
||||
{"current_steps": 850, "total_steps": 2009, "loss": 0.3198, "lr": 2.860603947016845e-05, "epoch": 2.961672473867596, "percentage": 42.31, "elapsed_time": "1:18:41", "remaining_time": "1:47:18"}
|
||||
{"current_steps": 855, "total_steps": 2009, "loss": 0.36, "lr": 2.8448865507632075e-05, "epoch": 2.979094076655052, "percentage": 42.56, "elapsed_time": "1:19:07", "remaining_time": "1:46:48"}
|
||||
{"current_steps": 860, "total_steps": 2009, "loss": 0.3345, "lr": 2.8291053812570862e-05, "epoch": 2.996515679442509, "percentage": 42.81, "elapsed_time": "1:19:33", "remaining_time": "1:46:17"}
|
||||
{"current_steps": 865, "total_steps": 2009, "loss": 0.3103, "lr": 2.8132616296838623e-05, "epoch": 3.013937282229965, "percentage": 43.06, "elapsed_time": "1:20:01", "remaining_time": "1:45:50"}
|
||||
{"current_steps": 870, "total_steps": 2009, "loss": 0.2778, "lr": 2.7973564919526998e-05, "epoch": 3.0313588850174216, "percentage": 43.31, "elapsed_time": "1:20:37", "remaining_time": "1:45:32"}
|
||||
{"current_steps": 875, "total_steps": 2009, "loss": 0.3086, "lr": 2.7813911686062804e-05, "epoch": 3.048780487804878, "percentage": 43.55, "elapsed_time": "1:21:03", "remaining_time": "1:45:03"}
|
||||
{"current_steps": 880, "total_steps": 2009, "loss": 0.3001, "lr": 2.7653668647301797e-05, "epoch": 3.0662020905923346, "percentage": 43.8, "elapsed_time": "1:21:31", "remaining_time": "1:44:35"}
|
||||
{"current_steps": 885, "total_steps": 2009, "loss": 0.2918, "lr": 2.7492847898619115e-05, "epoch": 3.083623693379791, "percentage": 44.05, "elapsed_time": "1:21:55", "remaining_time": "1:44:03"}
|
||||
{"current_steps": 890, "total_steps": 2009, "loss": 0.31, "lr": 2.733146157899626e-05, "epoch": 3.1010452961672472, "percentage": 44.3, "elapsed_time": "1:22:21", "remaining_time": "1:43:33"}
|
||||
{"current_steps": 895, "total_steps": 2009, "loss": 0.3023, "lr": 2.7169521870104833e-05, "epoch": 3.1184668989547037, "percentage": 44.55, "elapsed_time": "1:22:47", "remaining_time": "1:43:02"}
|
||||
{"current_steps": 900, "total_steps": 2009, "loss": 0.2915, "lr": 2.7007040995387065e-05, "epoch": 3.1358885017421603, "percentage": 44.8, "elapsed_time": "1:23:23", "remaining_time": "1:42:45"}
|
||||
{"current_steps": 905, "total_steps": 2009, "loss": 0.3023, "lr": 2.6844031219133164e-05, "epoch": 3.153310104529617, "percentage": 45.05, "elapsed_time": "1:23:47", "remaining_time": "1:42:12"}
|
||||
{"current_steps": 910, "total_steps": 2009, "loss": 0.3072, "lr": 2.6680504845555577e-05, "epoch": 3.1707317073170733, "percentage": 45.3, "elapsed_time": "1:24:12", "remaining_time": "1:41:42"}
|
||||
{"current_steps": 915, "total_steps": 2009, "loss": 0.308, "lr": 2.651647421786026e-05, "epoch": 3.1881533101045294, "percentage": 45.55, "elapsed_time": "1:24:38", "remaining_time": "1:41:11"}
|
||||
{"current_steps": 920, "total_steps": 2009, "loss": 0.3038, "lr": 2.6351951717315012e-05, "epoch": 3.205574912891986, "percentage": 45.79, "elapsed_time": "1:25:05", "remaining_time": "1:40:43"}
|
||||
{"current_steps": 925, "total_steps": 2009, "loss": 0.295, "lr": 2.618694976231488e-05, "epoch": 3.2229965156794425, "percentage": 46.04, "elapsed_time": "1:25:38", "remaining_time": "1:40:22"}
|
||||
{"current_steps": 930, "total_steps": 2009, "loss": 0.3232, "lr": 2.602148080744484e-05, "epoch": 3.240418118466899, "percentage": 46.29, "elapsed_time": "1:26:05", "remaining_time": "1:39:52"}
|
||||
{"current_steps": 935, "total_steps": 2009, "loss": 0.2872, "lr": 2.5855557342539683e-05, "epoch": 3.2578397212543555, "percentage": 46.54, "elapsed_time": "1:26:35", "remaining_time": "1:39:27"}
|
||||
{"current_steps": 940, "total_steps": 2009, "loss": 0.3147, "lr": 2.5689191891741274e-05, "epoch": 3.275261324041812, "percentage": 46.79, "elapsed_time": "1:26:59", "remaining_time": "1:38:55"}
|
||||
{"current_steps": 945, "total_steps": 2009, "loss": 0.2904, "lr": 2.5522397012553204e-05, "epoch": 3.292682926829268, "percentage": 47.04, "elapsed_time": "1:27:35", "remaining_time": "1:38:37"}
|
||||
{"current_steps": 950, "total_steps": 2009, "loss": 0.3015, "lr": 2.5355185294892945e-05, "epoch": 3.3101045296167246, "percentage": 47.29, "elapsed_time": "1:28:03", "remaining_time": "1:38:10"}
|
||||
{"current_steps": 955, "total_steps": 2009, "loss": 0.2952, "lr": 2.5187569360141534e-05, "epoch": 3.327526132404181, "percentage": 47.54, "elapsed_time": "1:28:27", "remaining_time": "1:37:37"}
|
||||
{"current_steps": 960, "total_steps": 2009, "loss": 0.3007, "lr": 2.5019561860190883e-05, "epoch": 3.3449477351916377, "percentage": 47.78, "elapsed_time": "1:28:50", "remaining_time": "1:37:04"}
|
||||
{"current_steps": 965, "total_steps": 2009, "loss": 0.2861, "lr": 2.485117547648883e-05, "epoch": 3.362369337979094, "percentage": 48.03, "elapsed_time": "1:29:11", "remaining_time": "1:36:29"}
|
||||
{"current_steps": 970, "total_steps": 2009, "loss": 0.301, "lr": 2.468242291908188e-05, "epoch": 3.3797909407665507, "percentage": 48.28, "elapsed_time": "1:29:35", "remaining_time": "1:35:57"}
|
||||
{"current_steps": 975, "total_steps": 2009, "loss": 0.3105, "lr": 2.4513316925655882e-05, "epoch": 3.397212543554007, "percentage": 48.53, "elapsed_time": "1:30:00", "remaining_time": "1:35:27"}
|
||||
{"current_steps": 980, "total_steps": 2009, "loss": 0.3055, "lr": 2.4343870260574528e-05, "epoch": 3.4146341463414633, "percentage": 48.78, "elapsed_time": "1:30:30", "remaining_time": "1:35:01"}
|
||||
{"current_steps": 985, "total_steps": 2009, "loss": 0.2793, "lr": 2.4174095713915898e-05, "epoch": 3.43205574912892, "percentage": 49.03, "elapsed_time": "1:31:04", "remaining_time": "1:34:40"}
|
||||
{"current_steps": 990, "total_steps": 2009, "loss": 0.3102, "lr": 2.4004006100507048e-05, "epoch": 3.4494773519163764, "percentage": 49.28, "elapsed_time": "1:31:32", "remaining_time": "1:34:13"}
|
||||
{"current_steps": 995, "total_steps": 2009, "loss": 0.2941, "lr": 2.3833614258956725e-05, "epoch": 3.466898954703833, "percentage": 49.53, "elapsed_time": "1:32:02", "remaining_time": "1:33:48"}
|
||||
{"current_steps": 1000, "total_steps": 2009, "loss": 0.3022, "lr": 2.3662933050686293e-05, "epoch": 3.484320557491289, "percentage": 49.78, "elapsed_time": "1:32:31", "remaining_time": "1:33:21"}
|
||||
{"current_steps": 1005, "total_steps": 2009, "loss": 0.2991, "lr": 2.3491975358958925e-05, "epoch": 3.5017421602787455, "percentage": 50.02, "elapsed_time": "1:32:56", "remaining_time": "1:32:51"}
|
||||
{"current_steps": 1010, "total_steps": 2009, "loss": 0.3025, "lr": 2.3320754087907176e-05, "epoch": 3.519163763066202, "percentage": 50.27, "elapsed_time": "1:33:23", "remaining_time": "1:32:22"}
|
||||
{"current_steps": 1015, "total_steps": 2009, "loss": 0.2821, "lr": 2.314928216155893e-05, "epoch": 3.5365853658536586, "percentage": 50.52, "elapsed_time": "1:33:46", "remaining_time": "1:31:50"}
|
||||
{"current_steps": 1020, "total_steps": 2009, "loss": 0.3093, "lr": 2.297757252286191e-05, "epoch": 3.554006968641115, "percentage": 50.77, "elapsed_time": "1:34:12", "remaining_time": "1:31:20"}
|
||||
{"current_steps": 1025, "total_steps": 2009, "loss": 0.3047, "lr": 2.2805638132706696e-05, "epoch": 3.571428571428571, "percentage": 51.02, "elapsed_time": "1:34:37", "remaining_time": "1:30:50"}
|
||||
{"current_steps": 1030, "total_steps": 2009, "loss": 0.2992, "lr": 2.2633491968948454e-05, "epoch": 3.588850174216028, "percentage": 51.27, "elapsed_time": "1:35:01", "remaining_time": "1:30:19"}
|
||||
{"current_steps": 1035, "total_steps": 2009, "loss": 0.2833, "lr": 2.246114702542729e-05, "epoch": 3.6062717770034842, "percentage": 51.52, "elapsed_time": "1:35:32", "remaining_time": "1:29:54"}
|
||||
{"current_steps": 1040, "total_steps": 2009, "loss": 0.3232, "lr": 2.228861631098753e-05, "epoch": 3.6236933797909407, "percentage": 51.77, "elapsed_time": "1:35:56", "remaining_time": "1:29:23"}
|
||||
{"current_steps": 1045, "total_steps": 2009, "loss": 0.2973, "lr": 2.2115912848495725e-05, "epoch": 3.6411149825783973, "percentage": 52.02, "elapsed_time": "1:36:27", "remaining_time": "1:28:58"}
|
||||
{"current_steps": 1050, "total_steps": 2009, "loss": 0.3058, "lr": 2.194304967385772e-05, "epoch": 3.658536585365854, "percentage": 52.26, "elapsed_time": "1:36:58", "remaining_time": "1:28:34"}
|
||||
{"current_steps": 1055, "total_steps": 2009, "loss": 0.2897, "lr": 2.177003983503465e-05, "epoch": 3.6759581881533103, "percentage": 52.51, "elapsed_time": "1:37:24", "remaining_time": "1:28:05"}
|
||||
{"current_steps": 1060, "total_steps": 2009, "loss": 0.2989, "lr": 2.1596896391058082e-05, "epoch": 3.6933797909407664, "percentage": 52.76, "elapsed_time": "1:37:49", "remaining_time": "1:27:34"}
|
||||
{"current_steps": 1065, "total_steps": 2009, "loss": 0.2905, "lr": 2.1423632411044283e-05, "epoch": 3.710801393728223, "percentage": 53.01, "elapsed_time": "1:38:09", "remaining_time": "1:27:00"}
|
||||
{"current_steps": 1070, "total_steps": 2009, "loss": 0.3053, "lr": 2.1250260973207757e-05, "epoch": 3.7282229965156795, "percentage": 53.26, "elapsed_time": "1:38:31", "remaining_time": "1:26:27"}
|
||||
{"current_steps": 1075, "total_steps": 2009, "loss": 0.2897, "lr": 2.1076795163874096e-05, "epoch": 3.745644599303136, "percentage": 53.51, "elapsed_time": "1:38:59", "remaining_time": "1:26:00"}
|
||||
{"current_steps": 1080, "total_steps": 2009, "loss": 0.2959, "lr": 2.0903248076492152e-05, "epoch": 3.7630662020905925, "percentage": 53.76, "elapsed_time": "1:39:31", "remaining_time": "1:25:36"}
|
||||
{"current_steps": 1085, "total_steps": 2009, "loss": 0.2978, "lr": 2.072963281064579e-05, "epoch": 3.7804878048780486, "percentage": 54.01, "elapsed_time": "1:39:55", "remaining_time": "1:25:06"}
|
||||
{"current_steps": 1090, "total_steps": 2009, "loss": 0.2898, "lr": 2.0555962471065082e-05, "epoch": 3.797909407665505, "percentage": 54.26, "elapsed_time": "1:40:21", "remaining_time": "1:24:37"}
|
||||
{"current_steps": 1095, "total_steps": 2009, "loss": 0.317, "lr": 2.0382250166637133e-05, "epoch": 3.8153310104529616, "percentage": 54.5, "elapsed_time": "1:40:46", "remaining_time": "1:24:07"}
|
||||
{"current_steps": 1100, "total_steps": 2009, "loss": 0.3014, "lr": 2.020850900941662e-05, "epoch": 3.832752613240418, "percentage": 54.75, "elapsed_time": "1:41:15", "remaining_time": "1:23:40"}
|
||||
{"current_steps": 1105, "total_steps": 2009, "loss": 0.3022, "lr": 2.0034752113636072e-05, "epoch": 3.8501742160278747, "percentage": 55.0, "elapsed_time": "1:41:45", "remaining_time": "1:23:15"}
|
||||
{"current_steps": 1110, "total_steps": 2009, "loss": 0.2898, "lr": 1.9860992594715988e-05, "epoch": 3.8675958188153308, "percentage": 55.25, "elapsed_time": "1:42:08", "remaining_time": "1:22:43"}
|
||||
{"current_steps": 1115, "total_steps": 2009, "loss": 0.2967, "lr": 1.968724356827485e-05, "epoch": 3.8850174216027873, "percentage": 55.5, "elapsed_time": "1:42:33", "remaining_time": "1:22:13"}
|
||||
{"current_steps": 1120, "total_steps": 2009, "loss": 0.2993, "lr": 1.9513518149139183e-05, "epoch": 3.902439024390244, "percentage": 55.75, "elapsed_time": "1:43:02", "remaining_time": "1:21:47"}
|
||||
{"current_steps": 1125, "total_steps": 2009, "loss": 0.3074, "lr": 1.9339829450353574e-05, "epoch": 3.9198606271777003, "percentage": 56.0, "elapsed_time": "1:43:26", "remaining_time": "1:21:17"}
|
||||
{"current_steps": 1130, "total_steps": 2009, "loss": 0.3016, "lr": 1.9166190582190907e-05, "epoch": 3.937282229965157, "percentage": 56.25, "elapsed_time": "1:43:56", "remaining_time": "1:20:51"}
|
||||
{"current_steps": 1135, "total_steps": 2009, "loss": 0.3052, "lr": 1.89926146511628e-05, "epoch": 3.9547038327526134, "percentage": 56.5, "elapsed_time": "1:44:27", "remaining_time": "1:20:26"}
|
||||
{"current_steps": 1140, "total_steps": 2009, "loss": 0.3073, "lr": 1.8819114759030255e-05, "epoch": 3.97212543554007, "percentage": 56.74, "elapsed_time": "1:44:53", "remaining_time": "1:19:57"}
|
||||
{"current_steps": 1145, "total_steps": 2009, "loss": 0.313, "lr": 1.8645704001814766e-05, "epoch": 3.989547038327526, "percentage": 56.99, "elapsed_time": "1:45:19", "remaining_time": "1:19:28"}
|
||||
{"current_steps": 1150, "total_steps": 2009, "loss": 0.2716, "lr": 1.84723954688098e-05, "epoch": 4.006968641114983, "percentage": 57.24, "elapsed_time": "1:45:50", "remaining_time": "1:19:03"}
|
||||
{"current_steps": 1155, "total_steps": 2009, "loss": 0.2572, "lr": 1.8299202241592785e-05, "epoch": 4.024390243902439, "percentage": 57.49, "elapsed_time": "1:46:21", "remaining_time": "1:18:38"}
|
||||
{"current_steps": 1160, "total_steps": 2009, "loss": 0.2743, "lr": 1.8126137393037687e-05, "epoch": 4.041811846689895, "percentage": 57.74, "elapsed_time": "1:46:45", "remaining_time": "1:18:08"}
|
||||
{"current_steps": 1165, "total_steps": 2009, "loss": 0.2682, "lr": 1.795321398632831e-05, "epoch": 4.059233449477352, "percentage": 57.99, "elapsed_time": "1:47:18", "remaining_time": "1:17:44"}
|
||||
{"current_steps": 1170, "total_steps": 2009, "loss": 0.2715, "lr": 1.778044507397217e-05, "epoch": 4.076655052264808, "percentage": 58.24, "elapsed_time": "1:47:52", "remaining_time": "1:17:21"}
|
||||
{"current_steps": 1175, "total_steps": 2009, "loss": 0.2917, "lr": 1.7607843696815376e-05, "epoch": 4.094076655052265, "percentage": 58.49, "elapsed_time": "1:48:12", "remaining_time": "1:16:48"}
|
||||
{"current_steps": 1180, "total_steps": 2009, "loss": 0.2737, "lr": 1.743542288305821e-05, "epoch": 4.111498257839721, "percentage": 58.74, "elapsed_time": "1:48:30", "remaining_time": "1:16:14"}
|
||||
{"current_steps": 1185, "total_steps": 2009, "loss": 0.2744, "lr": 1.726319564727176e-05, "epoch": 4.128919860627177, "percentage": 58.98, "elapsed_time": "1:49:00", "remaining_time": "1:15:47"}
|
||||
{"current_steps": 1190, "total_steps": 2009, "loss": 0.2671, "lr": 1.7091174989415607e-05, "epoch": 4.146341463414634, "percentage": 59.23, "elapsed_time": "1:49:26", "remaining_time": "1:15:19"}
|
||||
{"current_steps": 1195, "total_steps": 2009, "loss": 0.2799, "lr": 1.69193738938565e-05, "epoch": 4.16376306620209, "percentage": 59.48, "elapsed_time": "1:49:54", "remaining_time": "1:14:52"}
|
||||
{"current_steps": 1200, "total_steps": 2009, "loss": 0.283, "lr": 1.6747805328388314e-05, "epoch": 4.181184668989547, "percentage": 59.73, "elapsed_time": "1:50:18", "remaining_time": "1:14:22"}
|
||||
{"current_steps": 1205, "total_steps": 2009, "loss": 0.2949, "lr": 1.6576482243253243e-05, "epoch": 4.198606271777003, "percentage": 59.98, "elapsed_time": "1:50:47", "remaining_time": "1:13:55"}
|
||||
{"current_steps": 1210, "total_steps": 2009, "loss": 0.292, "lr": 1.6405417570164244e-05, "epoch": 4.21602787456446, "percentage": 60.23, "elapsed_time": "1:51:09", "remaining_time": "1:13:23"}
|
||||
{"current_steps": 1215, "total_steps": 2009, "loss": 0.2925, "lr": 1.623462422132896e-05, "epoch": 4.2334494773519165, "percentage": 60.48, "elapsed_time": "1:51:29", "remaining_time": "1:12:51"}
|
||||
{"current_steps": 1220, "total_steps": 2009, "loss": 0.2672, "lr": 1.606411508847513e-05, "epoch": 4.2508710801393725, "percentage": 60.73, "elapsed_time": "1:51:50", "remaining_time": "1:12:19"}
|
||||
{"current_steps": 1225, "total_steps": 2009, "loss": 0.2585, "lr": 1.5893903041877413e-05, "epoch": 4.2682926829268295, "percentage": 60.98, "elapsed_time": "1:52:22", "remaining_time": "1:11:55"}
|
||||
{"current_steps": 1230, "total_steps": 2009, "loss": 0.264, "lr": 1.5724000929385984e-05, "epoch": 4.285714285714286, "percentage": 61.22, "elapsed_time": "1:52:48", "remaining_time": "1:11:26"}
|
||||
{"current_steps": 1235, "total_steps": 2009, "loss": 0.2681, "lr": 1.5554421575456765e-05, "epoch": 4.303135888501743, "percentage": 61.47, "elapsed_time": "1:53:13", "remaining_time": "1:10:57"}
|
||||
{"current_steps": 1240, "total_steps": 2009, "loss": 0.2741, "lr": 1.5385177780183372e-05, "epoch": 4.320557491289199, "percentage": 61.72, "elapsed_time": "1:53:37", "remaining_time": "1:10:27"}
|
||||
{"current_steps": 1245, "total_steps": 2009, "loss": 0.2732, "lr": 1.5216282318330983e-05, "epoch": 4.337979094076655, "percentage": 61.97, "elapsed_time": "1:54:13", "remaining_time": "1:10:05"}
|
||||
{"current_steps": 1250, "total_steps": 2009, "loss": 0.2562, "lr": 1.5047747938372078e-05, "epoch": 4.355400696864112, "percentage": 62.22, "elapsed_time": "1:54:39", "remaining_time": "1:09:37"}
|
||||
{"current_steps": 1255, "total_steps": 2009, "loss": 0.2855, "lr": 1.4879587361524136e-05, "epoch": 4.372822299651568, "percentage": 62.47, "elapsed_time": "1:55:02", "remaining_time": "1:09:07"}
|
||||
{"current_steps": 1260, "total_steps": 2009, "loss": 0.2709, "lr": 1.4711813280789443e-05, "epoch": 4.390243902439025, "percentage": 62.72, "elapsed_time": "1:55:26", "remaining_time": "1:08:37"}
|
||||
{"current_steps": 1265, "total_steps": 2009, "loss": 0.2779, "lr": 1.4544438359997024e-05, "epoch": 4.407665505226481, "percentage": 62.97, "elapsed_time": "1:55:52", "remaining_time": "1:08:09"}
|
||||
{"current_steps": 1270, "total_steps": 2009, "loss": 0.2627, "lr": 1.4377475232846714e-05, "epoch": 4.425087108013937, "percentage": 63.22, "elapsed_time": "1:56:21", "remaining_time": "1:07:42"}
|
||||
{"current_steps": 1275, "total_steps": 2009, "loss": 0.2753, "lr": 1.4210936501955572e-05, "epoch": 4.442508710801394, "percentage": 63.46, "elapsed_time": "1:56:48", "remaining_time": "1:07:14"}
|
||||
{"current_steps": 1280, "total_steps": 2009, "loss": 0.2839, "lr": 1.4044834737906634e-05, "epoch": 4.45993031358885, "percentage": 63.71, "elapsed_time": "1:57:20", "remaining_time": "1:06:50"}
|
||||
{"current_steps": 1285, "total_steps": 2009, "loss": 0.2731, "lr": 1.3879182478300029e-05, "epoch": 4.477351916376307, "percentage": 63.96, "elapsed_time": "1:57:46", "remaining_time": "1:06:21"}
|
||||
{"current_steps": 1290, "total_steps": 2009, "loss": 0.2624, "lr": 1.371399222680666e-05, "epoch": 4.494773519163763, "percentage": 64.21, "elapsed_time": "1:58:17", "remaining_time": "1:05:55"}
|
||||
{"current_steps": 1295, "total_steps": 2009, "loss": 0.2658, "lr": 1.354927645222439e-05, "epoch": 4.512195121951219, "percentage": 64.46, "elapsed_time": "1:58:54", "remaining_time": "1:05:33"}
|
||||
{"current_steps": 1300, "total_steps": 2009, "loss": 0.2852, "lr": 1.3385047587536879e-05, "epoch": 4.529616724738676, "percentage": 64.71, "elapsed_time": "1:59:20", "remaining_time": "1:05:05"}
|
||||
{"current_steps": 1305, "total_steps": 2009, "loss": 0.2754, "lr": 1.3221318028975116e-05, "epoch": 4.547038327526132, "percentage": 64.96, "elapsed_time": "1:59:41", "remaining_time": "1:04:33"}
|
||||
{"current_steps": 1310, "total_steps": 2009, "loss": 0.275, "lr": 1.3058100135081775e-05, "epoch": 4.564459930313589, "percentage": 65.21, "elapsed_time": "2:00:01", "remaining_time": "1:04:02"}
|
||||
{"current_steps": 1315, "total_steps": 2009, "loss": 0.2876, "lr": 1.2895406225778316e-05, "epoch": 4.581881533101045, "percentage": 65.46, "elapsed_time": "2:00:25", "remaining_time": "1:03:33"}
|
||||
{"current_steps": 1320, "total_steps": 2009, "loss": 0.2858, "lr": 1.2733248581435096e-05, "epoch": 4.599303135888501, "percentage": 65.7, "elapsed_time": "2:00:55", "remaining_time": "1:03:07"}
|
||||
{"current_steps": 1325, "total_steps": 2009, "loss": 0.2626, "lr": 1.257163944194443e-05, "epoch": 4.616724738675958, "percentage": 65.95, "elapsed_time": "2:01:25", "remaining_time": "1:02:41"}
|
||||
{"current_steps": 1330, "total_steps": 2009, "loss": 0.2766, "lr": 1.2410591005796695e-05, "epoch": 4.634146341463414, "percentage": 66.2, "elapsed_time": "2:01:58", "remaining_time": "1:02:16"}
|
||||
{"current_steps": 1335, "total_steps": 2009, "loss": 0.2681, "lr": 1.2250115429159562e-05, "epoch": 4.651567944250871, "percentage": 66.45, "elapsed_time": "2:02:26", "remaining_time": "1:01:49"}
|
||||
{"current_steps": 1340, "total_steps": 2009, "loss": 0.2797, "lr": 1.2090224824960455e-05, "epoch": 4.668989547038327, "percentage": 66.7, "elapsed_time": "2:02:49", "remaining_time": "1:01:19"}
|
||||
{"current_steps": 1345, "total_steps": 2009, "loss": 0.2836, "lr": 1.1930931261972231e-05, "epoch": 4.686411149825784, "percentage": 66.95, "elapsed_time": "2:03:11", "remaining_time": "1:00:48"}
|
||||
{"current_steps": 1350, "total_steps": 2009, "loss": 0.2858, "lr": 1.1772246763902206e-05, "epoch": 4.70383275261324, "percentage": 67.2, "elapsed_time": "2:03:37", "remaining_time": "1:00:20"}
|
||||
{"current_steps": 1355, "total_steps": 2009, "loss": 0.2767, "lr": 1.1614183308484637e-05, "epoch": 4.7212543554006965, "percentage": 67.45, "elapsed_time": "2:04:01", "remaining_time": "0:59:51"}
|
||||
{"current_steps": 1360, "total_steps": 2009, "loss": 0.2659, "lr": 1.1456752826576535e-05, "epoch": 4.7386759581881535, "percentage": 67.7, "elapsed_time": "2:04:27", "remaining_time": "0:59:23"}
|
||||
{"current_steps": 1365, "total_steps": 2009, "loss": 0.2669, "lr": 1.1299967201257198e-05, "epoch": 4.7560975609756095, "percentage": 67.94, "elapsed_time": "2:04:53", "remaining_time": "0:58:55"}
|
||||
{"current_steps": 1370, "total_steps": 2009, "loss": 0.2802, "lr": 1.114383826693121e-05, "epoch": 4.7735191637630665, "percentage": 68.19, "elapsed_time": "2:05:17", "remaining_time": "0:58:26"}
|
||||
{"current_steps": 1375, "total_steps": 2009, "loss": 0.288, "lr": 1.0988377808435184e-05, "epoch": 4.790940766550523, "percentage": 68.44, "elapsed_time": "2:05:47", "remaining_time": "0:58:00"}
|
||||
{"current_steps": 1380, "total_steps": 2009, "loss": 0.2761, "lr": 1.0833597560148181e-05, "epoch": 4.80836236933798, "percentage": 68.69, "elapsed_time": "2:06:15", "remaining_time": "0:57:32"}
|
||||
{"current_steps": 1385, "total_steps": 2009, "loss": 0.2821, "lr": 1.0679509205106068e-05, "epoch": 4.825783972125436, "percentage": 68.94, "elapsed_time": "2:06:40", "remaining_time": "0:57:04"}
|
||||
{"current_steps": 1390, "total_steps": 2009, "loss": 0.2672, "lr": 1.052612437411957e-05, "epoch": 4.843205574912892, "percentage": 69.19, "elapsed_time": "2:07:04", "remaining_time": "0:56:35"}
|
||||
{"current_steps": 1395, "total_steps": 2009, "loss": 0.2714, "lr": 1.0373454644896428e-05, "epoch": 4.860627177700349, "percentage": 69.44, "elapsed_time": "2:07:29", "remaining_time": "0:56:07"}
|
||||
{"current_steps": 1400, "total_steps": 2009, "loss": 0.2752, "lr": 1.0221511541167473e-05, "epoch": 4.878048780487805, "percentage": 69.69, "elapsed_time": "2:07:53", "remaining_time": "0:55:37"}
|
||||
{"current_steps": 1405, "total_steps": 2009, "loss": 0.2503, "lr": 1.0070306531816775e-05, "epoch": 4.895470383275262, "percentage": 69.94, "elapsed_time": "2:08:13", "remaining_time": "0:55:07"}
|
||||
{"current_steps": 1410, "total_steps": 2009, "loss": 0.2775, "lr": 9.919851030016006e-06, "epoch": 4.912891986062718, "percentage": 70.18, "elapsed_time": "2:08:45", "remaining_time": "0:54:41"}
|
||||
{"current_steps": 1415, "total_steps": 2009, "loss": 0.2559, "lr": 9.770156392362917e-06, "epoch": 4.930313588850174, "percentage": 70.43, "elapsed_time": "2:09:13", "remaining_time": "0:54:14"}
|
||||
{"current_steps": 1420, "total_steps": 2009, "loss": 0.2822, "lr": 9.621233918024148e-06, "epoch": 4.947735191637631, "percentage": 70.68, "elapsed_time": "2:09:44", "remaining_time": "0:53:48"}
|
||||
{"current_steps": 1425, "total_steps": 2009, "loss": 0.2708, "lr": 9.473094847882314e-06, "epoch": 4.965156794425087, "percentage": 70.93, "elapsed_time": "2:10:06", "remaining_time": "0:53:19"}
|
||||
{"current_steps": 1430, "total_steps": 2009, "loss": 0.2721, "lr": 9.325750363687599e-06, "epoch": 4.982578397212544, "percentage": 71.18, "elapsed_time": "2:10:32", "remaining_time": "0:52:51"}
|
||||
{"current_steps": 1435, "total_steps": 2009, "loss": 0.2605, "lr": 9.179211587213648e-06, "epoch": 5.0, "percentage": 71.43, "elapsed_time": "2:11:02", "remaining_time": "0:52:24"}
|
||||
{"current_steps": 1440, "total_steps": 2009, "loss": 0.2552, "lr": 9.033489579418162e-06, "epoch": 5.017421602787456, "percentage": 71.68, "elapsed_time": "2:11:26", "remaining_time": "0:51:56"}
|
||||
{"current_steps": 1445, "total_steps": 2009, "loss": 0.2751, "lr": 8.888595339607961e-06, "epoch": 5.034843205574913, "percentage": 71.93, "elapsed_time": "2:11:47", "remaining_time": "0:51:26"}
|
||||
{"current_steps": 1450, "total_steps": 2009, "loss": 0.26, "lr": 8.744539804608727e-06, "epoch": 5.052264808362369, "percentage": 72.18, "elapsed_time": "2:12:20", "remaining_time": "0:51:01"}
|
||||
{"current_steps": 1455, "total_steps": 2009, "loss": 0.2528, "lr": 8.601333847939519e-06, "epoch": 5.069686411149826, "percentage": 72.42, "elapsed_time": "2:12:45", "remaining_time": "0:50:32"}
|
||||
{"current_steps": 1460, "total_steps": 2009, "loss": 0.259, "lr": 8.45898827899199e-06, "epoch": 5.087108013937282, "percentage": 72.67, "elapsed_time": "2:13:10", "remaining_time": "0:50:04"}
|
||||
{"current_steps": 1465, "total_steps": 2009, "loss": 0.2513, "lr": 8.317513842214502e-06, "epoch": 5.104529616724738, "percentage": 72.92, "elapsed_time": "2:13:37", "remaining_time": "0:49:37"}
|
||||
{"current_steps": 1470, "total_steps": 2009, "loss": 0.2341, "lr": 8.176921216301075e-06, "epoch": 5.121951219512195, "percentage": 73.17, "elapsed_time": "2:14:00", "remaining_time": "0:49:08"}
|
||||
{"current_steps": 1475, "total_steps": 2009, "loss": 0.2346, "lr": 8.037221013385425e-06, "epoch": 5.139372822299651, "percentage": 73.42, "elapsed_time": "2:14:30", "remaining_time": "0:48:41"}
|
||||
{"current_steps": 1480, "total_steps": 2009, "loss": 0.269, "lr": 7.898423778239857e-06, "epoch": 5.156794425087108, "percentage": 73.67, "elapsed_time": "2:14:55", "remaining_time": "0:48:13"}
|
||||
{"current_steps": 1485, "total_steps": 2009, "loss": 0.2562, "lr": 7.760539987479383e-06, "epoch": 5.174216027874564, "percentage": 73.92, "elapsed_time": "2:15:17", "remaining_time": "0:47:44"}
|
||||
{"current_steps": 1490, "total_steps": 2009, "loss": 0.2693, "lr": 7.62358004877092e-06, "epoch": 5.191637630662021, "percentage": 74.17, "elapsed_time": "2:15:39", "remaining_time": "0:47:15"}
|
||||
{"current_steps": 1495, "total_steps": 2009, "loss": 0.2405, "lr": 7.487554300047706e-06, "epoch": 5.209059233449477, "percentage": 74.42, "elapsed_time": "2:16:09", "remaining_time": "0:46:48"}
|
||||
{"current_steps": 1500, "total_steps": 2009, "loss": 0.2543, "lr": 7.352473008728962e-06, "epoch": 5.2264808362369335, "percentage": 74.66, "elapsed_time": "2:16:33", "remaining_time": "0:46:20"}
|
||||
{"current_steps": 1505, "total_steps": 2009, "loss": 0.254, "lr": 7.218346370944913e-06, "epoch": 5.2439024390243905, "percentage": 74.91, "elapsed_time": "2:17:13", "remaining_time": "0:45:57"}
|
||||
{"current_steps": 1510, "total_steps": 2009, "loss": 0.2597, "lr": 7.085184510767173e-06, "epoch": 5.2613240418118465, "percentage": 75.16, "elapsed_time": "2:17:45", "remaining_time": "0:45:31"}
|
||||
{"current_steps": 1515, "total_steps": 2009, "loss": 0.239, "lr": 6.952997479444523e-06, "epoch": 5.2787456445993035, "percentage": 75.41, "elapsed_time": "2:18:07", "remaining_time": "0:45:02"}
|
||||
{"current_steps": 1520, "total_steps": 2009, "loss": 0.2536, "lr": 6.821795254644304e-06, "epoch": 5.29616724738676, "percentage": 75.66, "elapsed_time": "2:18:37", "remaining_time": "0:44:35"}
|
||||
{"current_steps": 1525, "total_steps": 2009, "loss": 0.2485, "lr": 6.691587739699199e-06, "epoch": 5.313588850174216, "percentage": 75.91, "elapsed_time": "2:19:02", "remaining_time": "0:44:07"}
|
||||
{"current_steps": 1530, "total_steps": 2009, "loss": 0.2533, "lr": 6.562384762859788e-06, "epoch": 5.331010452961673, "percentage": 76.16, "elapsed_time": "2:19:33", "remaining_time": "0:43:41"}
|
||||
{"current_steps": 1535, "total_steps": 2009, "loss": 0.2713, "lr": 6.434196076552661e-06, "epoch": 5.348432055749129, "percentage": 76.41, "elapsed_time": "2:20:04", "remaining_time": "0:43:15"}
|
||||
{"current_steps": 1540, "total_steps": 2009, "loss": 0.2659, "lr": 6.307031356644304e-06, "epoch": 5.365853658536586, "percentage": 76.66, "elapsed_time": "2:20:33", "remaining_time": "0:42:48"}
|
||||
{"current_steps": 1545, "total_steps": 2009, "loss": 0.2624, "lr": 6.180900201710733e-06, "epoch": 5.383275261324042, "percentage": 76.9, "elapsed_time": "2:21:01", "remaining_time": "0:42:21"}
|
||||
{"current_steps": 1550, "total_steps": 2009, "loss": 0.2605, "lr": 6.055812132313006e-06, "epoch": 5.400696864111498, "percentage": 77.15, "elapsed_time": "2:21:28", "remaining_time": "0:41:53"}
|
||||
{"current_steps": 1555, "total_steps": 2009, "loss": 0.253, "lr": 5.931776590278584e-06, "epoch": 5.418118466898955, "percentage": 77.4, "elapsed_time": "2:21:58", "remaining_time": "0:41:27"}
|
||||
{"current_steps": 1560, "total_steps": 2009, "loss": 0.2554, "lr": 5.8088029379886265e-06, "epoch": 5.435540069686411, "percentage": 77.65, "elapsed_time": "2:22:35", "remaining_time": "0:41:02"}
|
||||
{"current_steps": 1565, "total_steps": 2009, "loss": 0.2532, "lr": 5.686900457671367e-06, "epoch": 5.452961672473868, "percentage": 77.9, "elapsed_time": "2:22:59", "remaining_time": "0:40:34"}
|
||||
{"current_steps": 1570, "total_steps": 2009, "loss": 0.2691, "lr": 5.566078350701398e-06, "epoch": 5.470383275261324, "percentage": 78.15, "elapsed_time": "2:23:25", "remaining_time": "0:40:06"}
|
||||
{"current_steps": 1575, "total_steps": 2009, "loss": 0.2643, "lr": 5.4463457369051984e-06, "epoch": 5.487804878048781, "percentage": 78.4, "elapsed_time": "2:23:53", "remaining_time": "0:39:39"}
|
||||
{"current_steps": 1580, "total_steps": 2009, "loss": 0.2473, "lr": 5.3277116538727316e-06, "epoch": 5.505226480836237, "percentage": 78.65, "elapsed_time": "2:24:26", "remaining_time": "0:39:13"}
|
||||
{"current_steps": 1585, "total_steps": 2009, "loss": 0.2658, "lr": 5.210185056275281e-06, "epoch": 5.522648083623693, "percentage": 78.89, "elapsed_time": "2:24:53", "remaining_time": "0:38:45"}
|
||||
{"current_steps": 1590, "total_steps": 2009, "loss": 0.264, "lr": 5.09377481518952e-06, "epoch": 5.54006968641115, "percentage": 79.14, "elapsed_time": "2:25:15", "remaining_time": "0:38:16"}
|
||||
{"current_steps": 1595, "total_steps": 2009, "loss": 0.2675, "lr": 4.978489717427948e-06, "epoch": 5.557491289198606, "percentage": 79.39, "elapsed_time": "2:25:48", "remaining_time": "0:37:50"}
|
||||
{"current_steps": 1600, "total_steps": 2009, "loss": 0.2368, "lr": 4.864338464875618e-06, "epoch": 5.574912891986063, "percentage": 79.64, "elapsed_time": "2:26:13", "remaining_time": "0:37:22"}
|
||||
{"current_steps": 1605, "total_steps": 2009, "loss": 0.2517, "lr": 4.751329673833298e-06, "epoch": 5.592334494773519, "percentage": 79.89, "elapsed_time": "2:26:35", "remaining_time": "0:36:53"}
|
||||
{"current_steps": 1610, "total_steps": 2009, "loss": 0.2663, "lr": 4.6394718743671605e-06, "epoch": 5.609756097560975, "percentage": 80.14, "elapsed_time": "2:27:03", "remaining_time": "0:36:26"}
|
||||
{"current_steps": 1615, "total_steps": 2009, "loss": 0.2411, "lr": 4.528773509664832e-06, "epoch": 5.627177700348432, "percentage": 80.39, "elapsed_time": "2:27:25", "remaining_time": "0:35:58"}
|
||||
{"current_steps": 1620, "total_steps": 2009, "loss": 0.2535, "lr": 4.41924293539816e-06, "epoch": 5.644599303135888, "percentage": 80.64, "elapsed_time": "2:27:56", "remaining_time": "0:35:31"}
|
||||
{"current_steps": 1625, "total_steps": 2009, "loss": 0.2532, "lr": 4.310888419092485e-06, "epoch": 5.662020905923345, "percentage": 80.89, "elapsed_time": "2:28:22", "remaining_time": "0:35:03"}
|
||||
{"current_steps": 1630, "total_steps": 2009, "loss": 0.268, "lr": 4.203718139502601e-06, "epoch": 5.679442508710801, "percentage": 81.13, "elapsed_time": "2:28:43", "remaining_time": "0:34:34"}
|
||||
{"current_steps": 1635, "total_steps": 2009, "loss": 0.2461, "lr": 4.0977401859954045e-06, "epoch": 5.696864111498257, "percentage": 81.38, "elapsed_time": "2:29:12", "remaining_time": "0:34:07"}
|
||||
{"current_steps": 1640, "total_steps": 2009, "loss": 0.2553, "lr": 3.9929625579393106e-06, "epoch": 5.714285714285714, "percentage": 81.63, "elapsed_time": "2:29:38", "remaining_time": "0:33:40"}
|
||||
{"current_steps": 1645, "total_steps": 2009, "loss": 0.2441, "lr": 3.8893931641004435e-06, "epoch": 5.7317073170731705, "percentage": 81.88, "elapsed_time": "2:30:03", "remaining_time": "0:33:12"}
|
||||
{"current_steps": 1650, "total_steps": 2009, "loss": 0.2635, "lr": 3.787039822045655e-06, "epoch": 5.7491289198606275, "percentage": 82.13, "elapsed_time": "2:30:25", "remaining_time": "0:32:43"}
|
||||
{"current_steps": 1655, "total_steps": 2009, "loss": 0.2402, "lr": 3.6859102575524874e-06, "epoch": 5.7665505226480835, "percentage": 82.38, "elapsed_time": "2:30:46", "remaining_time": "0:32:14"}
|
||||
{"current_steps": 1660, "total_steps": 2009, "loss": 0.2687, "lr": 3.5860121040259623e-06, "epoch": 5.78397212543554, "percentage": 82.63, "elapsed_time": "2:31:15", "remaining_time": "0:31:47"}
|
||||
{"current_steps": 1665, "total_steps": 2009, "loss": 0.2538, "lr": 3.4873529019224472e-06, "epoch": 5.801393728222997, "percentage": 82.88, "elapsed_time": "2:31:39", "remaining_time": "0:31:20"}
|
||||
{"current_steps": 1670, "total_steps": 2009, "loss": 0.2594, "lr": 3.3899400981804688e-06, "epoch": 5.818815331010453, "percentage": 83.13, "elapsed_time": "2:32:09", "remaining_time": "0:30:53"}
|
||||
{"current_steps": 1675, "total_steps": 2009, "loss": 0.2585, "lr": 3.293781045658615e-06, "epoch": 5.83623693379791, "percentage": 83.37, "elapsed_time": "2:32:30", "remaining_time": "0:30:24"}
|
||||
{"current_steps": 1680, "total_steps": 2009, "loss": 0.2671, "lr": 3.1988830025805174e-06, "epoch": 5.853658536585366, "percentage": 83.62, "elapsed_time": "2:33:05", "remaining_time": "0:29:58"}
|
||||
{"current_steps": 1685, "total_steps": 2009, "loss": 0.2637, "lr": 3.1052531319870137e-06, "epoch": 5.871080139372822, "percentage": 83.87, "elapsed_time": "2:33:34", "remaining_time": "0:29:31"}
|
||||
{"current_steps": 1690, "total_steps": 2009, "loss": 0.2651, "lr": 3.012898501195447e-06, "epoch": 5.888501742160279, "percentage": 84.12, "elapsed_time": "2:33:58", "remaining_time": "0:29:03"}
|
||||
{"current_steps": 1695, "total_steps": 2009, "loss": 0.2565, "lr": 2.9218260812662345e-06, "epoch": 5.905923344947735, "percentage": 84.37, "elapsed_time": "2:34:22", "remaining_time": "0:28:35"}
|
||||
{"current_steps": 1700, "total_steps": 2009, "loss": 0.2389, "lr": 2.8320427464766733e-06, "epoch": 5.923344947735192, "percentage": 84.62, "elapsed_time": "2:34:47", "remaining_time": "0:28:08"}
|
||||
{"current_steps": 1705, "total_steps": 2009, "loss": 0.2552, "lr": 2.7435552738020434e-06, "epoch": 5.940766550522648, "percentage": 84.87, "elapsed_time": "2:35:16", "remaining_time": "0:27:41"}
|
||||
{"current_steps": 1710, "total_steps": 2009, "loss": 0.2515, "lr": 2.6563703424041045e-06, "epoch": 5.958188153310105, "percentage": 85.12, "elapsed_time": "2:35:42", "remaining_time": "0:27:13"}
|
||||
{"current_steps": 1715, "total_steps": 2009, "loss": 0.248, "lr": 2.5704945331269217e-06, "epoch": 5.975609756097561, "percentage": 85.37, "elapsed_time": "2:36:04", "remaining_time": "0:26:45"}
|
||||
{"current_steps": 1720, "total_steps": 2009, "loss": 0.2519, "lr": 2.485934328000139e-06, "epoch": 5.993031358885017, "percentage": 85.61, "elapsed_time": "2:36:33", "remaining_time": "0:26:18"}
|
||||
{"current_steps": 1725, "total_steps": 2009, "loss": 0.2708, "lr": 2.4026961097497026e-06, "epoch": 6.010452961672474, "percentage": 85.86, "elapsed_time": "2:36:56", "remaining_time": "0:25:50"}
|
||||
{"current_steps": 1730, "total_steps": 2009, "loss": 0.2257, "lr": 2.3207861613160885e-06, "epoch": 6.02787456445993, "percentage": 86.11, "elapsed_time": "2:37:29", "remaining_time": "0:25:23"}
|
||||
{"current_steps": 1735, "total_steps": 2009, "loss": 0.238, "lr": 2.2402106653800626e-06, "epoch": 6.045296167247387, "percentage": 86.36, "elapsed_time": "2:37:57", "remaining_time": "0:24:56"}
|
||||
{"current_steps": 1740, "total_steps": 2009, "loss": 0.2385, "lr": 2.1609757038959887e-06, "epoch": 6.062717770034843, "percentage": 86.61, "elapsed_time": "2:38:19", "remaining_time": "0:24:28"}
|
||||
{"current_steps": 1745, "total_steps": 2009, "loss": 0.257, "lr": 2.0830872576327652e-06, "epoch": 6.080139372822299, "percentage": 86.86, "elapsed_time": "2:38:48", "remaining_time": "0:24:01"}
|
||||
{"current_steps": 1750, "total_steps": 2009, "loss": 0.26, "lr": 2.006551205722376e-06, "epoch": 6.097560975609756, "percentage": 87.11, "elapsed_time": "2:39:15", "remaining_time": "0:23:34"}
|
||||
{"current_steps": 1755, "total_steps": 2009, "loss": 0.2541, "lr": 1.9313733252161526e-06, "epoch": 6.114982578397212, "percentage": 87.36, "elapsed_time": "2:39:41", "remaining_time": "0:23:06"}
|
||||
{"current_steps": 1760, "total_steps": 2009, "loss": 0.2504, "lr": 1.8575592906486717e-06, "epoch": 6.132404181184669, "percentage": 87.61, "elapsed_time": "2:40:07", "remaining_time": "0:22:39"}
|
||||
{"current_steps": 1765, "total_steps": 2009, "loss": 0.2494, "lr": 1.7851146736094716e-06, "epoch": 6.149825783972125, "percentage": 87.85, "elapsed_time": "2:40:34", "remaining_time": "0:22:11"}
|
||||
{"current_steps": 1770, "total_steps": 2009, "loss": 0.2487, "lr": 1.714044942322477e-06, "epoch": 6.167247386759582, "percentage": 88.1, "elapsed_time": "2:41:01", "remaining_time": "0:21:44"}
|
||||
{"current_steps": 1775, "total_steps": 2009, "loss": 0.2268, "lr": 1.644355461233269e-06, "epoch": 6.184668989547038, "percentage": 88.35, "elapsed_time": "2:41:26", "remaining_time": "0:21:16"}
|
||||
{"current_steps": 1780, "total_steps": 2009, "loss": 0.2379, "lr": 1.5760514906041401e-06, "epoch": 6.2020905923344944, "percentage": 88.6, "elapsed_time": "2:41:51", "remaining_time": "0:20:49"}
|
||||
{"current_steps": 1785, "total_steps": 2009, "loss": 0.2345, "lr": 1.5091381861170762e-06, "epoch": 6.219512195121951, "percentage": 88.85, "elapsed_time": "2:42:23", "remaining_time": "0:20:22"}
|
||||
{"current_steps": 1790, "total_steps": 2009, "loss": 0.2365, "lr": 1.4436205984845763e-06, "epoch": 6.2369337979094075, "percentage": 89.1, "elapsed_time": "2:42:55", "remaining_time": "0:19:55"}
|
||||
{"current_steps": 1795, "total_steps": 2009, "loss": 0.2548, "lr": 1.3795036730684119e-06, "epoch": 6.2543554006968645, "percentage": 89.35, "elapsed_time": "2:43:16", "remaining_time": "0:19:27"}
|
||||
{"current_steps": 1800, "total_steps": 2009, "loss": 0.2452, "lr": 1.3167922495063823e-06, "epoch": 6.2717770034843205, "percentage": 89.6, "elapsed_time": "2:43:42", "remaining_time": "0:19:00"}
|
||||
{"current_steps": 1805, "total_steps": 2009, "loss": 0.2602, "lr": 1.2554910613469563e-06, "epoch": 6.289198606271777, "percentage": 89.85, "elapsed_time": "2:44:06", "remaining_time": "0:18:32"}
|
||||
{"current_steps": 1810, "total_steps": 2009, "loss": 0.252, "lr": 1.1956047356920197e-06, "epoch": 6.306620209059234, "percentage": 90.09, "elapsed_time": "2:44:30", "remaining_time": "0:18:05"}
|
||||
{"current_steps": 1815, "total_steps": 2009, "loss": 0.2357, "lr": 1.1371377928475958e-06, "epoch": 6.32404181184669, "percentage": 90.34, "elapsed_time": "2:44:57", "remaining_time": "0:17:37"}
|
||||
{"current_steps": 1820, "total_steps": 2009, "loss": 0.2473, "lr": 1.0800946459826611e-06, "epoch": 6.341463414634147, "percentage": 90.59, "elapsed_time": "2:45:24", "remaining_time": "0:17:10"}
|
||||
{"current_steps": 1825, "total_steps": 2009, "loss": 0.2483, "lr": 1.0244796007960066e-06, "epoch": 6.358885017421603, "percentage": 90.84, "elapsed_time": "2:45:56", "remaining_time": "0:16:43"}
|
||||
{"current_steps": 1830, "total_steps": 2009, "loss": 0.2518, "lr": 9.702968551912662e-07, "epoch": 6.376306620209059, "percentage": 91.09, "elapsed_time": "2:46:20", "remaining_time": "0:16:16"}
|
||||
{"current_steps": 1835, "total_steps": 2009, "loss": 0.2431, "lr": 9.175504989600382e-07, "epoch": 6.393728222996516, "percentage": 91.34, "elapsed_time": "2:46:45", "remaining_time": "0:15:48"}
|
||||
{"current_steps": 1840, "total_steps": 2009, "loss": 0.2588, "lr": 8.662445134731757e-07, "epoch": 6.411149825783972, "percentage": 91.59, "elapsed_time": "2:47:11", "remaining_time": "0:15:21"}
|
||||
{"current_steps": 1845, "total_steps": 2009, "loss": 0.2523, "lr": 8.163827713802952e-07, "epoch": 6.428571428571429, "percentage": 91.84, "elapsed_time": "2:47:35", "remaining_time": "0:14:53"}
|
||||
{"current_steps": 1850, "total_steps": 2009, "loss": 0.25, "lr": 7.679690363174197e-07, "epoch": 6.445993031358885, "percentage": 92.09, "elapsed_time": "2:47:58", "remaining_time": "0:14:26"}
|
||||
{"current_steps": 1855, "total_steps": 2009, "loss": 0.2466, "lr": 7.210069626229365e-07, "epoch": 6.463414634146342, "percentage": 92.33, "elapsed_time": "2:48:25", "remaining_time": "0:13:58"}
|
||||
{"current_steps": 1860, "total_steps": 2009, "loss": 0.2524, "lr": 6.755000950617363e-07, "epoch": 6.480836236933798, "percentage": 92.58, "elapsed_time": "2:48:47", "remaining_time": "0:13:31"}
|
||||
{"current_steps": 1865, "total_steps": 2009, "loss": 0.2436, "lr": 6.314518685576598e-07, "epoch": 6.498257839721254, "percentage": 92.83, "elapsed_time": "2:49:14", "remaining_time": "0:13:04"}
|
||||
{"current_steps": 1870, "total_steps": 2009, "loss": 0.2427, "lr": 5.888656079342169e-07, "epoch": 6.515679442508711, "percentage": 93.08, "elapsed_time": "2:49:40", "remaining_time": "0:12:36"}
|
||||
{"current_steps": 1875, "total_steps": 2009, "loss": 0.2508, "lr": 5.477445276636339e-07, "epoch": 6.533101045296167, "percentage": 93.33, "elapsed_time": "2:50:07", "remaining_time": "0:12:09"}
|
||||
{"current_steps": 1880, "total_steps": 2009, "loss": 0.2418, "lr": 5.080917316242184e-07, "epoch": 6.550522648083624, "percentage": 93.58, "elapsed_time": "2:50:33", "remaining_time": "0:11:42"}
|
||||
{"current_steps": 1885, "total_steps": 2009, "loss": 0.2553, "lr": 4.6991021286606263e-07, "epoch": 6.56794425087108, "percentage": 93.83, "elapsed_time": "2:51:00", "remaining_time": "0:11:14"}
|
||||
{"current_steps": 1890, "total_steps": 2009, "loss": 0.2556, "lr": 4.332028533851462e-07, "epoch": 6.585365853658536, "percentage": 94.08, "elapsed_time": "2:51:35", "remaining_time": "0:10:48"}
|
||||
{"current_steps": 1895, "total_steps": 2009, "loss": 0.2561, "lr": 3.979724239057703e-07, "epoch": 6.602787456445993, "percentage": 94.33, "elapsed_time": "2:51:56", "remaining_time": "0:10:20"}
|
||||
{"current_steps": 1900, "total_steps": 2009, "loss": 0.2482, "lr": 3.6422158367144687e-07, "epoch": 6.620209059233449, "percentage": 94.57, "elapsed_time": "2:52:32", "remaining_time": "0:09:53"}
|
||||
{"current_steps": 1905, "total_steps": 2009, "loss": 0.2443, "lr": 3.3195288024415515e-07, "epoch": 6.637630662020906, "percentage": 94.82, "elapsed_time": "2:52:56", "remaining_time": "0:09:26"}
|
||||
{"current_steps": 1910, "total_steps": 2009, "loss": 0.2494, "lr": 3.0116874931205965e-07, "epoch": 6.655052264808362, "percentage": 95.07, "elapsed_time": "2:53:23", "remaining_time": "0:08:59"}
|
||||
{"current_steps": 1915, "total_steps": 2009, "loss": 0.253, "lr": 2.7187151450565495e-07, "epoch": 6.672473867595819, "percentage": 95.32, "elapsed_time": "2:53:47", "remaining_time": "0:08:31"}
|
||||
{"current_steps": 1920, "total_steps": 2009, "loss": 0.2451, "lr": 2.440633872223774e-07, "epoch": 6.689895470383275, "percentage": 95.57, "elapsed_time": "2:54:20", "remaining_time": "0:08:04"}
|
||||
{"current_steps": 1925, "total_steps": 2009, "loss": 0.2374, "lr": 2.1774646645968734e-07, "epoch": 6.7073170731707314, "percentage": 95.82, "elapsed_time": "2:54:47", "remaining_time": "0:07:37"}
|
||||
{"current_steps": 1930, "total_steps": 2009, "loss": 0.2455, "lr": 1.929227386566246e-07, "epoch": 6.724738675958188, "percentage": 96.07, "elapsed_time": "2:55:14", "remaining_time": "0:07:10"}
|
||||
{"current_steps": 1935, "total_steps": 2009, "loss": 0.2446, "lr": 1.6959407754388424e-07, "epoch": 6.7421602787456445, "percentage": 96.32, "elapsed_time": "2:55:36", "remaining_time": "0:06:42"}
|
||||
{"current_steps": 1940, "total_steps": 2009, "loss": 0.2513, "lr": 1.477622440023696e-07, "epoch": 6.7595818815331015, "percentage": 96.57, "elapsed_time": "2:56:05", "remaining_time": "0:06:15"}
|
||||
{"current_steps": 1945, "total_steps": 2009, "loss": 0.2494, "lr": 1.2742888593028967e-07, "epoch": 6.7770034843205575, "percentage": 96.81, "elapsed_time": "2:56:43", "remaining_time": "0:05:48"}
|
||||
{"current_steps": 1950, "total_steps": 2009, "loss": 0.2479, "lr": 1.0859553811876978e-07, "epoch": 6.794425087108014, "percentage": 97.06, "elapsed_time": "2:57:19", "remaining_time": "0:05:21"}
|
||||
{"current_steps": 1955, "total_steps": 2009, "loss": 0.2407, "lr": 9.126362213599749e-08, "epoch": 6.811846689895471, "percentage": 97.31, "elapsed_time": "2:57:44", "remaining_time": "0:04:54"}
|
||||
{"current_steps": 1960, "total_steps": 2009, "loss": 0.2392, "lr": 7.543444621992857e-08, "epoch": 6.829268292682927, "percentage": 97.56, "elapsed_time": "2:58:09", "remaining_time": "0:04:27"}
|
||||
{"current_steps": 1965, "total_steps": 2009, "loss": 0.2427, "lr": 6.1109205179537e-08, "epoch": 6.846689895470384, "percentage": 97.81, "elapsed_time": "2:58:37", "remaining_time": "0:03:59"}
|
||||
{"current_steps": 1970, "total_steps": 2009, "loss": 0.238, "lr": 4.8288980304627144e-08, "epoch": 6.86411149825784, "percentage": 98.06, "elapsed_time": "2:59:06", "remaining_time": "0:03:32"}
|
||||
{"current_steps": 1975, "total_steps": 2009, "loss": 0.2671, "lr": 3.697473928422124e-08, "epoch": 6.881533101045296, "percentage": 98.31, "elapsed_time": "2:59:34", "remaining_time": "0:03:05"}
|
||||
{"current_steps": 1980, "total_steps": 2009, "loss": 0.2542, "lr": 2.716733613350675e-08, "epoch": 6.898954703832753, "percentage": 98.56, "elapsed_time": "3:00:07", "remaining_time": "0:02:38"}
|
||||
{"current_steps": 1985, "total_steps": 2009, "loss": 0.2538, "lr": 1.8867511129387893e-08, "epoch": 6.916376306620209, "percentage": 98.81, "elapsed_time": "3:00:36", "remaining_time": "0:02:11"}
|
||||
{"current_steps": 1990, "total_steps": 2009, "loss": 0.2543, "lr": 1.207589075459481e-08, "epoch": 6.933797909407666, "percentage": 99.05, "elapsed_time": "3:01:02", "remaining_time": "0:01:43"}
|
||||
{"current_steps": 1995, "total_steps": 2009, "loss": 0.2296, "lr": 6.7929876504080335e-09, "epoch": 6.951219512195122, "percentage": 99.3, "elapsed_time": "3:01:33", "remaining_time": "0:01:16"}
|
||||
{"current_steps": 2000, "total_steps": 2009, "loss": 0.2517, "lr": 3.0192005779516863e-09, "epoch": 6.968641114982578, "percentage": 99.55, "elapsed_time": "3:01:55", "remaining_time": "0:00:49"}
|
||||
{"current_steps": 2005, "total_steps": 2009, "loss": 0.2563, "lr": 7.548143881064285e-10, "epoch": 6.986062717770035, "percentage": 99.8, "elapsed_time": "3:02:26", "remaining_time": "0:00:21"}
|
||||
{"current_steps": 2009, "total_steps": 2009, "epoch": 7.0, "percentage": 100.0, "elapsed_time": "3:02:48", "remaining_time": "0:00:00"}
|
||||
9595
trainer_state.json
Normal file
9595
trainer_state.json
Normal file
File diff suppressed because it is too large
Load Diff
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:cecf4e397d7f9143cfb03ec9fcf99915478b3fd67f236128bb56224468a3bf31
|
||||
size 8529
|
||||
BIN
training_loss.png
Normal file
BIN
training_loss.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 41 KiB |
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user