From b234b8c8e74ac066d097ac247b85b02269552725 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Wed, 27 May 2026 23:44:33 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: ZonglinY/MOOSE-Star-R1D-7B Source: Original Platform --- .gitattributes | 36 + README.md | 601 + all_results.json | 8 + chat_template.jinja | 1 + config.json | 60 + generation_config.json | 12 + model-00001-of-00004.safetensors | 3 + model-00002-of-00004.safetensors | 3 + model-00003-of-00004.safetensors | 3 + model-00004-of-00004.safetensors | 3 + model.safetensors.index.json | 347 + ...5037798.dsw-396638-5b5d784bd8-z9vqr.6789.0 | 3 + special_tokens_map.json | 23 + tokenizer.json | 3 + tokenizer_config.json | 197 + train_results.json | 8 + trainer_log.jsonl | 2070 +++ trainer_state.json | 14526 ++++++++++++++++ training_args.bin | 3 + 19 files changed, 17910 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 all_results.json create mode 100644 chat_template.jinja create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 model-00001-of-00004.safetensors create mode 100644 model-00002-of-00004.safetensors create mode 100644 model-00003-of-00004.safetensors create mode 100644 model-00004-of-00004.safetensors create mode 100644 model.safetensors.index.json create mode 100644 runs/Apr01_17-59-24_dsw-396638-5b5d784bd8-z9vqr/events.out.tfevents.1775037798.dsw-396638-5b5d784bd8-z9vqr.6789.0 create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 train_results.json create mode 100644 trainer_log.jsonl create mode 100644 trainer_state.json create mode 100644 training_args.bin diff --git a/.gitattributes 
b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..7c5dbb9 --- /dev/null +++ b/README.md @@ -0,0 +1,601 @@ +--- +language: +- en +license: apache-2.0 +base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B +tags: +- scientific-discovery +- hypothesis-generation 
+- inspiration-retrieval +- multi-task +datasets: +- ZonglinY/TOMATO-Star-SFT-Data-R1D-32B +library_name: transformers +pipeline_tag: text-generation +--- + +# MOOSE-Star-R1D-7B Model Card + +## Overview + +**MOOSE-Star-R1D-7B** (referred to as **MS-7B** in the paper) is a 7B parameter multi-task language model fine-tuned for both **inspiration retrieval** and **hypothesis composition** in scientific discovery workflows. It matches the IR performance of the single-task model ([MOOSE-Star-IR-R1D-7B](https://huggingface.co/ZonglinY/MOOSE-Star-IR-R1D-7B)) while significantly outperforming the single-task HC model ([MOOSE-Star-HC-R1D-7B](https://huggingface.co/ZonglinY/MOOSE-Star-HC-R1D-7B)), all in a single unified model. + +- **Paper**: [MOOSE-Star: Unlocking Tractable Training for Scientific Discovery by Breaking the Complexity Barrier](https://arxiv.org/abs/2603.03756) (arXiv:2603.03756) +- **Base Model**: [DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B) +- **License**: Apache 2.0 +- **Code**: [ZonglinY/MOOSE-Star](https://github.com/ZonglinY/MOOSE-Star) + +## Model Description + +| Parameter | Value | +|-----------|-------| +| **Base Model** | DeepSeek-R1-Distill-Qwen-7B | +| **Training Method** | Full-parameter SFT (ZeRO-3) | +| **Training Data** | TOMATO-Star-SFT-Data-R1D-32B: IR split (150,218 samples) + HC split with 1x bounded (114,548 samples) | +| **Chat Template** | deepseekr1 | +| **Cutoff Length** | 16384 | +| **Learning Rate** | 1e-5 | +| **Epochs** | 1 | +| **Batch Size** | 128 | + +## Task 1: Inspiration Retrieval (IR) + +The model selects the most relevant **cross-paper inspiration** from 15 candidates (A-O) that include 1 correct inspiration and 14 hard negatives. + +### IR Prompt Format (Simplified Overview) + +The full prompt template is constructed via `instruction_prompts()` in the code examples below. 
The general structure is: + +``` +[Task instruction preamble] + +## Context + +**Research Question:** +{research_question} + +**Background Survey (existing methods for THIS task):** +{background_survey} + +**Previous Hypothesis (if any):** +{previous_hypothesis_or_none} + +## Candidate Inspiration Papers + +### Candidate [A] +**Title:** {title_A} +**Abstract:** {abstract_A} + +... (15 candidates total, A through O) + +## Output Format + +<think> +[reasoning process] +</think> + +**Selected ID starts:** [X] **Selected ID ends** + +**Selection Reason starts:** [reason] **Selection Reason ends** +``` + +### IR Usage + +**Prerequisites**: Clone the [MOOSE-Star repo](https://github.com/ZonglinY/MOOSE-Star) for prompt templates and inference utilities: +```bash +git clone https://github.com/ZonglinY/MOOSE-Star.git && cd MOOSE-Star +# See requirements.txt for full dependencies; at minimum: pip install transformers torch +``` + +#### Option A: SGLang Deployment (Recommended) + +```bash +# SGLang requires a separate environment; see https://github.com/sgl-project/sglang for installation +# Start the server +python -m sglang.launch_server --model-path ZonglinY/MOOSE-Star-R1D-7B --port 1235 +``` + +```python +import sys +sys.path.insert(0, "./Inference") +from ir_probability_extractor import IRProbabilityExtractor + +extractor = IRProbabilityExtractor(base_urls=["http://localhost:1235/v1"]) +result = extractor.get_selection_probabilities( + research_question="Your research question", + background_survey="Your background survey", + candidates=[ + {"title": "Candidate A title", "abstract": "Candidate A abstract"}, + {"title": "Candidate B title", "abstract": "Candidate B abstract"}, + # ... 
up to 15 candidates (labeled A-O) + ], +) +print(f"Selected: [{result.selected_label}]") +print(f"Probabilities: {result.probabilities}") +``` + +#### Option B: Direct HuggingFace Inference + +```python +import sys +sys.path.insert(0, "./utils") +from prompt_store import instruction_prompts +from transformers import AutoModelForCausalLM, AutoTokenizer +import re + +model_name = "ZonglinY/MOOSE-Star-R1D-7B" +tokenizer = AutoTokenizer.from_pretrained(model_name) +model = AutoModelForCausalLM.from_pretrained(model_name, dtype="auto", device_map="auto") + +p = instruction_prompts("inspiration_retrieval_with_reasoning_with_alphabetical_candidates") + +candidates = [{"title": "...", "abstract": "..."}, ...] +candidates_text = "".join( + f"### Candidate [{chr(ord('A') + i)}]\n**Title:** {c['title']}\n**Abstract:** {c['abstract']}\n\n" + for i, c in enumerate(candidates) +) + +research_question = "Your research question" +background_survey = "Your background survey" +prompt = (p[0] + research_question + + p[1] + background_survey + + p[2] + "No previous hypothesis." + + p[3] + candidates_text + + p[4]) + +messages = [{"role": "user", "content": prompt}] +formatted = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False) +formatted += "<\uff5cAssistant\uff5c>" + +inputs = tokenizer(formatted, return_tensors="pt").to(model.device) +outputs = model.generate(**inputs, max_new_tokens=8192, temperature=0.6, top_p=0.9, do_sample=True) +response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True) + +match = re.search(r"\*\*Selected ID starts:\*\*\s*\[(\w)\]\s*\*\*Selected ID ends\*\*", response) +if match: + print(f"Selected: [{match.group(1)}]") +``` + +## Task 2: Hypothesis Composition (HC) + +The model generates **delta hypotheses** from inspiration papers. Given a research question, background survey, and new inspiration paper, it outputs structured hypothesis components. 
+ +### HC Prompt Format (Simplified Overview) + +The full prompt template is constructed via `instruction_prompts()` in the code examples below. The general structure is: + +``` +[Task instruction preamble] + +## Information Provided + +**Research Question**: +{research_question} + +**Background Survey**: +{background_survey} + +**Previous Hypothesis**: +{previous_hypothesis_or_none} + +**New Inspiration Paper Title**: +{inspiration_title} + +**New Inspiration Paper Abstract**: +{inspiration_abstract} + +## Your Response + +<think> +[reasoning process] +</think> + +Inspiration: [Key concept] +- Motivation (WHY): [Why this addresses a gap] +- Mechanism (HOW IT WORKS): [How the concept works] +- Methodology (HOW IT'S INTEGRATED): [Implementation steps] +``` + +### HC Usage + +```python +import sys +sys.path.insert(0, "./utils") +from prompt_store import instruction_prompts +from transformers import AutoModelForCausalLM, AutoTokenizer + +model_name = "ZonglinY/MOOSE-Star-R1D-7B" +tokenizer = AutoTokenizer.from_pretrained(model_name) +model = AutoModelForCausalLM.from_pretrained(model_name, dtype="auto", device_map="auto") + +p = instruction_prompts("prepare_HC_sft_data_to_go_comprehensive_v2_delta") + +research_question = "Your research question here" +background_survey = "Your background survey here" +inspiration_title = "Inspiration paper title" +inspiration_abstract = "Inspiration paper abstract" + +prompt = (p[0] + research_question + + p[1] + background_survey + + p[2] + "No previous hypothesis." 
+ + p[3] + inspiration_title + + p[4] + inspiration_abstract + + p[5]) + +messages = [{"role": "user", "content": prompt}] +formatted = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False) +formatted += "<\uff5cAssistant\uff5c>" + +inputs = tokenizer(formatted, return_tensors="pt").to(model.device) +outputs = model.generate(**inputs, max_new_tokens=8192, temperature=0.6, top_p=0.9, do_sample=True) +response = tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True) +print(response) +``` + +## Evaluation Results + +### Inspiration Retrieval (Table 1) + +| Model | Accuracy | +|-------|----------| +| Random Selection | 6.70% | +| R1-Distilled-Qwen-7B (base) | 28.42% | +| MS-IR-7B (single-task) | 54.37% | +| **MS-7B (this model)** | **54.34%** | + +### Hypothesis Composition - Normal (Table 2) + +Rubric-based evaluation with ground-truth inspirations (Judge: GPT-4o): + +| Model | Total | Mot | Mec | Met | Length | +|-------|-------|-----|-----|-----|--------| +| R1-Distilled-Qwen-7B (base) | 4.05 | 1.96 | 1.30 | 0.80 | 231.02 | +| MS-HC-7B (single-task) | 4.68 | 2.13 | 1.46 | 1.09 | 204.12 | +| MS-HC-7B w/ 1x bounded | 4.74 | 2.16 | 1.48 | 1.10 | 203.84 | +| **MS-7B (this model)** | **5.02** | **2.22** | **1.59** | **1.20** | 208.98 | + +### Hypothesis Composition - Bounded (Table 3) + +Performance under varying levels of inspiration noise (Judge: GPT-4o): + +| Model | Easy Total | Medium Total | Hard Total | +|-------|-----------|-------------|-----------| +| R1-Distilled-Qwen-7B (base) | 2.72 | 2.27 | 2.00 | +| MS-HC-7B w/ 2x bounded | 3.18 | 2.74 | 2.56 | +| **MS-7B (this model)** | **3.37** | **2.86** | **2.78** | + +## Key Findings + +- **IR performance preserved**: Multi-task training maintains full IR accuracy (54.34% vs 54.37% single-task) +- **HC significantly improved**: Multi-task HC outperforms all single-task variants, including those with bounded composition augmentation +- **Robust under 
noise**: Largest improvements on Hard bounded composition, suggesting IR reasoning skills transfer to HC + +## Citation + +```bibtex +@article{yang2025moosestar, + title={MOOSE-Star: Unlocking Tractable Training for Scientific Discovery by Breaking the Complexity Barrier}, + author={Yang, Zonglin and Bing, Lidong}, + journal={arXiv preprint arXiv:2603.03756}, + year={2026} +} +``` + + +## Try It on the Inference Provider (Copy-Paste Examples) + +The HF Inference Provider playground for this model currently has known +limitations (default `max_tokens` is set to the full context window, and the +parameter is not adjustable in the UI). Until that is fixed upstream, the +recommended way to try the model is via the API. + +### Quickstart (Python) + +```python +from huggingface_hub import InferenceClient + +client = InferenceClient(provider="featherless-ai", token="") +prompt = """""" + +r = client.chat_completion( + messages=[{"role": "user", "content": prompt}], + model="", + max_tokens=4096, # IMPORTANT: must be < 32768 - len(prompt_tokens) + temperature=0.6, + top_p=0.9, +) +print(r.choices[0].message.content) +``` + +> **Note on chat template**: under the DeepSeek-R1 chat template the opening +> `<think>` tag is injected by the template itself, so the raw output begins +> directly with reasoning text and contains only the closing `</think>`. This +> is expected. + +### Example Case (real TOMATO-Star test paper, October 2025) + +- **Research question**: *How to comprehensively explain and predict the heterogeneity of neurodegeneration in Alzheimer's disease?* +- **Ground-truth inspiration**: *"Network neuroscience"* (Bassett & Sporns, 2017) — the kind of cross-disciplinary leap MOOSE-Star is trained to surface (using graph-theoretic brain-network tools to explain AD heterogeneity that single-pathology models miss). + + +
+IR prompt — select 1 inspiration from 15 candidates A–O (ground-truth = [F] "Network neuroscience.") + +```` +You are helping with scientific hypothesis generation by selecting an inspiration that solves a fundamental problem in the current approach. + +## Core Task: Problem Identification and Solution + +**Your Primary Goal**: Identify which candidate paper can best help solve a fundamental problem in the existing methods/hypothesis - either directly or by inspiring a solution. + +**Key Principle**: Good inspirations help solve real problems. They might directly provide a solution, or they might spark an idea, remind you of related concepts, or inspire a creative adaptation. The best breakthroughs often come from unexpected connections. + +**What Makes a Good Inspiration**: +1. **Problem-Solution Fit**: Either addresses a known limitation OR reveals new improvement opportunities +2. **Enables Progress**: The paper provides concepts, sparks ideas, or inspires solutions that advance the research +3. **Creative Connection**: The link might be indirect, non-obvious, or emerge during exploration +4. **Clear Impact**: You can explain how this paper contributes to progress, even if the path is unexpected + +**The Research Process**: +1. **Background**: Research question + existing methods (with their limitations) +2. **Problem Identification**: What fundamental issue prevents progress? +3. **Inspiration Selection**: Which concept best solves this problem? +4. **Hypothesis Formation**: Adapt the solution to create a better method + +**Classic Example - Backpropagation**: +- **Research Question**: How to use data to automatically improve parameters of multi-layer logistic regression? 
+- **Existing Methods**: Could only do inference, not learning +- **FUNDAMENTAL PROBLEM**: No way to compute gradients through multiple layers +- **Solution Found**: Chain rule from calculus +- **Why It Solves the Problem**: Chain rule computes derivatives of composite functions; neural networks ARE composite functions +- **Result**: Backpropagation algorithm + +Note: The focus was on SOLVING THE GRADIENT PROBLEM. The breakthrough came from recognizing neural networks as composite functions. + +## Your Current Task + +**Flexible Reasoning Process** (these steps can happen in any order or iteratively): +- **Problem Recognition**: Identify limitations in current methods/hypothesis (can happen before OR after seeing candidates) +- **Opportunity Discovery**: For each candidate, explore how it might advance the research: + - It might solve a problem you already identified + - It might reveal a problem you hadn't noticed and simultaneously offer a solution + - It might spark ideas for improvements you hadn't considered +- **Selection**: Choose the candidate that enables the most meaningful progress + +**Note**: The reasoning is often bidirectional - seeing a candidate can make you realize "oh, this could address limitation X that I hadn't fully articulated" or "this suggests a way to improve aspect Y" + +**Remember**: +- The best inspiration might not seem immediately relevant +- Focus on problem-solving potential, not keyword matching +- Creative connections often lead to breakthroughs +- Consider how concepts could be adapted or repurposed + +**Avoid**: +- Choosing based on surface-level similarity +- Dismissing candidates that seem unrelated at first glance + +## Context + +**Research Question:** +How to comprehensively explain and predict the heterogeneity of neurodegeneration in Alzheimer's disease? 
+ +**Background Survey (existing methods for THIS task):** +Current Alzheimer's disease (AD) research is dominated by unifactorial approaches, primarily the amyloid-beta (Aβ) hypothesis, which posits Aβ accumulation as the central cause of neurodegeneration. Established methods include: +- Biomarker-focused frameworks (e.g., ATN framework), which categorize AD pathology into amyloid/tau/neurodegeneration but oversimplify heterogeneity. +- Reductionist therapeutics like Aβ antibodies (lecanemab [17], donanemab [18]) that remove plaques but yield marginal cognitive improvements (±25% delay) and cause adverse effects (brain swelling/microbleeds). +- Siloed scale investigations: Molecular studies (Aβ/tau at nanoscale [4]), cellular analyses (neuroinflammation at microscale [29]), and epidemiological risk tracking (exposcale [33]) lack integration. + +Key Limitations: +- Aβ-centric models fail to explain: + - Aβ plaques in cognitively healthy seniors [8–10] + - Poor correlation between Aβ burden and cognitive decline [11–13] + - Heterogeneity in clinical presentations (e.g., visual vs. memory-predominant AD [56]) +- Clinical trials targeting single entities ignore cross-scale interactions (e.g., exercise improves cognition via vascular/metabolic pathways [58]). + +**Previous Hypothesis (if any - current progress built from earlier inspirations):** +None (starting from background knowledge) + +## Candidate Inspiration Papers + +### Candidate [A] +**Title:** Social cognitive network neuroscience. +**Abstract:** Over the past three decades, research from the field of social neuroscience has identified a constellation of brain regions that relate to social cognition. Although these studies have provided important insights into the specific neural regions underlying social behavior, they may overlook the broader neural context in which those regions and the interactions between them are embedded. 
Network neuroscience is an emerging discipline that focuses on modeling and analyzing brain networks-collections of interacting neural elements. Because human cognition requires integrating information across multiple brain regions and systems, we argue that a novel social cognitive network neuroscience approach-which leverages methods from the field of network neuroscience and graph theory-can advance our understanding of how brain systems give rise to social behavior. This review provides an overview of the field of network neuroscience, discusses studies that have leveraged this approach to advance social neuroscience research, highlights the potential contributions of social cognitive network neuroscience to understanding social behavior and provides suggested tools and resources for conducting network neuroscience research. + +### Candidate [B] +**Title:** Editorial: Topological Neuroscience. +**Abstract:** Topology, in its many forms, describes relations. It has thus long been a central concept in neuroscience, capturing structural and functional aspects of the organization of the nervous system and their links to cognition. Recent advances in computational topology have extended the breadth and depth of topological descriptions. This Focus Feature offers a unified overview of the emerging field of topological neuroscience and of its applications across the many scales of the nervous system from macro-, over meso-, to microscales. + +### Candidate [C] +**Title:** Exogenous neuritin treatment improves survivability and functions of Schwann cells with improved outgrowth of neurons in rat diabetic neuropathy. +**Abstract:** Pathogenesis and treatment for diabetic neuropathy are still complex. A deficit of neurotrophic factors affecting Schwann cells is a very important cause of diabetic neuropathy. Neuritin is a newly discovered potential neurotrophic factor. 
In this study, we explored the effect of exogenous neuritin on survivability and functions of diabetic Schwann cells of rats with experimental diabetic neuropathy. Diabetic neuropathy was induced in rats. 12-week diabetic rats contrasted with non-diabetic normal rats had decreased levels of serum neuritin and slowed nerve conduction velocities (NCVs). Schwann cells isolated from these diabetic rats and cultured in high glucose showed reduced cell neuritin mRNA and protein and supernatant neuritin protein, increased apoptosis rates, increased caspase-3 activities and progressively reduced viability. In contrast, exogenous neuritin treatment reduced apoptosis and improved viability, with elevated Bcl-2 levels (not Bax) and decreased caspase-3 activities. Co-cultured with diabetic Schwann cells pre-treated with exogenous neuritin in high glucose media, and diabetic DRG neurons showed lessened decreased neurite outgrowth and supernatant NGF concentration occurring in co-culture of diabetic cells. Exogenous neuritin treatment ameliorated survivability and functions of diabetic Schwann cells of rats with diabetic neuropathy. Our study may provide a new mechanism and potential treatment for diabetic neuropathy. + +### Candidate [D] +**Title:** The prone position in COVID-19 impacts the thickness of peripapillary retinal nerve fiber layers and macular ganglion cell layers. +**Abstract:** The prone position reduces mortality in severe cases of COVID-19 with acute respiratory distress syndrome. However, visual loss and changes to the peripapillary retinal nerve fiber layer (p-RNFL) and the macular ganglion cell layer and inner plexiform layer (m-GCIPL) have occurred in patients undergoing surgery in the prone position. Moreover, COVID-19-related eye problems have been reported. This study compared the p-RNFL and m-GCIPL thicknesses of COVID-19 patients who were placed in the prone position with patients who were not. 
This prospective longitudinal and case-control study investigated 15 COVID-19 patients placed in the prone position (the "Prone Group"), 23 COVID-19 patients not in the prone position (the "Non-Prone Group"), and 23 healthy, non-COVID individuals without ocular disease or systemic conditions (the "Control Group"). The p-RNFL and m-GCIPL thicknesses of the COVID-19 patients were measured at 1, 3, and 6 months and compared within and between groups. The result showed that the Prone and Non-Prone Groups had no significant differences in their p-RNFL thicknesses at the 3 follow-ups. However, the m-GCIPL analysis revealed significant differences in the inferior sector of the Non-Prone Group between months 1 and 3 (mean difference, 0.74 μm; P = 0.009). The p-RNFL analysis showed a significantly greater thickness at 6 months for the superior sector of the Non-Prone Group (131.61 ± 12.08 μm) than for the Prone Group (118.87 ± 18.21 μm; P = 0.039). The m-GCIPL analysis revealed that the inferior sector was significantly thinner in the Non-Prone Group than in the Control Group (at 1 month 80.57 ± 4.60 versus 83.87 ± 5.43 μm; P = 0.031 and at 6 months 80.48 ± 3.96 versus 83.87 ± 5.43 μm; P = 0.044). In conclusion, the prone position in COVID-19 patients can lead to early loss of p-RNFL thickness due to rising intraocular pressure, which is independent of the timing of prone positioning. Consequently, there is no increase in COVID-19 patients' morbidity burden. + +### Candidate [E] +**Title:** TCMSP: a database of systems pharmacology for drug discovery from herbal medicines +**Abstract:** BackgroundModern medicine often clashes with traditional medicine such as Chinese herbal medicine because of the little understanding of the underlying mechanisms of action of the herbs. 
In an effort to promote integration of both sides and to accelerate the drug discovery from herbal medicines, an efficient systems pharmacology platform that represents ideal information convergence of pharmacochemistry, ADME properties, drug-likeness, drug targets, associated diseases and interaction networks, are urgently needed.DescriptionThe traditional Chinese medicine systems pharmacology database and analysis platform (TCMSP) was built based on the framework of systems pharmacology for herbal medicines. It consists of all the 499 Chinese herbs registered in the Chinese pharmacopoeia with 29,384 ingredients, 3,311 targets and 837 associated diseases. Twelve important ADME-related properties like human oral bioavailability, half-life, drug-likeness, Caco-2 permeability, blood-brain barrier and Lipinski’s rule of five are provided for drug screening and evaluation. TCMSP also provides drug targets and diseases of each active compound, which can automatically establish the compound-target and target-disease networks that let users view and analyze the drug action mechanisms. It is designed to fuel the development of herbal medicines and to promote integration of modern medicine and traditional medicine for drug discovery and development.ConclusionsThe particular strengths of TCMSP are the composition of the large number of herbal entries, and the ability to identify drug-target networks and drug-disease networks, which will help revealing the mechanisms of action of Chinese herbs, uncovering the nature of TCM theory and developing new herb-oriented drugs. TCMSP is freely available at http://sm.nwsuaf.edu.cn/lsp/tcmsp.php. + +### Candidate [F] +**Title:** Network neuroscience. +**Abstract:** Despite substantial recent progress, our understanding of the principles and mechanisms underlying complex brain function and cognition remains incomplete. Network neuroscience proposes to tackle these enduring challenges. 
Approaching brain structure and function from an explicitly integrative perspective, network neuroscience pursues new ways to map, record, analyze and model the elements and interactions of neurobiological systems. Two parallel trends drive the approach: the availability of new empirical tools to create comprehensive maps and record dynamic patterns among molecules, neurons, brain areas and social systems; and the theoretical framework and computational tools of modern network science. The convergence of empirical and computational advances opens new frontiers of scientific inquiry, including network dynamics, manipulation and control of brain networks, and integration of network processes across spatiotemporal domains. We review emerging trends in network neuroscience and attempt to chart a path toward a better understanding of the brain as a multiscale networked system. + +### Candidate [G] +**Title:** Sequential and cooperative action of Fgfs and Shh in the zebrafish retina. +**Abstract:** The signaling molecule Sonic hedgehog (Shh) is required for differentiation of the vertebrate retina. In the developing zebrafish retina, shh expression is initiated at the ventronasal region, from where it spreads as a wave through the retina. To investigate the molecular mechanism underlying this coordinated expression of shh, we mapped the cis-regulatory region and identified a novel regulatory sequence in the first intron of the shh locus. This sequence contains binding sites for the transcription factors Erm and Pea3 that are known transducers of Fgf signaling. Mutation of the binding sites or knockdown of Pea3 and Erm abolishes transgene expression, indicating that Fgf signaling regulates shh expression in the retina. We provide evidence that Fgf3 and -8 control initiation of expression, while Fgf19 is crucial for the propagation of transgene expression through the retina. 
Inhibitor experiments indicate a continued requirement of FGF and Hedgehog (Hh) signaling for transgene expression after initiation at the ventronasal aspect of the retina. We propose a model, in which Fgf3 and -8 initiate expression and Fgf19 and Shh signals cooperate subsequently to promote establishment of expression throughout the retina. + +### Candidate [H] +**Title:** Memory function and the hippocampus. +**Abstract:** There has been a long tradition in memory research of adopting the view of a vital role of the medial temporal lobe and especially the hippocampus in declarative memory. Despite the broad support for this notion, there is an ongoing debate about what computations are performed by the different substructures. The present chapter summarizes several accounts of hippocampal functions in terms of the cognitive processes subserved by these structures, the information processed, and the underlying neural operations. Firstly, the value of the distinction between recollection and familiarity for the understanding of the role the hippocampus plays in memory is discussed. Then multiple lines of evidence for the role of the hippocampus in memory are considered. Cumulating evidence suggests that the hippocampus fosters the binding of disparate cortical representations of items and their spatiotemporal context into a coherent representation by means of a sparse conjunctive neural coding. This association of item and context will then lead to the phenomenological experience of recollection. In contrast, surrounding cortical areas have broader neural coding that provide a scalar signal of the similarity between two inputs (e.g. between the encoding and the retrieval). By this they form the basis of a feeling of familiarity, but also might encode the commonalities between these different inputs. 
However, a more complete picture of the importance of the hippocampus for declarative memories can only be drawn when the interactions of the medial temporal lobe with other brain areas are also taken into account. + +### Candidate [I] +**Title:** Association between response inhibition and working memory in adult ADHD: a link to right frontal cortex pathology? +**Abstract:** We sought to assess the relationship between response inhibition and working memory in adult patients with attention-deficit/hyperactivity disorder (ADHD) and neurosurgical patients with frontal lobe damage. The stop-signal reaction time (SSRT) test and a spatial working memory (SWM) task were administered to 20 adult patients with ADHD and a group of matched controls. The same tasks were administered to 21 patients with lesions to right frontal cortex and 19 patients with left frontal lesions. The SSRT test, but not choice reaction time, was significantly associated with search errors on the SWM task in both the adult ADHD and right frontal patients. In the right frontal patients, impaired performance on both variables was correlated with the volume of damage to the inferior frontal gyrus. Response inhibition and working memory impairments in ADHD may stem from a common pathologic process rather than being distinct deficits. Such pathology could relate to right frontal-cortex abnormalities in ADHD, consistent with prior reports, as well as with the demonstration here of a significant association between SSRT and SWM in right frontal patients. + +### Candidate [J] +**Title:** The effects of common peroneal nerve electrical stimulation on lower extremity deep venous hemodynamics: A randomized, crossover and controlled study. +**Abstract:** Intermittent pneumatic compression (IPC) and neuromuscular electrical stimulation can improve deep vein hemodynamics in the lower limbs. 
We developed a new, small and convenient, and easy to wear common peroneal nerve electrical stimulator (CPNES) and to investigate the effectiveness and safety of CPNES intervention on deep venous hemodynamics. Thirty healthy volunteers were recruited and randomly divided into group A and B. In group A, the hemodynamics of the left superficial femoral artery and the superficial femoral vein were measured after IPC compression, and then the CPNES was activated and the hemodynamics was measured again. In group B, the order of intervention was reversed. In group A, the peak velocity, time average blood flow velocity (TAMV), and flow velocity of femoral vein after IPC and CPNES intervention were higher than these of the baseline (P < .05, respectively). No significant differences of these blood flow parameters were found between IPC and CPNES intervention (P > .05, respectively). In group B, these blood flow parameters of femoral vein after IPC and CPNES intervention were higher than these of the baseline (P < .05, respectively). No significant difference of these blood flow parameters (P > .05, respectively) were noted between IPC and CPNES intervention as well. No differential change of these flow velocity of femoral artery after IPC and CPNES intervention in group A or group B. The hemodynamics of superficial femoral arteries and veins after intervention in group A and B were similar (P > .05, respectively). The effectiveness of CPNES intervention on the hemodynamics of the lower extremity is similar with that of IPC, increasing blood flow and may prevent venous thrombosis without adverse reaction. + +### Candidate [K] +**Title:** Region of interest correction factors improve reliability of diffusion imaging measures within and across scanners and field strengths +**Abstract:** Diffusion tensor imaging (DTI) measures are commonly used as imaging markers to investigate individual differences in relation to behavioral and health-related characteristics. 
However, the ability to detect reliable associations in cross-sectional or longitudinal studies is limited by the reliability of the diffusion measures. Several studies have examined the reliability of diffusion measures within (i.e. intra-site) and across (i.e. inter-site) scanners with mixed results. Our study compares the test-retest reliability of diffusion measures within and across scanners and field strengths in cognitively normal older adults with a follow-up interval less than 2.25 years. Intra-class correlation (ICC) and coefficient of variation (CoV) of fractional anisotropy (FA) and mean diffusivity (MD) were evaluated in sixteen white matter and twenty-six gray matter bilateral regions. The ICC for intra-site reliability (0.32 to 0.96 for FA and 0.18 to 0.95 for MD in white matter regions; 0.27 to 0.89 for MD and 0.03 to 0.79 for FA in gray matter regions) and inter-site reliability (0.28 to 0.95 for FA in white matter regions, 0.02 to 0.86 for MD in gray matter regions) with longer follow-up intervals were similar to earlier studies using shorter follow-up intervals. The reliability of across field strengths comparisons was lower than intra- and inter-site reliabilities. Within and across scanner comparisons showed that diffusion measures were more stable in larger white matter regions (>1500 mm(3)). For gray matter regions, the MD measure showed stability in specific regions and was not dependent on region size. Linear correction factor estimated from cross-sectional or longitudinal data improved the reliability across field strengths. Our findings indicate that investigations relating diffusion measures to external variables must consider variable reliability across the distinct regions of interest and that correction factors can be used to improve consistency of measurement across field strengths. 
An important result of this work is that inter-scanner and field strength effects can be partially mitigated with linear correction factors specific to regions of interest. These data-driven linear correction techniques can be applied in cross-sectional or longitudinal studies. Published by Elsevier Inc. + +### Candidate [L] +**Title:** Role of adenosine A2a receptor in cancers and autoimmune diseases +**Abstract:** Adenosine receptors are P1 class of purinergic receptors that belong to G protein‐coupled receptors. There are 4 subtypes of adenosine receptors, namely A1, A2A, A2B, and A3. A2AR has a high affinity for the ligand adenosine. Under pathological conditions or external stimuli, ATP is sequentially hydrolyzed to adenosine by CD39 and CD73. The combination of adenosine and A2AR can increase the concentration of cAMP and activate a series of downstream signaling pathways, and further playing the role of immunosuppression and promotion of tumor invasion. A2AR is expressed to some extent on various immune cells, where it is abnormally expressed on immune cells in cancers and autoimmune diseases. A2AR expression also correlates with disease progression. Inhibitors and agonists of A2AR may be potential new strategies for treatment of cancers and autoimmune diseases. We herein briefly reviewed the expression and distribution of A2AR, adenosine/A2AR signaling pathway, expression, and potential as a therapeutic target. + +### Candidate [M] +**Title:** Cognitive network neuroscience. +**Abstract:** Network science provides theoretical, computational, and empirical tools that can be used to understand the structure and function of the human brain in novel ways using simple concepts and mathematical representations. 
Network neuroscience is a rapidly growing field that is providing considerable insight into human structural connectivity, functional connectivity while at rest, changes in functional networks over time (dynamics), and how these properties differ in clinical populations. In addition, a number of studies have begun to quantify network characteristics in a variety of cognitive processes and provide a context for understanding cognition from a network perspective. In this review, we outline the contributions of network science to cognitive neuroscience. We describe the methodology of network science as applied to the particular case of neuroimaging data and review its uses in investigating a range of cognitive functions including sensory processing, language, emotion, attention, cognitive control, learning, and memory. In conclusion, we discuss current frontiers and the specific challenges that must be overcome to integrate these complementary disciplines of network science and cognitive neuroscience. Increased communication between cognitive neuroscientists and network scientists could lead to significant discoveries under an emerging scientific intersection known as cognitive network neuroscience. + +### Candidate [N] +**Title:** Graph Neural Networks in Network Neuroscience. +**Abstract:** Noninvasive medical neuroimaging has yielded many discoveries about the brain connectivity. Several substantial techniques mapping morphological, structural and functional brain connectivities were developed to create a comprehensive road map of neuronal activities in the human brain -namely brain graph. Relying on its non-euclidean data type, graph neural network (GNN) provides a clever way of learning the deep graph structure and it is rapidly becoming the state-of-the-art leading to enhanced performance in various network neuroscience tasks. 
Here we review current GNN-based methods, highlighting the ways that they have been used in several applications related to brain graphs such as missing brain graph synthesis and disease classification. We conclude by charting a path toward a better application of GNN models in network neuroscience field for neurological disorder diagnosis and population graph integration. The list of papers cited in our work is available at https://github.com/basiralab/GNNs-in-Network-Neuroscience. + +### Candidate [O] +**Title:** Null models in network neuroscience. +**Abstract:** Recent advances in imaging and tracing technology provide increasingly detailed reconstructions of brain connectomes. Concomitant analytic advances enable rigorous identification and quantification of functionally important features of brain network architecture. Null models are a flexible tool to statistically benchmark the presence or magnitude of features of interest, by selectively preserving specific architectural properties of brain networks while systematically randomizing others. Here we describe the logic, implementation and interpretation of null models of connectomes. We introduce randomization and generative approaches to constructing null networks, and outline a taxonomy of network methods for statistical inference. We highlight the spectrum of null models - from liberal models that control few network properties, to conservative models that recapitulate multiple properties of empirical networks - that allow us to operationalize and test detailed hypotheses about the structure and function of brain networks. We review emerging scenarios for the application of null models in network neuroscience, including for spatially embedded networks, annotated networks and correlation-derived networks. Finally, we consider the limits of null models, as well as outstanding questions for the field. 
+ +## Output Format + +**CRITICAL**: You MUST structure your response EXACTLY as follows (the markers are used for automatic parsing). + +<think> +[Your flexible reasoning process - explore problems and opportunities as they emerge, evaluate how candidates relate to potential improvements, select the most promising one. Refer to candidates using their labels like Candidate [A], Candidate [B], etc.] +</think> + +**Selected ID starts:** [X] **Selected ID ends** + +(Replace [X] with the letter of your chosen candidate, e.g., [A], [B], [C], etc. Output ONLY the letter in brackets, nothing else between the markers.) + +**Selection Reason starts:** [summary of why this inspiration was selected - what problem it addresses, how it enables progress] **Selection Reason ends** +```` + +**Expected output marker:** `**Selected ID starts:** [F] **Selected ID ends**` + +
+ + +
+HC prompt — compose Δhypothesis from the retrieved inspiration "Network neuroscience" + +```` +You are a scientific hypothesis composer. Given a research question, background survey, potentially a previous hypothesis to build upon, and a new inspiration paper, you will reason through how to adapt concepts from the inspiration to advance the solution, then formulate a DELTA HYPOTHESIS - the specific contribution from THIS inspiration paper (not the full cumulative hypothesis). + +## Your Task + +Analyze the provided research context and inspiration paper to: +1. Identify the key conceptual innovation from the inspiration paper (Note: the paper may directly provide a concept that can be adapted, OR it may contain related ideas/transferable mechanisms that inspire what we need - look beyond exact concept names) +2. Determine how this innovation addresses gaps (either in existing methods or in your previous hypothesis) +3. Reason through adaptation and integration into your solution +4. Formulate a delta hypothesis describing ONLY what THIS inspiration contributes + +## Key Principles + +**Reasoning Process:** +- Start by understanding the problem and what current methods lack +- If there's a previous hypothesis, understand what it already addresses and what gaps/limitations remain +- Analyze the inspiration paper to identify relevant concepts that could be adapted +- Reason through: What specific knowledge/technique from this paper could serve as an inspiration? +- Connect the dots: How does this potential inspiration address the identified gaps? +- Work through the mechanism: How would this inspiration actually function in our context? 
+- Develop the methodology: Detail the specific implementation and integration +- Don't just identify concepts - reason through their practical application and adaptation + +**Delta Hypothesis Requirements:** +- Output ONLY what THIS inspiration adds (delta), NOT the full cumulative hypothesis +- Don't repeat what's already in the previous hypothesis +- Must clearly explain WHY the inspiration addresses the problem (Motivation) +- Must detail HOW the inspiration works in this context (Mechanism) +- Must specify HOW to implement it methodologically (Methodology) +- Follow the exact structured format shown below + +## Information Provided + +**Research Question** (the specific problem to solve): +How to comprehensively explain and predict the heterogeneity of neurodegeneration in Alzheimer's disease? + +**Background Survey** (existing methods and their limitations): +Current Alzheimer's disease (AD) research is dominated by unifactorial approaches, primarily the amyloid-beta (Aβ) hypothesis, which posits Aβ accumulation as the central cause of neurodegeneration. Established methods include: +- Biomarker-focused frameworks (e.g., ATN framework), which categorize AD pathology into amyloid/tau/neurodegeneration but oversimplify heterogeneity. +- Reductionist therapeutics like Aβ antibodies (lecanemab [17], donanemab [18]) that remove plaques but yield marginal cognitive improvements (±25% delay) and cause adverse effects (brain swelling/microbleeds). +- Siloed scale investigations: Molecular studies (Aβ/tau at nanoscale [4]), cellular analyses (neuroinflammation at microscale [29]), and epidemiological risk tracking (exposcale [33]) lack integration. + +Key Limitations: +- Aβ-centric models fail to explain: + - Aβ plaques in cognitively healthy seniors [8–10] + - Poor correlation between Aβ burden and cognitive decline [11–13] + - Heterogeneity in clinical presentations (e.g., visual vs. 
memory-predominant AD [56]) +- Clinical trials targeting single entities ignore cross-scale interactions (e.g., exercise improves cognition via vascular/metabolic pathways [58]). + +**Previous Hypothesis** (if any - the current state of your solution to build upon): +No previous hypothesis. + +**New Inspiration Paper Title** (external work to incorporate): +Network neuroscience. + +**New Inspiration Paper Abstract**: +Despite substantial recent progress, our understanding of the principles and mechanisms underlying complex brain function and cognition remains incomplete. Network neuroscience proposes to tackle these enduring challenges. Approaching brain structure and function from an explicitly integrative perspective, network neuroscience pursues new ways to map, record, analyze and model the elements and interactions of neurobiological systems. Two parallel trends drive the approach: the availability of new empirical tools to create comprehensive maps and record dynamic patterns among molecules, neurons, brain areas and social systems; and the theoretical framework and computational tools of modern network science. The convergence of empirical and computational advances opens new frontiers of scientific inquiry, including network dynamics, manipulation and control of brain networks, and integration of network processes across spatiotemporal domains. We review emerging trends in network neuroscience and attempt to chart a path toward a better understanding of the brain as a multiscale networked system. + +## Your Response + +Analyze how this inspiration paper's concepts can advance your solution: + +1. **If starting from scratch** (no previous hypothesis): + - Identify how the inspiration addresses the core gaps in existing methods + - This becomes your first conceptual building block beyond the baseline approach + +2. 
**If building on a previous hypothesis**: + - First understand what the previous hypothesis already accomplishes + - Identify remaining limitations or opportunities for enhancement + - Determine how this new inspiration specifically addresses those gaps + +Show your reasoning process as you: +- Extract relevant concepts from the inspiration paper (may not be obvious - reason through what could be useful) +- Identify what specific technique/knowledge could serve as the inspiration +- Connect this inspiration to your problem: Why is this relevant? How does it address gaps? +- Work through adaptation: How to modify this concept for your specific context? +- Detail the motivation, mechanism, and methodology for THIS inspiration's contribution +- Reason through implementation details + +Then formulate a delta hypothesis that captures ONLY what THIS inspiration adds. + +## Output Format + +**IMPORTANT**: Structure your response exactly as follows: + +<think> +[Your reasoning process here - explore all aspects thoroughly] +</think> + +**Delta Hypothesis starts:** +Inspiration: [Key concept derived from or inspired by the inspiration paper] +- Motivation (WHY): [Why this addresses a gap - what specific limitation does it solve?] +- Mechanism (HOW IT WORKS): [How the concept works in this context] +- Methodology (HOW IT'S INTEGRATED): [How to integrate it - specific implementation steps] +**Delta Hypothesis ends** + +⚠️ CRITICAL: The delta hypothesis is the ONLY part that gets evaluated! 
+- Include ALL components you FINALIZED in your reasoning (not early ideas you later revised) +- Be COMPREHENSIVE - every technical detail, mechanism, and methodology step you reasoned through should appear in the delta hypothesis +- Don't assume the reader saw your reasoning - the delta hypothesis must be SELF-CONTAINED and COMPLETE +- Focus on THIS inspiration's contribution only - don't repeat previous hypothesis content +```` + +**Expected output format:** +``` +<think> +[reasoning] +</think> + +**Delta Hypothesis starts:** +Inspiration: [...] +- Motivation (WHY): [...] +- Mechanism (HOW IT WORKS): [...] +- Methodology (HOW IT'S INTEGRATED): [...] +**Delta Hypothesis ends** +``` + +
+ diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..9ec97a5 --- /dev/null +++ b/all_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.0, + "total_flos": 2898754626256896.0, + "train_loss": 0.5475362745847836, + "train_runtime": 128435.5344, + "train_samples_per_second": 2.061, + "train_steps_per_second": 0.016 +} \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..c2066bd --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1 @@ +{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = 
content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\n'}}{% endif %} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..26706dc --- /dev/null +++ b/config.json @@ -0,0 +1,60 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151646, + "dtype": "bfloat16", + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 3584, + "initializer_range": 0.02, + "intermediate_size": 18944, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "max_window_layers": 28, + "model_type": "qwen2", + "num_attention_heads": 28, + "num_hidden_layers": 28, + "num_key_value_heads": 4, + "pad_token_id": 151643, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 10000, + "sliding_window": null, + "tie_word_embeddings": false, + "transformers_version": "4.57.1", + "use_cache": false, + "use_mrope": 
false, + "use_sliding_window": false, + "vocab_size": 152064 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..acaf452 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "_from_model_config": true, + "bos_token_id": 151646, + "do_sample": true, + "eos_token_id": [ + 151643 + ], + "pad_token_id": 151643, + "temperature": 0.6, + "top_p": 0.95, + "transformers_version": "4.57.1" +} diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..44f95fa --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86079d8ce268ca4ca151417205b63e9936c0f7926bdca79faeff12b771bf5f81 +size 4877660776 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..cdda50f --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b17ac4e9f2242b1f26811fd2e07651c8d7598692670cbe51a3b877d0ade458c +size 4932751008 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..5a82db1 --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eafc5bf3bbd927c9b4555ced464a54fda8b76e93ba458583447222b498263325 +size 4330865200 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..6578dc3 --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:892a13d341bbf95e623b0ec44a575288fd514dd817930de230b7d57284190b06 +size 1089994880 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..098a4a8 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,347 @@ +{ + "metadata": { + "total_parameters": 
333312, + "total_size": 15231233024 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.bias": 
"model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + 
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": 
"model-00003-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": 
"model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + 
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": 
"model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.bias": 
"model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": 
"model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": 
"model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": "model-00003-of-00004.safetensors" + } +} diff --git a/runs/Apr01_17-59-24_dsw-396638-5b5d784bd8-z9vqr/events.out.tfevents.1775037798.dsw-396638-5b5d784bd8-z9vqr.6789.0 b/runs/Apr01_17-59-24_dsw-396638-5b5d784bd8-z9vqr/events.out.tfevents.1775037798.dsw-396638-5b5d784bd8-z9vqr.6789.0 new file mode 100644 index 0000000..55a3bb8 --- /dev/null +++ b/runs/Apr01_17-59-24_dsw-396638-5b5d784bd8-z9vqr/events.out.tfevents.1775037798.dsw-396638-5b5d784bd8-z9vqr.6789.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f38efda3aca9df73c543b9077d37f1fe09a2b5216853b56eea34f480ec7ad3a0 +size 442679 diff --git a/special_tokens_map.json 
b/special_tokens_map.json new file mode 100644 index 0000000..1d385d6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..1a2db24 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..a207bd5 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,197 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151645": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151646": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|EOT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151648": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": false + }, + "151649": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": 
"<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 16384, + "pad_token": "<|end▁of▁sentence|>", + "padding_side": "right", + "sp_model_kwargs": {}, + "split_special_tokens": false, + "tokenizer_class": "LlamaTokenizerFast", + "unk_token": null, + "use_default_system_prompt": false, + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + 
'<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>\\n'}}{% endif %}" +} \ No newline at end of file diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..9ec97a5 --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.0, + "total_flos": 2898754626256896.0, + "train_loss": 0.5475362745847836, + "train_runtime": 128435.5344, + "train_samples_per_second": 2.061, + "train_steps_per_second": 0.016 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..d2f6410 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,2070 @@ +{"current_steps": 1, "total_steps": 2069, "loss": 0.9924, "lr": 0.0, "epoch": 0.00048344210780759005, "percentage": 0.05, "elapsed_time": "0:01:00", "remaining_time": "1 day, 10:47:08"} +{"current_steps": 2, "total_steps": 2069, "loss": 0.9738, "lr": 9.615384615384617e-08, "epoch": 0.0009668842156151801, "percentage": 0.1, "elapsed_time": "0:02:04", "remaining_time": "1 day, 11:42:45"} +{"current_steps": 3, "total_steps": 2069, "loss": 0.9588, "lr": 
1.9230769230769234e-07, "epoch": 0.0014503263234227702, "percentage": 0.14, "elapsed_time": "0:03:05", "remaining_time": "1 day, 11:33:51"} +{"current_steps": 4, "total_steps": 2069, "loss": 0.9862, "lr": 2.884615384615385e-07, "epoch": 0.0019337684312303602, "percentage": 0.19, "elapsed_time": "0:04:05", "remaining_time": "1 day, 11:10:11"} +{"current_steps": 5, "total_steps": 2069, "loss": 0.9758, "lr": 3.846153846153847e-07, "epoch": 0.00241721053903795, "percentage": 0.24, "elapsed_time": "0:05:05", "remaining_time": "1 day, 10:59:48"} +{"current_steps": 6, "total_steps": 2069, "loss": 0.9716, "lr": 4.807692307692308e-07, "epoch": 0.0029006526468455403, "percentage": 0.29, "elapsed_time": "0:06:05", "remaining_time": "1 day, 10:55:01"} +{"current_steps": 7, "total_steps": 2069, "loss": 0.976, "lr": 5.76923076923077e-07, "epoch": 0.00338409475465313, "percentage": 0.34, "elapsed_time": "0:07:02", "remaining_time": "1 day, 10:35:50"} +{"current_steps": 8, "total_steps": 2069, "loss": 0.9873, "lr": 6.730769230769231e-07, "epoch": 0.0038675368624607204, "percentage": 0.39, "elapsed_time": "0:08:04", "remaining_time": "1 day, 10:38:20"} +{"current_steps": 9, "total_steps": 2069, "loss": 0.9286, "lr": 7.692307692307694e-07, "epoch": 0.00435097897026831, "percentage": 0.43, "elapsed_time": "0:09:07", "remaining_time": "1 day, 10:47:27"} +{"current_steps": 10, "total_steps": 2069, "loss": 0.9845, "lr": 8.653846153846154e-07, "epoch": 0.0048344210780759, "percentage": 0.48, "elapsed_time": "0:10:08", "remaining_time": "1 day, 10:46:57"} +{"current_steps": 11, "total_steps": 2069, "loss": 0.95, "lr": 9.615384615384617e-07, "epoch": 0.005317863185883491, "percentage": 0.53, "elapsed_time": "0:11:14", "remaining_time": "1 day, 11:03:28"} +{"current_steps": 12, "total_steps": 2069, "loss": 0.9715, "lr": 1.0576923076923078e-06, "epoch": 0.005801305293691081, "percentage": 0.58, "elapsed_time": "0:12:16", "remaining_time": "1 day, 11:02:59"} +{"current_steps": 13, 
"total_steps": 2069, "loss": 0.9422, "lr": 1.153846153846154e-06, "epoch": 0.0062847474014986705, "percentage": 0.63, "elapsed_time": "0:13:21", "remaining_time": "1 day, 11:13:48"} +{"current_steps": 14, "total_steps": 2069, "loss": 0.965, "lr": 1.25e-06, "epoch": 0.00676818950930626, "percentage": 0.68, "elapsed_time": "0:14:26", "remaining_time": "1 day, 11:20:06"} +{"current_steps": 15, "total_steps": 2069, "loss": 0.9372, "lr": 1.3461538461538462e-06, "epoch": 0.007251631617113851, "percentage": 0.72, "elapsed_time": "0:15:30", "remaining_time": "1 day, 11:23:46"} +{"current_steps": 16, "total_steps": 2069, "loss": 0.9443, "lr": 1.4423076923076922e-06, "epoch": 0.007735073724921441, "percentage": 0.77, "elapsed_time": "0:16:33", "remaining_time": "1 day, 11:23:36"} +{"current_steps": 17, "total_steps": 2069, "loss": 0.9362, "lr": 1.5384615384615387e-06, "epoch": 0.00821851583272903, "percentage": 0.82, "elapsed_time": "0:17:37", "remaining_time": "1 day, 11:26:56"} +{"current_steps": 18, "total_steps": 2069, "loss": 0.9298, "lr": 1.6346153846153848e-06, "epoch": 0.00870195794053662, "percentage": 0.87, "elapsed_time": "0:18:35", "remaining_time": "1 day, 11:18:25"} +{"current_steps": 19, "total_steps": 2069, "loss": 0.9336, "lr": 1.7307692307692308e-06, "epoch": 0.00918540004834421, "percentage": 0.92, "elapsed_time": "0:19:38", "remaining_time": "1 day, 11:20:06"} +{"current_steps": 20, "total_steps": 2069, "loss": 0.9055, "lr": 1.826923076923077e-06, "epoch": 0.0096688421561518, "percentage": 0.97, "elapsed_time": "0:20:39", "remaining_time": "1 day, 11:16:47"} +{"current_steps": 21, "total_steps": 2069, "loss": 0.8962, "lr": 1.9230769230769234e-06, "epoch": 0.01015228426395939, "percentage": 1.01, "elapsed_time": "0:21:45", "remaining_time": "1 day, 11:21:51"} +{"current_steps": 22, "total_steps": 2069, "loss": 0.8702, "lr": 2.0192307692307692e-06, "epoch": 0.010635726371766982, "percentage": 1.06, "elapsed_time": "0:22:46", "remaining_time": "1 day, 
11:19:08"} +{"current_steps": 23, "total_steps": 2069, "loss": 0.8816, "lr": 2.1153846153846155e-06, "epoch": 0.011119168479574571, "percentage": 1.11, "elapsed_time": "0:23:51", "remaining_time": "1 day, 11:23:04"} +{"current_steps": 24, "total_steps": 2069, "loss": 0.8699, "lr": 2.211538461538462e-06, "epoch": 0.011602610587382161, "percentage": 1.16, "elapsed_time": "0:24:53", "remaining_time": "1 day, 11:20:16"} +{"current_steps": 25, "total_steps": 2069, "loss": 0.8669, "lr": 2.307692307692308e-06, "epoch": 0.012086052695189751, "percentage": 1.21, "elapsed_time": "0:25:55", "remaining_time": "1 day, 11:19:32"} +{"current_steps": 26, "total_steps": 2069, "loss": 0.8371, "lr": 2.403846153846154e-06, "epoch": 0.012569494802997341, "percentage": 1.26, "elapsed_time": "0:26:59", "remaining_time": "1 day, 11:20:58"} +{"current_steps": 27, "total_steps": 2069, "loss": 0.8388, "lr": 2.5e-06, "epoch": 0.01305293691080493, "percentage": 1.3, "elapsed_time": "0:27:59", "remaining_time": "1 day, 11:17:33"} +{"current_steps": 28, "total_steps": 2069, "loss": 0.8041, "lr": 2.5961538461538465e-06, "epoch": 0.01353637901861252, "percentage": 1.35, "elapsed_time": "0:28:58", "remaining_time": "1 day, 11:12:26"} +{"current_steps": 29, "total_steps": 2069, "loss": 0.8091, "lr": 2.6923076923076923e-06, "epoch": 0.01401982112642011, "percentage": 1.4, "elapsed_time": "0:29:59", "remaining_time": "1 day, 11:09:29"} +{"current_steps": 30, "total_steps": 2069, "loss": 0.7749, "lr": 2.7884615384615386e-06, "epoch": 0.014503263234227702, "percentage": 1.45, "elapsed_time": "0:31:03", "remaining_time": "1 day, 11:11:28"} +{"current_steps": 31, "total_steps": 2069, "loss": 0.8144, "lr": 2.8846153846153845e-06, "epoch": 0.014986705342035292, "percentage": 1.5, "elapsed_time": "0:32:05", "remaining_time": "1 day, 11:09:16"} +{"current_steps": 32, "total_steps": 2069, "loss": 0.7931, "lr": 2.980769230769231e-06, "epoch": 0.015470147449842882, "percentage": 1.55, "elapsed_time": "0:33:04", 
"remaining_time": "1 day, 11:05:36"} +{"current_steps": 33, "total_steps": 2069, "loss": 0.7903, "lr": 3.0769230769230774e-06, "epoch": 0.01595358955765047, "percentage": 1.59, "elapsed_time": "0:34:05", "remaining_time": "1 day, 11:03:32"} +{"current_steps": 34, "total_steps": 2069, "loss": 0.7256, "lr": 3.1730769230769233e-06, "epoch": 0.01643703166545806, "percentage": 1.64, "elapsed_time": "0:35:13", "remaining_time": "1 day, 11:08:08"} +{"current_steps": 35, "total_steps": 2069, "loss": 0.7956, "lr": 3.2692307692307696e-06, "epoch": 0.01692047377326565, "percentage": 1.69, "elapsed_time": "0:36:14", "remaining_time": "1 day, 11:05:48"} +{"current_steps": 36, "total_steps": 2069, "loss": 0.7478, "lr": 3.365384615384616e-06, "epoch": 0.01740391588107324, "percentage": 1.74, "elapsed_time": "0:37:17", "remaining_time": "1 day, 11:06:03"} +{"current_steps": 37, "total_steps": 2069, "loss": 0.7621, "lr": 3.4615384615384617e-06, "epoch": 0.01788735798888083, "percentage": 1.79, "elapsed_time": "0:38:17", "remaining_time": "1 day, 11:03:02"} +{"current_steps": 38, "total_steps": 2069, "loss": 0.7706, "lr": 3.557692307692308e-06, "epoch": 0.01837080009668842, "percentage": 1.84, "elapsed_time": "0:39:16", "remaining_time": "1 day, 10:59:29"} +{"current_steps": 39, "total_steps": 2069, "loss": 0.7237, "lr": 3.653846153846154e-06, "epoch": 0.01885424220449601, "percentage": 1.88, "elapsed_time": "0:40:15", "remaining_time": "1 day, 10:55:21"} +{"current_steps": 40, "total_steps": 2069, "loss": 0.7537, "lr": 3.7500000000000005e-06, "epoch": 0.0193376843123036, "percentage": 1.93, "elapsed_time": "0:41:19", "remaining_time": "1 day, 10:55:57"} +{"current_steps": 41, "total_steps": 2069, "loss": 0.7656, "lr": 3.846153846153847e-06, "epoch": 0.01982112642011119, "percentage": 1.98, "elapsed_time": "0:42:21", "remaining_time": "1 day, 10:55:17"} +{"current_steps": 42, "total_steps": 2069, "loss": 0.7558, "lr": 3.942307692307692e-06, "epoch": 0.02030456852791878, 
"percentage": 2.03, "elapsed_time": "0:43:21", "remaining_time": "1 day, 10:52:10"} +{"current_steps": 43, "total_steps": 2069, "loss": 0.7408, "lr": 4.0384615384615385e-06, "epoch": 0.020788010635726373, "percentage": 2.08, "elapsed_time": "0:44:21", "remaining_time": "1 day, 10:49:42"} +{"current_steps": 44, "total_steps": 2069, "loss": 0.7482, "lr": 4.134615384615385e-06, "epoch": 0.021271452743533963, "percentage": 2.13, "elapsed_time": "0:45:22", "remaining_time": "1 day, 10:48:33"} +{"current_steps": 45, "total_steps": 2069, "loss": 0.7424, "lr": 4.230769230769231e-06, "epoch": 0.021754894851341553, "percentage": 2.17, "elapsed_time": "0:46:24", "remaining_time": "1 day, 10:47:13"} +{"current_steps": 46, "total_steps": 2069, "loss": 0.7372, "lr": 4.326923076923077e-06, "epoch": 0.022238336959149143, "percentage": 2.22, "elapsed_time": "0:47:22", "remaining_time": "1 day, 10:43:39"} +{"current_steps": 47, "total_steps": 2069, "loss": 0.7362, "lr": 4.423076923076924e-06, "epoch": 0.022721779066956733, "percentage": 2.27, "elapsed_time": "0:48:19", "remaining_time": "1 day, 10:39:00"} +{"current_steps": 48, "total_steps": 2069, "loss": 0.7326, "lr": 4.51923076923077e-06, "epoch": 0.023205221174764323, "percentage": 2.32, "elapsed_time": "0:49:19", "remaining_time": "1 day, 10:37:07"} +{"current_steps": 49, "total_steps": 2069, "loss": 0.7275, "lr": 4.615384615384616e-06, "epoch": 0.023688663282571912, "percentage": 2.37, "elapsed_time": "0:50:18", "remaining_time": "1 day, 10:33:45"} +{"current_steps": 50, "total_steps": 2069, "loss": 0.7311, "lr": 4.711538461538462e-06, "epoch": 0.024172105390379502, "percentage": 2.42, "elapsed_time": "0:51:22", "remaining_time": "1 day, 10:34:41"} +{"current_steps": 51, "total_steps": 2069, "loss": 0.7261, "lr": 4.807692307692308e-06, "epoch": 0.024655547498187092, "percentage": 2.46, "elapsed_time": "0:52:21", "remaining_time": "1 day, 10:31:57"} +{"current_steps": 52, "total_steps": 2069, "loss": 0.7092, "lr": 
4.903846153846154e-06, "epoch": 0.025138989605994682, "percentage": 2.51, "elapsed_time": "0:53:18", "remaining_time": "1 day, 10:27:52"} +{"current_steps": 53, "total_steps": 2069, "loss": 0.7093, "lr": 5e-06, "epoch": 0.025622431713802272, "percentage": 2.56, "elapsed_time": "0:54:22", "remaining_time": "1 day, 10:28:12"} +{"current_steps": 54, "total_steps": 2069, "loss": 0.687, "lr": 5.096153846153846e-06, "epoch": 0.02610587382160986, "percentage": 2.61, "elapsed_time": "0:55:27", "remaining_time": "1 day, 10:29:38"} +{"current_steps": 55, "total_steps": 2069, "loss": 0.7143, "lr": 5.192307692307693e-06, "epoch": 0.02658931592941745, "percentage": 2.66, "elapsed_time": "0:56:28", "remaining_time": "1 day, 10:27:45"} +{"current_steps": 56, "total_steps": 2069, "loss": 0.7059, "lr": 5.288461538461539e-06, "epoch": 0.02707275803722504, "percentage": 2.71, "elapsed_time": "0:57:27", "remaining_time": "1 day, 10:25:15"} +{"current_steps": 57, "total_steps": 2069, "loss": 0.704, "lr": 5.384615384615385e-06, "epoch": 0.02755620014503263, "percentage": 2.75, "elapsed_time": "0:58:29", "remaining_time": "1 day, 10:24:44"} +{"current_steps": 58, "total_steps": 2069, "loss": 0.7169, "lr": 5.480769230769232e-06, "epoch": 0.02803964225284022, "percentage": 2.8, "elapsed_time": "0:59:30", "remaining_time": "1 day, 10:23:13"} +{"current_steps": 59, "total_steps": 2069, "loss": 0.7065, "lr": 5.576923076923077e-06, "epoch": 0.02852308436064781, "percentage": 2.85, "elapsed_time": "1:00:33", "remaining_time": "1 day, 10:23:13"} +{"current_steps": 60, "total_steps": 2069, "loss": 0.6373, "lr": 5.6730769230769235e-06, "epoch": 0.029006526468455404, "percentage": 2.9, "elapsed_time": "1:01:39", "remaining_time": "1 day, 10:24:46"} +{"current_steps": 61, "total_steps": 2069, "loss": 0.7018, "lr": 5.769230769230769e-06, "epoch": 0.029489968576262994, "percentage": 2.95, "elapsed_time": "1:02:38", "remaining_time": "1 day, 10:22:07"} +{"current_steps": 62, "total_steps": 2069, 
"loss": 0.6959, "lr": 5.865384615384616e-06, "epoch": 0.029973410684070584, "percentage": 3.0, "elapsed_time": "1:03:37", "remaining_time": "1 day, 10:19:51"} +{"current_steps": 63, "total_steps": 2069, "loss": 0.6896, "lr": 5.961538461538462e-06, "epoch": 0.030456852791878174, "percentage": 3.04, "elapsed_time": "1:04:39", "remaining_time": "1 day, 10:18:38"} +{"current_steps": 64, "total_steps": 2069, "loss": 0.7014, "lr": 6.057692307692308e-06, "epoch": 0.030940294899685764, "percentage": 3.09, "elapsed_time": "1:05:40", "remaining_time": "1 day, 10:17:42"} +{"current_steps": 65, "total_steps": 2069, "loss": 0.6903, "lr": 6.153846153846155e-06, "epoch": 0.03142373700749335, "percentage": 3.14, "elapsed_time": "1:06:44", "remaining_time": "1 day, 10:17:52"} +{"current_steps": 66, "total_steps": 2069, "loss": 0.6893, "lr": 6.25e-06, "epoch": 0.03190717911530094, "percentage": 3.19, "elapsed_time": "1:07:45", "remaining_time": "1 day, 10:16:08"} +{"current_steps": 67, "total_steps": 2069, "loss": 0.6946, "lr": 6.3461538461538466e-06, "epoch": 0.03239062122310853, "percentage": 3.24, "elapsed_time": "1:08:41", "remaining_time": "1 day, 10:12:32"} +{"current_steps": 68, "total_steps": 2069, "loss": 0.6788, "lr": 6.442307692307693e-06, "epoch": 0.03287406333091612, "percentage": 3.29, "elapsed_time": "1:09:45", "remaining_time": "1 day, 10:12:44"} +{"current_steps": 69, "total_steps": 2069, "loss": 0.6847, "lr": 6.538461538461539e-06, "epoch": 0.03335750543872371, "percentage": 3.33, "elapsed_time": "1:10:47", "remaining_time": "1 day, 10:12:08"} +{"current_steps": 70, "total_steps": 2069, "loss": 0.7086, "lr": 6.6346153846153846e-06, "epoch": 0.0338409475465313, "percentage": 3.38, "elapsed_time": "1:11:44", "remaining_time": "1 day, 10:08:57"} +{"current_steps": 71, "total_steps": 2069, "loss": 0.6724, "lr": 6.730769230769232e-06, "epoch": 0.03432438965433889, "percentage": 3.43, "elapsed_time": "1:12:45", "remaining_time": "1 day, 10:07:35"} +{"current_steps": 72, 
"total_steps": 2069, "loss": 0.6592, "lr": 6.826923076923078e-06, "epoch": 0.03480783176214648, "percentage": 3.48, "elapsed_time": "1:13:51", "remaining_time": "1 day, 10:08:23"} +{"current_steps": 73, "total_steps": 2069, "loss": 0.6674, "lr": 6.923076923076923e-06, "epoch": 0.03529127386995407, "percentage": 3.53, "elapsed_time": "1:14:49", "remaining_time": "1 day, 10:05:51"} +{"current_steps": 74, "total_steps": 2069, "loss": 0.6665, "lr": 7.01923076923077e-06, "epoch": 0.03577471597776166, "percentage": 3.58, "elapsed_time": "1:15:50", "remaining_time": "1 day, 10:04:32"} +{"current_steps": 75, "total_steps": 2069, "loss": 0.6685, "lr": 7.115384615384616e-06, "epoch": 0.03625815808556925, "percentage": 3.62, "elapsed_time": "1:16:47", "remaining_time": "1 day, 10:01:34"} +{"current_steps": 76, "total_steps": 2069, "loss": 0.6826, "lr": 7.211538461538462e-06, "epoch": 0.03674160019337684, "percentage": 3.67, "elapsed_time": "1:17:46", "remaining_time": "1 day, 9:59:26"} +{"current_steps": 77, "total_steps": 2069, "loss": 0.6811, "lr": 7.307692307692308e-06, "epoch": 0.03722504230118443, "percentage": 3.72, "elapsed_time": "1:18:48", "remaining_time": "1 day, 9:58:49"} +{"current_steps": 78, "total_steps": 2069, "loss": 0.6752, "lr": 7.403846153846155e-06, "epoch": 0.03770848440899202, "percentage": 3.77, "elapsed_time": "1:19:47", "remaining_time": "1 day, 9:56:43"} +{"current_steps": 79, "total_steps": 2069, "loss": 0.657, "lr": 7.500000000000001e-06, "epoch": 0.03819192651679961, "percentage": 3.82, "elapsed_time": "1:20:48", "remaining_time": "1 day, 9:55:39"} +{"current_steps": 80, "total_steps": 2069, "loss": 0.6326, "lr": 7.5961538461538465e-06, "epoch": 0.0386753686246072, "percentage": 3.87, "elapsed_time": "1:21:47", "remaining_time": "1 day, 9:53:42"} +{"current_steps": 81, "total_steps": 2069, "loss": 0.6577, "lr": 7.692307692307694e-06, "epoch": 0.03915881073241479, "percentage": 3.91, "elapsed_time": "1:22:47", "remaining_time": "1 day, 9:51:49"} 
+{"current_steps": 82, "total_steps": 2069, "loss": 0.6803, "lr": 7.78846153846154e-06, "epoch": 0.03964225284022238, "percentage": 3.96, "elapsed_time": "1:23:48", "remaining_time": "1 day, 9:50:47"} +{"current_steps": 83, "total_steps": 2069, "loss": 0.662, "lr": 7.884615384615384e-06, "epoch": 0.04012569494802997, "percentage": 4.01, "elapsed_time": "1:24:50", "remaining_time": "1 day, 9:49:53"} +{"current_steps": 84, "total_steps": 2069, "loss": 0.6784, "lr": 7.980769230769232e-06, "epoch": 0.04060913705583756, "percentage": 4.06, "elapsed_time": "1:25:51", "remaining_time": "1 day, 9:49:01"} +{"current_steps": 85, "total_steps": 2069, "loss": 0.663, "lr": 8.076923076923077e-06, "epoch": 0.04109257916364515, "percentage": 4.11, "elapsed_time": "1:26:54", "remaining_time": "1 day, 9:48:31"} +{"current_steps": 86, "total_steps": 2069, "loss": 0.6633, "lr": 8.173076923076923e-06, "epoch": 0.04157602127145275, "percentage": 4.16, "elapsed_time": "1:27:58", "remaining_time": "1 day, 9:48:34"} +{"current_steps": 87, "total_steps": 2069, "loss": 0.6345, "lr": 8.26923076923077e-06, "epoch": 0.04205946337926034, "percentage": 4.2, "elapsed_time": "1:29:01", "remaining_time": "1 day, 9:47:58"} +{"current_steps": 88, "total_steps": 2069, "loss": 0.661, "lr": 8.365384615384616e-06, "epoch": 0.04254290548706793, "percentage": 4.25, "elapsed_time": "1:30:04", "remaining_time": "1 day, 9:47:45"} +{"current_steps": 89, "total_steps": 2069, "loss": 0.6476, "lr": 8.461538461538462e-06, "epoch": 0.043026347594875516, "percentage": 4.3, "elapsed_time": "1:31:06", "remaining_time": "1 day, 9:46:45"} +{"current_steps": 90, "total_steps": 2069, "loss": 0.6667, "lr": 8.557692307692308e-06, "epoch": 0.043509789702683106, "percentage": 4.35, "elapsed_time": "1:32:08", "remaining_time": "1 day, 9:46:13"} +{"current_steps": 91, "total_steps": 2069, "loss": 0.6558, "lr": 8.653846153846155e-06, "epoch": 0.043993231810490696, "percentage": 4.4, "elapsed_time": "1:33:11", "remaining_time": "1 
day, 9:45:33"} +{"current_steps": 92, "total_steps": 2069, "loss": 0.6662, "lr": 8.750000000000001e-06, "epoch": 0.044476673918298286, "percentage": 4.45, "elapsed_time": "1:34:11", "remaining_time": "1 day, 9:44:15"} +{"current_steps": 93, "total_steps": 2069, "loss": 0.6768, "lr": 8.846153846153847e-06, "epoch": 0.044960116026105876, "percentage": 4.49, "elapsed_time": "1:35:13", "remaining_time": "1 day, 9:43:21"} +{"current_steps": 94, "total_steps": 2069, "loss": 0.633, "lr": 8.942307692307693e-06, "epoch": 0.045443558133913466, "percentage": 4.54, "elapsed_time": "1:36:18", "remaining_time": "1 day, 9:43:27"} +{"current_steps": 95, "total_steps": 2069, "loss": 0.6075, "lr": 9.03846153846154e-06, "epoch": 0.045927000241721055, "percentage": 4.59, "elapsed_time": "1:37:24", "remaining_time": "1 day, 9:44:09"} +{"current_steps": 96, "total_steps": 2069, "loss": 0.623, "lr": 9.134615384615384e-06, "epoch": 0.046410442349528645, "percentage": 4.64, "elapsed_time": "1:38:29", "remaining_time": "1 day, 9:44:14"} +{"current_steps": 97, "total_steps": 2069, "loss": 0.6556, "lr": 9.230769230769232e-06, "epoch": 0.046893884457336235, "percentage": 4.69, "elapsed_time": "1:39:29", "remaining_time": "1 day, 9:42:38"} +{"current_steps": 98, "total_steps": 2069, "loss": 0.6524, "lr": 9.326923076923079e-06, "epoch": 0.047377326565143825, "percentage": 4.74, "elapsed_time": "1:40:28", "remaining_time": "1 day, 9:40:53"} +{"current_steps": 99, "total_steps": 2069, "loss": 0.6449, "lr": 9.423076923076923e-06, "epoch": 0.047860768672951415, "percentage": 4.78, "elapsed_time": "1:41:29", "remaining_time": "1 day, 9:39:30"} +{"current_steps": 100, "total_steps": 2069, "loss": 0.6517, "lr": 9.51923076923077e-06, "epoch": 0.048344210780759005, "percentage": 4.83, "elapsed_time": "1:42:32", "remaining_time": "1 day, 9:39:09"} +{"current_steps": 101, "total_steps": 2069, "loss": 0.636, "lr": 9.615384615384616e-06, "epoch": 0.048827652888566594, "percentage": 4.88, "elapsed_time": 
"1:43:33", "remaining_time": "1 day, 9:37:51"} +{"current_steps": 102, "total_steps": 2069, "loss": 0.6569, "lr": 9.711538461538462e-06, "epoch": 0.049311094996374184, "percentage": 4.93, "elapsed_time": "1:44:33", "remaining_time": "1 day, 9:36:20"} +{"current_steps": 103, "total_steps": 2069, "loss": 0.6515, "lr": 9.807692307692308e-06, "epoch": 0.049794537104181774, "percentage": 4.98, "elapsed_time": "1:45:31", "remaining_time": "1 day, 9:34:12"} +{"current_steps": 104, "total_steps": 2069, "loss": 0.6471, "lr": 9.903846153846155e-06, "epoch": 0.050277979211989364, "percentage": 5.03, "elapsed_time": "1:46:34", "remaining_time": "1 day, 9:33:34"} +{"current_steps": 105, "total_steps": 2069, "loss": 0.6212, "lr": 1e-05, "epoch": 0.050761421319796954, "percentage": 5.07, "elapsed_time": "1:47:39", "remaining_time": "1 day, 9:33:43"} +{"current_steps": 106, "total_steps": 2069, "loss": 0.6483, "lr": 9.99999360979851e-06, "epoch": 0.051244863427604544, "percentage": 5.12, "elapsed_time": "1:48:41", "remaining_time": "1 day, 9:32:47"} +{"current_steps": 107, "total_steps": 2069, "loss": 0.6474, "lr": 9.999974439210376e-06, "epoch": 0.051728305535412133, "percentage": 5.17, "elapsed_time": "1:49:40", "remaining_time": "1 day, 9:31:06"} +{"current_steps": 108, "total_steps": 2069, "loss": 0.6506, "lr": 9.999942488284598e-06, "epoch": 0.05221174764321972, "percentage": 5.22, "elapsed_time": "1:50:43", "remaining_time": "1 day, 9:30:22"} +{"current_steps": 109, "total_steps": 2069, "loss": 0.641, "lr": 9.999897757102843e-06, "epoch": 0.05269518975102731, "percentage": 5.27, "elapsed_time": "1:51:43", "remaining_time": "1 day, 9:28:58"} +{"current_steps": 110, "total_steps": 2069, "loss": 0.6561, "lr": 9.99984024577945e-06, "epoch": 0.0531786318588349, "percentage": 5.32, "elapsed_time": "1:52:47", "remaining_time": "1 day, 9:28:34"} +{"current_steps": 111, "total_steps": 2069, "loss": 0.6181, "lr": 9.999769954461425e-06, "epoch": 0.05366207396664249, "percentage": 5.36, 
"elapsed_time": "1:53:42", "remaining_time": "1 day, 9:25:39"} +{"current_steps": 112, "total_steps": 2069, "loss": 0.6269, "lr": 9.999686883328433e-06, "epoch": 0.05414551607445008, "percentage": 5.41, "elapsed_time": "1:54:39", "remaining_time": "1 day, 9:23:34"} +{"current_steps": 113, "total_steps": 2069, "loss": 0.6317, "lr": 9.999591032592813e-06, "epoch": 0.05462895818225767, "percentage": 5.46, "elapsed_time": "1:55:45", "remaining_time": "1 day, 9:23:45"} +{"current_steps": 114, "total_steps": 2069, "loss": 0.6468, "lr": 9.999482402499569e-06, "epoch": 0.05511240029006526, "percentage": 5.51, "elapsed_time": "1:56:45", "remaining_time": "1 day, 9:22:22"} +{"current_steps": 115, "total_steps": 2069, "loss": 0.6359, "lr": 9.999360993326366e-06, "epoch": 0.05559584239787285, "percentage": 5.56, "elapsed_time": "1:57:48", "remaining_time": "1 day, 9:21:39"} +{"current_steps": 116, "total_steps": 2069, "loss": 0.6349, "lr": 9.999226805383534e-06, "epoch": 0.05607928450568044, "percentage": 5.61, "elapsed_time": "1:58:52", "remaining_time": "1 day, 9:21:18"} +{"current_steps": 117, "total_steps": 2069, "loss": 0.6399, "lr": 9.999079839014074e-06, "epoch": 0.05656272661348803, "percentage": 5.65, "elapsed_time": "1:59:54", "remaining_time": "1 day, 9:20:35"} +{"current_steps": 118, "total_steps": 2069, "loss": 0.5984, "lr": 9.998920094593637e-06, "epoch": 0.05704616872129562, "percentage": 5.7, "elapsed_time": "2:01:03", "remaining_time": "1 day, 9:21:32"} +{"current_steps": 119, "total_steps": 2069, "loss": 0.6398, "lr": 9.998747572530548e-06, "epoch": 0.05752961082910321, "percentage": 5.75, "elapsed_time": "2:02:05", "remaining_time": "1 day, 9:20:33"} +{"current_steps": 120, "total_steps": 2069, "loss": 0.626, "lr": 9.998562273265786e-06, "epoch": 0.05801305293691081, "percentage": 5.8, "elapsed_time": "2:03:07", "remaining_time": "1 day, 9:19:46"} +{"current_steps": 121, "total_steps": 2069, "loss": 0.6537, "lr": 9.998364197272988e-06, "epoch": 
0.0584964950447184, "percentage": 5.85, "elapsed_time": "2:04:07", "remaining_time": "1 day, 9:18:20"} +{"current_steps": 122, "total_steps": 2069, "loss": 0.9475, "lr": 9.998153345058454e-06, "epoch": 0.05897993715252599, "percentage": 5.9, "elapsed_time": "2:05:13", "remaining_time": "1 day, 9:18:29"} +{"current_steps": 123, "total_steps": 2069, "loss": 0.6473, "lr": 9.997929717161142e-06, "epoch": 0.05946337926033358, "percentage": 5.94, "elapsed_time": "2:06:16", "remaining_time": "1 day, 9:17:49"} +{"current_steps": 124, "total_steps": 2069, "loss": 0.6342, "lr": 9.997693314152658e-06, "epoch": 0.05994682136814117, "percentage": 5.99, "elapsed_time": "2:07:15", "remaining_time": "1 day, 9:16:05"} +{"current_steps": 125, "total_steps": 2069, "loss": 0.623, "lr": 9.99744413663727e-06, "epoch": 0.06043026347594876, "percentage": 6.04, "elapsed_time": "2:08:14", "remaining_time": "1 day, 9:14:31"} +{"current_steps": 126, "total_steps": 2069, "loss": 0.6221, "lr": 9.997182185251896e-06, "epoch": 0.06091370558375635, "percentage": 6.09, "elapsed_time": "2:09:14", "remaining_time": "1 day, 9:12:59"} +{"current_steps": 127, "total_steps": 2069, "loss": 0.6357, "lr": 9.996907460666104e-06, "epoch": 0.06139714769156394, "percentage": 6.14, "elapsed_time": "2:10:13", "remaining_time": "1 day, 9:11:22"} +{"current_steps": 128, "total_steps": 2069, "loss": 0.6043, "lr": 9.996619963582113e-06, "epoch": 0.06188058979937153, "percentage": 6.19, "elapsed_time": "2:11:17", "remaining_time": "1 day, 9:10:51"} +{"current_steps": 129, "total_steps": 2069, "loss": 0.6311, "lr": 9.996319694734787e-06, "epoch": 0.06236403190717912, "percentage": 6.23, "elapsed_time": "2:12:17", "remaining_time": "1 day, 9:09:29"} +{"current_steps": 130, "total_steps": 2069, "loss": 0.6411, "lr": 9.99600665489164e-06, "epoch": 0.0628474740149867, "percentage": 6.28, "elapsed_time": "2:13:16", "remaining_time": "1 day, 9:07:54"} +{"current_steps": 131, "total_steps": 2069, "loss": 0.6403, "lr": 
9.995680844852824e-06, "epoch": 0.06333091612279429, "percentage": 6.33, "elapsed_time": "2:14:21", "remaining_time": "1 day, 9:07:48"} +{"current_steps": 132, "total_steps": 2069, "loss": 0.6269, "lr": 9.995342265451138e-06, "epoch": 0.06381435823060189, "percentage": 6.38, "elapsed_time": "2:15:24", "remaining_time": "1 day, 9:06:59"} +{"current_steps": 133, "total_steps": 2069, "loss": 0.6321, "lr": 9.994990917552017e-06, "epoch": 0.06429780033840947, "percentage": 6.43, "elapsed_time": "2:16:24", "remaining_time": "1 day, 9:05:36"} +{"current_steps": 134, "total_steps": 2069, "loss": 0.6236, "lr": 9.994626802053536e-06, "epoch": 0.06478124244621707, "percentage": 6.48, "elapsed_time": "2:17:24", "remaining_time": "1 day, 9:04:08"} +{"current_steps": 135, "total_steps": 2069, "loss": 0.6258, "lr": 9.994249919886402e-06, "epoch": 0.06526468455402465, "percentage": 6.52, "elapsed_time": "2:18:26", "remaining_time": "1 day, 9:03:15"} +{"current_steps": 136, "total_steps": 2069, "loss": 0.6162, "lr": 9.993860272013958e-06, "epoch": 0.06574812666183225, "percentage": 6.57, "elapsed_time": "2:19:24", "remaining_time": "1 day, 9:01:28"} +{"current_steps": 137, "total_steps": 2069, "loss": 0.6261, "lr": 9.993457859432172e-06, "epoch": 0.06623156876963984, "percentage": 6.62, "elapsed_time": "2:20:28", "remaining_time": "1 day, 9:01:01"} +{"current_steps": 138, "total_steps": 2069, "loss": 0.6371, "lr": 9.993042683169647e-06, "epoch": 0.06671501087744743, "percentage": 6.67, "elapsed_time": "2:21:29", "remaining_time": "1 day, 8:59:49"} +{"current_steps": 139, "total_steps": 2069, "loss": 0.6275, "lr": 9.992614744287605e-06, "epoch": 0.06719845298525502, "percentage": 6.72, "elapsed_time": "2:22:33", "remaining_time": "1 day, 8:59:20"} +{"current_steps": 140, "total_steps": 2069, "loss": 0.6175, "lr": 9.992174043879893e-06, "epoch": 0.0676818950930626, "percentage": 6.77, "elapsed_time": "2:23:36", "remaining_time": "1 day, 8:58:37"} +{"current_steps": 141, 
"total_steps": 2069, "loss": 0.6255, "lr": 9.991720583072975e-06, "epoch": 0.0681653372008702, "percentage": 6.81, "elapsed_time": "2:24:34", "remaining_time": "1 day, 8:56:48"} +{"current_steps": 142, "total_steps": 2069, "loss": 0.6257, "lr": 9.991254363025935e-06, "epoch": 0.06864877930867778, "percentage": 6.86, "elapsed_time": "2:25:36", "remaining_time": "1 day, 8:55:57"} +{"current_steps": 143, "total_steps": 2069, "loss": 0.6301, "lr": 9.99077538493047e-06, "epoch": 0.06913222141648538, "percentage": 6.91, "elapsed_time": "2:26:37", "remaining_time": "1 day, 8:54:52"} +{"current_steps": 144, "total_steps": 2069, "loss": 0.619, "lr": 9.990283650010883e-06, "epoch": 0.06961566352429296, "percentage": 6.96, "elapsed_time": "2:27:40", "remaining_time": "1 day, 8:54:13"} +{"current_steps": 145, "total_steps": 2069, "loss": 0.5818, "lr": 9.989779159524091e-06, "epoch": 0.07009910563210056, "percentage": 7.01, "elapsed_time": "2:28:42", "remaining_time": "1 day, 8:53:17"} +{"current_steps": 146, "total_steps": 2069, "loss": 0.6105, "lr": 9.989261914759612e-06, "epoch": 0.07058254773990814, "percentage": 7.06, "elapsed_time": "2:29:48", "remaining_time": "1 day, 8:53:13"} +{"current_steps": 147, "total_steps": 2069, "loss": 0.6154, "lr": 9.988731917039564e-06, "epoch": 0.07106598984771574, "percentage": 7.1, "elapsed_time": "2:30:47", "remaining_time": "1 day, 8:51:30"} +{"current_steps": 148, "total_steps": 2069, "loss": 0.5533, "lr": 9.988189167718665e-06, "epoch": 0.07154943195552332, "percentage": 7.15, "elapsed_time": "2:31:52", "remaining_time": "1 day, 8:51:14"} +{"current_steps": 149, "total_steps": 2069, "loss": 0.6281, "lr": 9.987633668184227e-06, "epoch": 0.07203287406333092, "percentage": 7.2, "elapsed_time": "2:32:54", "remaining_time": "1 day, 8:50:24"} +{"current_steps": 150, "total_steps": 2069, "loss": 0.5836, "lr": 9.98706541985615e-06, "epoch": 0.0725163161711385, "percentage": 7.25, "elapsed_time": "2:33:58", "remaining_time": "1 day, 8:49:55"} 
+{"current_steps": 151, "total_steps": 2069, "loss": 0.6246, "lr": 9.986484424186922e-06, "epoch": 0.0729997582789461, "percentage": 7.3, "elapsed_time": "2:35:03", "remaining_time": "1 day, 8:49:34"} +{"current_steps": 152, "total_steps": 2069, "loss": 0.6038, "lr": 9.985890682661616e-06, "epoch": 0.07348320038675368, "percentage": 7.35, "elapsed_time": "2:36:05", "remaining_time": "1 day, 8:48:39"} +{"current_steps": 153, "total_steps": 2069, "loss": 0.6246, "lr": 9.985284196797884e-06, "epoch": 0.07396664249456128, "percentage": 7.39, "elapsed_time": "2:37:08", "remaining_time": "1 day, 8:47:55"} +{"current_steps": 154, "total_steps": 2069, "loss": 0.6318, "lr": 9.984664968145953e-06, "epoch": 0.07445008460236886, "percentage": 7.44, "elapsed_time": "2:38:08", "remaining_time": "1 day, 8:46:33"} +{"current_steps": 155, "total_steps": 2069, "loss": 0.6184, "lr": 9.984032998288617e-06, "epoch": 0.07493352671017646, "percentage": 7.49, "elapsed_time": "2:39:12", "remaining_time": "1 day, 8:45:52"} +{"current_steps": 156, "total_steps": 2069, "loss": 0.6185, "lr": 9.983388288841246e-06, "epoch": 0.07541696881798404, "percentage": 7.54, "elapsed_time": "2:40:10", "remaining_time": "1 day, 8:44:15"} +{"current_steps": 157, "total_steps": 2069, "loss": 0.625, "lr": 9.982730841451768e-06, "epoch": 0.07590041092579164, "percentage": 7.59, "elapsed_time": "2:41:13", "remaining_time": "1 day, 8:43:26"} +{"current_steps": 158, "total_steps": 2069, "loss": 0.6183, "lr": 9.982060657800672e-06, "epoch": 0.07638385303359922, "percentage": 7.64, "elapsed_time": "2:42:13", "remaining_time": "1 day, 8:42:02"} +{"current_steps": 159, "total_steps": 2069, "loss": 0.6137, "lr": 9.981377739601002e-06, "epoch": 0.07686729514140682, "percentage": 7.68, "elapsed_time": "2:43:13", "remaining_time": "1 day, 8:40:43"} +{"current_steps": 160, "total_steps": 2069, "loss": 0.6229, "lr": 9.980682088598349e-06, "epoch": 0.0773507372492144, "percentage": 7.73, "elapsed_time": "2:44:14", 
"remaining_time": "1 day, 8:39:36"} +{"current_steps": 161, "total_steps": 2069, "loss": 0.614, "lr": 9.979973706570856e-06, "epoch": 0.077834179357022, "percentage": 7.78, "elapsed_time": "2:45:15", "remaining_time": "1 day, 8:38:33"} +{"current_steps": 162, "total_steps": 2069, "loss": 0.6222, "lr": 9.979252595329204e-06, "epoch": 0.07831762146482958, "percentage": 7.83, "elapsed_time": "2:46:15", "remaining_time": "1 day, 8:37:12"} +{"current_steps": 163, "total_steps": 2069, "loss": 0.5856, "lr": 9.978518756716611e-06, "epoch": 0.07880106357263718, "percentage": 7.88, "elapsed_time": "2:47:22", "remaining_time": "1 day, 8:37:07"} +{"current_steps": 164, "total_steps": 2069, "loss": 0.6291, "lr": 9.977772192608827e-06, "epoch": 0.07928450568044476, "percentage": 7.93, "elapsed_time": "2:48:25", "remaining_time": "1 day, 8:36:25"} +{"current_steps": 165, "total_steps": 2069, "loss": 0.6149, "lr": 9.977012904914133e-06, "epoch": 0.07976794778825236, "percentage": 7.97, "elapsed_time": "2:49:26", "remaining_time": "1 day, 8:35:19"} +{"current_steps": 166, "total_steps": 2069, "loss": 0.6147, "lr": 9.976240895573326e-06, "epoch": 0.08025138989605994, "percentage": 8.02, "elapsed_time": "2:50:31", "remaining_time": "1 day, 8:34:54"} +{"current_steps": 167, "total_steps": 2069, "loss": 0.6002, "lr": 9.975456166559725e-06, "epoch": 0.08073483200386754, "percentage": 8.07, "elapsed_time": "2:51:34", "remaining_time": "1 day, 8:34:09"} +{"current_steps": 168, "total_steps": 2069, "loss": 0.606, "lr": 9.974658719879163e-06, "epoch": 0.08121827411167512, "percentage": 8.12, "elapsed_time": "2:52:33", "remaining_time": "1 day, 8:32:29"} +{"current_steps": 169, "total_steps": 2069, "loss": 0.6226, "lr": 9.973848557569974e-06, "epoch": 0.08170171621948272, "percentage": 8.17, "elapsed_time": "2:53:33", "remaining_time": "1 day, 8:31:18"} +{"current_steps": 170, "total_steps": 2069, "loss": 0.6144, "lr": 9.973025681703e-06, "epoch": 0.0821851583272903, "percentage": 8.22, 
"elapsed_time": "2:54:34", "remaining_time": "1 day, 8:30:10"} +{"current_steps": 171, "total_steps": 2069, "loss": 0.6148, "lr": 9.972190094381578e-06, "epoch": 0.0826686004350979, "percentage": 8.26, "elapsed_time": "2:55:35", "remaining_time": "1 day, 8:28:58"} +{"current_steps": 172, "total_steps": 2069, "loss": 0.616, "lr": 9.971341797741538e-06, "epoch": 0.0831520425429055, "percentage": 8.31, "elapsed_time": "2:56:35", "remaining_time": "1 day, 8:27:33"} +{"current_steps": 173, "total_steps": 2069, "loss": 0.6196, "lr": 9.970480793951194e-06, "epoch": 0.08363548465071308, "percentage": 8.36, "elapsed_time": "2:57:36", "remaining_time": "1 day, 8:26:34"} +{"current_steps": 174, "total_steps": 2069, "loss": 0.5902, "lr": 9.96960708521134e-06, "epoch": 0.08411892675852067, "percentage": 8.41, "elapsed_time": "2:58:43", "remaining_time": "1 day, 8:26:26"} +{"current_steps": 175, "total_steps": 2069, "loss": 0.6039, "lr": 9.968720673755246e-06, "epoch": 0.08460236886632826, "percentage": 8.46, "elapsed_time": "2:59:43", "remaining_time": "1 day, 8:25:03"} +{"current_steps": 176, "total_steps": 2069, "loss": 0.6128, "lr": 9.96782156184865e-06, "epoch": 0.08508581097413585, "percentage": 8.51, "elapsed_time": "3:00:43", "remaining_time": "1 day, 8:23:45"} +{"current_steps": 177, "total_steps": 2069, "loss": 0.6201, "lr": 9.966909751789758e-06, "epoch": 0.08556925308194344, "percentage": 8.55, "elapsed_time": "3:01:42", "remaining_time": "1 day, 8:22:23"} +{"current_steps": 178, "total_steps": 2069, "loss": 0.581, "lr": 9.965985245909226e-06, "epoch": 0.08605269518975103, "percentage": 8.6, "elapsed_time": "3:02:42", "remaining_time": "1 day, 8:21:00"} +{"current_steps": 179, "total_steps": 2069, "loss": 0.5748, "lr": 9.96504804657017e-06, "epoch": 0.08653613729755862, "percentage": 8.65, "elapsed_time": "3:03:47", "remaining_time": "1 day, 8:20:35"} +{"current_steps": 180, "total_steps": 2069, "loss": 0.6025, "lr": 9.964098156168143e-06, "epoch": 
0.08701957940536621, "percentage": 8.7, "elapsed_time": "3:04:51", "remaining_time": "1 day, 8:19:57"} +{"current_steps": 181, "total_steps": 2069, "loss": 0.6086, "lr": 9.963135577131144e-06, "epoch": 0.0875030215131738, "percentage": 8.75, "elapsed_time": "3:05:51", "remaining_time": "1 day, 8:18:43"} +{"current_steps": 182, "total_steps": 2069, "loss": 0.5759, "lr": 9.962160311919601e-06, "epoch": 0.08798646362098139, "percentage": 8.8, "elapsed_time": "3:06:54", "remaining_time": "1 day, 8:17:56"} +{"current_steps": 183, "total_steps": 2069, "loss": 0.6009, "lr": 9.96117236302637e-06, "epoch": 0.08846990572878898, "percentage": 8.84, "elapsed_time": "3:07:54", "remaining_time": "1 day, 8:16:32"} +{"current_steps": 184, "total_steps": 2069, "loss": 0.5891, "lr": 9.960171732976731e-06, "epoch": 0.08895334783659657, "percentage": 8.89, "elapsed_time": "3:09:00", "remaining_time": "1 day, 8:16:17"} +{"current_steps": 185, "total_steps": 2069, "loss": 0.6126, "lr": 9.959158424328373e-06, "epoch": 0.08943678994440415, "percentage": 8.94, "elapsed_time": "3:10:00", "remaining_time": "1 day, 8:15:02"} +{"current_steps": 186, "total_steps": 2069, "loss": 0.6113, "lr": 9.958132439671392e-06, "epoch": 0.08992023205221175, "percentage": 8.99, "elapsed_time": "3:11:01", "remaining_time": "1 day, 8:13:54"} +{"current_steps": 187, "total_steps": 2069, "loss": 0.5585, "lr": 9.957093781628294e-06, "epoch": 0.09040367416001933, "percentage": 9.04, "elapsed_time": "3:12:05", "remaining_time": "1 day, 8:13:10"} +{"current_steps": 188, "total_steps": 2069, "loss": 0.5829, "lr": 9.956042452853967e-06, "epoch": 0.09088711626782693, "percentage": 9.09, "elapsed_time": "3:13:07", "remaining_time": "1 day, 8:12:14"} +{"current_steps": 189, "total_steps": 2069, "loss": 0.6014, "lr": 9.954978456035695e-06, "epoch": 0.09137055837563451, "percentage": 9.13, "elapsed_time": "3:14:08", "remaining_time": "1 day, 8:11:07"} +{"current_steps": 190, "total_steps": 2069, "loss": 0.6135, "lr": 
9.953901793893137e-06, "epoch": 0.09185400048344211, "percentage": 9.18, "elapsed_time": "3:15:08", "remaining_time": "1 day, 8:09:46"} +{"current_steps": 191, "total_steps": 2069, "loss": 0.6126, "lr": 9.95281246917833e-06, "epoch": 0.0923374425912497, "percentage": 9.23, "elapsed_time": "3:16:12", "remaining_time": "1 day, 8:09:09"} +{"current_steps": 192, "total_steps": 2069, "loss": 0.6077, "lr": 9.951710484675677e-06, "epoch": 0.09282088469905729, "percentage": 9.28, "elapsed_time": "3:17:13", "remaining_time": "1 day, 8:08:05"} +{"current_steps": 193, "total_steps": 2069, "loss": 0.6052, "lr": 9.950595843201936e-06, "epoch": 0.09330432680686487, "percentage": 9.33, "elapsed_time": "3:18:16", "remaining_time": "1 day, 8:07:16"} +{"current_steps": 194, "total_steps": 2069, "loss": 0.608, "lr": 9.949468547606222e-06, "epoch": 0.09378776891467247, "percentage": 9.38, "elapsed_time": "3:19:16", "remaining_time": "1 day, 8:06:00"} +{"current_steps": 195, "total_steps": 2069, "loss": 0.5725, "lr": 9.948328600769996e-06, "epoch": 0.09427121102248005, "percentage": 9.42, "elapsed_time": "3:20:19", "remaining_time": "1 day, 8:05:08"} +{"current_steps": 196, "total_steps": 2069, "loss": 0.5981, "lr": 9.94717600560705e-06, "epoch": 0.09475465313028765, "percentage": 9.47, "elapsed_time": "3:21:20", "remaining_time": "1 day, 8:03:59"} +{"current_steps": 197, "total_steps": 2069, "loss": 0.6163, "lr": 9.946010765063512e-06, "epoch": 0.09523809523809523, "percentage": 9.52, "elapsed_time": "3:22:19", "remaining_time": "1 day, 8:02:33"} +{"current_steps": 198, "total_steps": 2069, "loss": 0.6049, "lr": 9.94483288211783e-06, "epoch": 0.09572153734590283, "percentage": 9.57, "elapsed_time": "3:23:21", "remaining_time": "1 day, 8:01:40"} +{"current_steps": 199, "total_steps": 2069, "loss": 0.6034, "lr": 9.943642359780767e-06, "epoch": 0.09620497945371041, "percentage": 9.62, "elapsed_time": "3:24:24", "remaining_time": "1 day, 8:00:45"} +{"current_steps": 200, "total_steps": 
2069, "loss": 0.5977, "lr": 9.942439201095398e-06, "epoch": 0.09668842156151801, "percentage": 9.67, "elapsed_time": "3:25:25", "remaining_time": "1 day, 7:59:45"} +{"current_steps": 201, "total_steps": 2069, "loss": 0.6147, "lr": 9.941223409137088e-06, "epoch": 0.09717186366932559, "percentage": 9.71, "elapsed_time": "3:26:24", "remaining_time": "1 day, 7:58:15"} +{"current_steps": 202, "total_steps": 2069, "loss": 0.595, "lr": 9.939994987013505e-06, "epoch": 0.09765530577713319, "percentage": 9.76, "elapsed_time": "3:27:25", "remaining_time": "1 day, 7:57:10"} +{"current_steps": 203, "total_steps": 2069, "loss": 0.5825, "lr": 9.93875393786459e-06, "epoch": 0.09813874788494077, "percentage": 9.81, "elapsed_time": "3:28:27", "remaining_time": "1 day, 7:56:13"} +{"current_steps": 204, "total_steps": 2069, "loss": 0.6106, "lr": 9.937500264862567e-06, "epoch": 0.09862218999274837, "percentage": 9.86, "elapsed_time": "3:29:31", "remaining_time": "1 day, 7:55:33"} +{"current_steps": 205, "total_steps": 2069, "loss": 0.5724, "lr": 9.936233971211926e-06, "epoch": 0.09910563210055595, "percentage": 9.91, "elapsed_time": "3:30:37", "remaining_time": "1 day, 7:55:07"} +{"current_steps": 206, "total_steps": 2069, "loss": 0.5702, "lr": 9.934955060149413e-06, "epoch": 0.09958907420836355, "percentage": 9.96, "elapsed_time": "3:31:41", "remaining_time": "1 day, 7:54:26"} +{"current_steps": 207, "total_steps": 2069, "loss": 0.5976, "lr": 9.933663534944029e-06, "epoch": 0.10007251631617115, "percentage": 10.0, "elapsed_time": "3:32:41", "remaining_time": "1 day, 7:53:07"} +{"current_steps": 208, "total_steps": 2069, "loss": 0.5662, "lr": 9.932359398897018e-06, "epoch": 0.10055595842397873, "percentage": 10.05, "elapsed_time": "3:33:47", "remaining_time": "1 day, 7:52:50"} +{"current_steps": 209, "total_steps": 2069, "loss": 0.5987, "lr": 9.931042655341856e-06, "epoch": 0.10103940053178632, "percentage": 10.1, "elapsed_time": "3:35:54", "remaining_time": "1 day, 8:01:30"} 
+{"current_steps": 210, "total_steps": 2069, "loss": 0.5956, "lr": 9.929713307644245e-06, "epoch": 0.10152284263959391, "percentage": 10.15, "elapsed_time": "3:36:58", "remaining_time": "1 day, 8:00:46"} +{"current_steps": 211, "total_steps": 2069, "loss": 0.6023, "lr": 9.928371359202103e-06, "epoch": 0.1020062847474015, "percentage": 10.2, "elapsed_time": "3:38:05", "remaining_time": "1 day, 8:00:30"} +{"current_steps": 212, "total_steps": 2069, "loss": 0.5941, "lr": 9.927016813445562e-06, "epoch": 0.10248972685520909, "percentage": 10.25, "elapsed_time": "3:39:06", "remaining_time": "1 day, 7:59:14"} +{"current_steps": 213, "total_steps": 2069, "loss": 0.5977, "lr": 9.925649673836949e-06, "epoch": 0.10297316896301668, "percentage": 10.29, "elapsed_time": "3:40:06", "remaining_time": "1 day, 7:57:53"} +{"current_steps": 214, "total_steps": 2069, "loss": 0.599, "lr": 9.924269943870781e-06, "epoch": 0.10345661107082427, "percentage": 10.34, "elapsed_time": "3:41:08", "remaining_time": "1 day, 7:56:55"} +{"current_steps": 215, "total_steps": 2069, "loss": 0.565, "lr": 9.922877627073763e-06, "epoch": 0.10394005317863186, "percentage": 10.39, "elapsed_time": "3:42:13", "remaining_time": "1 day, 7:56:14"} +{"current_steps": 216, "total_steps": 2069, "loss": 0.6038, "lr": 9.921472727004765e-06, "epoch": 0.10442349528643945, "percentage": 10.44, "elapsed_time": "3:43:12", "remaining_time": "1 day, 7:54:46"} +{"current_steps": 217, "total_steps": 2069, "loss": 0.5951, "lr": 9.920055247254827e-06, "epoch": 0.10490693739424704, "percentage": 10.49, "elapsed_time": "3:44:15", "remaining_time": "1 day, 7:53:55"} +{"current_steps": 218, "total_steps": 2069, "loss": 0.604, "lr": 9.91862519144714e-06, "epoch": 0.10539037950205463, "percentage": 10.54, "elapsed_time": "3:45:16", "remaining_time": "1 day, 7:52:47"} +{"current_steps": 219, "total_steps": 2069, "loss": 0.6006, "lr": 9.917182563237045e-06, "epoch": 0.10587382160986222, "percentage": 10.58, "elapsed_time": "3:46:24", 
"remaining_time": "1 day, 7:52:37"} +{"current_steps": 220, "total_steps": 2069, "loss": 0.591, "lr": 9.915727366312012e-06, "epoch": 0.1063572637176698, "percentage": 10.63, "elapsed_time": "3:47:29", "remaining_time": "1 day, 7:51:55"} +{"current_steps": 221, "total_steps": 2069, "loss": 0.5818, "lr": 9.914259604391642e-06, "epoch": 0.1068407058254774, "percentage": 10.68, "elapsed_time": "3:48:35", "remaining_time": "1 day, 7:51:26"} +{"current_steps": 222, "total_steps": 2069, "loss": 0.5991, "lr": 9.912779281227656e-06, "epoch": 0.10732414793328499, "percentage": 10.73, "elapsed_time": "3:49:39", "remaining_time": "1 day, 7:50:40"} +{"current_steps": 223, "total_steps": 2069, "loss": 0.5783, "lr": 9.911286400603878e-06, "epoch": 0.10780759004109258, "percentage": 10.78, "elapsed_time": "3:50:40", "remaining_time": "1 day, 7:49:29"} +{"current_steps": 224, "total_steps": 2069, "loss": 0.6007, "lr": 9.90978096633623e-06, "epoch": 0.10829103214890017, "percentage": 10.83, "elapsed_time": "3:51:46", "remaining_time": "1 day, 7:48:59"} +{"current_steps": 225, "total_steps": 2069, "loss": 0.5865, "lr": 9.908262982272724e-06, "epoch": 0.10877447425670776, "percentage": 10.87, "elapsed_time": "3:52:47", "remaining_time": "1 day, 7:47:53"} +{"current_steps": 226, "total_steps": 2069, "loss": 0.5635, "lr": 9.906732452293448e-06, "epoch": 0.10925791636451535, "percentage": 10.92, "elapsed_time": "3:53:46", "remaining_time": "1 day, 7:46:21"} +{"current_steps": 227, "total_steps": 2069, "loss": 0.5982, "lr": 9.905189380310564e-06, "epoch": 0.10974135847232294, "percentage": 10.97, "elapsed_time": "3:54:47", "remaining_time": "1 day, 7:45:11"} +{"current_steps": 228, "total_steps": 2069, "loss": 0.5734, "lr": 9.903633770268286e-06, "epoch": 0.11022480058013052, "percentage": 11.02, "elapsed_time": "3:55:51", "remaining_time": "1 day, 7:44:29"} +{"current_steps": 229, "total_steps": 2069, "loss": 0.6021, "lr": 9.902065626142876e-06, "epoch": 0.11070824268793812, 
"percentage": 11.07, "elapsed_time": "3:56:55", "remaining_time": "1 day, 7:43:37"} +{"current_steps": 230, "total_steps": 2069, "loss": 0.5847, "lr": 9.900484951942642e-06, "epoch": 0.1111916847957457, "percentage": 11.12, "elapsed_time": "3:57:57", "remaining_time": "1 day, 7:42:40"} +{"current_steps": 231, "total_steps": 2069, "loss": 0.5946, "lr": 9.89889175170791e-06, "epoch": 0.1116751269035533, "percentage": 11.16, "elapsed_time": "3:59:03", "remaining_time": "1 day, 7:42:05"} +{"current_steps": 232, "total_steps": 2069, "loss": 0.5941, "lr": 9.89728602951103e-06, "epoch": 0.11215856901136088, "percentage": 11.21, "elapsed_time": "4:00:08", "remaining_time": "1 day, 7:41:26"} +{"current_steps": 233, "total_steps": 2069, "loss": 0.5965, "lr": 9.89566778945636e-06, "epoch": 0.11264201111916848, "percentage": 11.26, "elapsed_time": "4:01:12", "remaining_time": "1 day, 7:40:41"} +{"current_steps": 234, "total_steps": 2069, "loss": 0.6076, "lr": 9.894037035680246e-06, "epoch": 0.11312545322697606, "percentage": 11.31, "elapsed_time": "4:02:15", "remaining_time": "1 day, 7:39:44"} +{"current_steps": 235, "total_steps": 2069, "loss": 0.5749, "lr": 9.892393772351033e-06, "epoch": 0.11360889533478366, "percentage": 11.36, "elapsed_time": "4:03:17", "remaining_time": "1 day, 7:38:40"} +{"current_steps": 236, "total_steps": 2069, "loss": 0.5882, "lr": 9.890738003669029e-06, "epoch": 0.11409233744259124, "percentage": 11.41, "elapsed_time": "4:04:19", "remaining_time": "1 day, 7:37:37"} +{"current_steps": 237, "total_steps": 2069, "loss": 0.5978, "lr": 9.889069733866515e-06, "epoch": 0.11457577955039884, "percentage": 11.45, "elapsed_time": "4:05:25", "remaining_time": "1 day, 7:37:10"} +{"current_steps": 238, "total_steps": 2069, "loss": 0.6, "lr": 9.887388967207722e-06, "epoch": 0.11505922165820642, "percentage": 11.5, "elapsed_time": "4:06:29", "remaining_time": "1 day, 7:36:21"} +{"current_steps": 239, "total_steps": 2069, "loss": 0.5977, "lr": 
9.885695707988825e-06, "epoch": 0.11554266376601402, "percentage": 11.55, "elapsed_time": "4:07:28", "remaining_time": "1 day, 7:34:56"} +{"current_steps": 240, "total_steps": 2069, "loss": 0.6044, "lr": 9.883989960537934e-06, "epoch": 0.11602610587382162, "percentage": 11.6, "elapsed_time": "4:08:29", "remaining_time": "1 day, 7:33:44"} +{"current_steps": 241, "total_steps": 2069, "loss": 0.5849, "lr": 9.882271729215071e-06, "epoch": 0.1165095479816292, "percentage": 11.65, "elapsed_time": "4:09:30", "remaining_time": "1 day, 7:32:33"} +{"current_steps": 242, "total_steps": 2069, "loss": 0.5986, "lr": 9.880541018412179e-06, "epoch": 0.1169929900894368, "percentage": 11.7, "elapsed_time": "4:10:31", "remaining_time": "1 day, 7:31:22"} +{"current_steps": 243, "total_steps": 2069, "loss": 0.5646, "lr": 9.878797832553093e-06, "epoch": 0.11747643219724438, "percentage": 11.74, "elapsed_time": "4:11:28", "remaining_time": "1 day, 7:29:38"} +{"current_steps": 244, "total_steps": 2069, "loss": 0.5998, "lr": 9.877042176093537e-06, "epoch": 0.11795987430505198, "percentage": 11.79, "elapsed_time": "4:12:30", "remaining_time": "1 day, 7:28:39"} +{"current_steps": 245, "total_steps": 2069, "loss": 0.5846, "lr": 9.875274053521107e-06, "epoch": 0.11844331641285956, "percentage": 11.84, "elapsed_time": "4:13:35", "remaining_time": "1 day, 7:27:57"} +{"current_steps": 246, "total_steps": 2069, "loss": 0.5912, "lr": 9.873493469355271e-06, "epoch": 0.11892675852066716, "percentage": 11.89, "elapsed_time": "4:14:35", "remaining_time": "1 day, 7:26:40"} +{"current_steps": 247, "total_steps": 2069, "loss": 0.5836, "lr": 9.871700428147342e-06, "epoch": 0.11941020062847474, "percentage": 11.94, "elapsed_time": "4:15:39", "remaining_time": "1 day, 7:25:52"} +{"current_steps": 248, "total_steps": 2069, "loss": 0.5898, "lr": 9.86989493448048e-06, "epoch": 0.11989364273628234, "percentage": 11.99, "elapsed_time": "4:16:44", "remaining_time": "1 day, 7:25:14"} +{"current_steps": 249, 
"total_steps": 2069, "loss": 0.5933, "lr": 9.868076992969672e-06, "epoch": 0.12037708484408992, "percentage": 12.03, "elapsed_time": "4:17:46", "remaining_time": "1 day, 7:24:06"} +{"current_steps": 250, "total_steps": 2069, "loss": 0.5855, "lr": 9.866246608261725e-06, "epoch": 0.12086052695189752, "percentage": 12.08, "elapsed_time": "4:18:48", "remaining_time": "1 day, 7:23:05"} +{"current_steps": 251, "total_steps": 2069, "loss": 0.5989, "lr": 9.864403785035246e-06, "epoch": 0.1213439690597051, "percentage": 12.13, "elapsed_time": "4:19:53", "remaining_time": "1 day, 7:22:21"} +{"current_steps": 252, "total_steps": 2069, "loss": 0.5722, "lr": 9.862548528000644e-06, "epoch": 0.1218274111675127, "percentage": 12.18, "elapsed_time": "4:20:56", "remaining_time": "1 day, 7:21:28"} +{"current_steps": 253, "total_steps": 2069, "loss": 0.5879, "lr": 9.860680841900101e-06, "epoch": 0.12231085327532028, "percentage": 12.23, "elapsed_time": "4:21:56", "remaining_time": "1 day, 7:20:14"} +{"current_steps": 254, "total_steps": 2069, "loss": 0.5999, "lr": 9.858800731507575e-06, "epoch": 0.12279429538312787, "percentage": 12.28, "elapsed_time": "4:22:56", "remaining_time": "1 day, 7:18:55"} +{"current_steps": 255, "total_steps": 2069, "loss": 0.586, "lr": 9.85690820162878e-06, "epoch": 0.12327773749093546, "percentage": 12.32, "elapsed_time": "4:24:03", "remaining_time": "1 day, 7:18:26"} +{"current_steps": 256, "total_steps": 2069, "loss": 0.6011, "lr": 9.855003257101177e-06, "epoch": 0.12376117959874305, "percentage": 12.37, "elapsed_time": "4:25:00", "remaining_time": "1 day, 7:16:49"} +{"current_steps": 257, "total_steps": 2069, "loss": 0.5894, "lr": 9.853085902793952e-06, "epoch": 0.12424462170655064, "percentage": 12.42, "elapsed_time": "4:26:02", "remaining_time": "1 day, 7:15:46"} +{"current_steps": 258, "total_steps": 2069, "loss": 0.5897, "lr": 9.851156143608025e-06, "epoch": 0.12472806381435823, "percentage": 12.47, "elapsed_time": "4:27:01", "remaining_time": "1 
day, 7:14:17"} +{"current_steps": 259, "total_steps": 2069, "loss": 0.59, "lr": 9.84921398447601e-06, "epoch": 0.12521150592216582, "percentage": 12.52, "elapsed_time": "4:28:02", "remaining_time": "1 day, 7:13:14"} +{"current_steps": 260, "total_steps": 2069, "loss": 0.5642, "lr": 9.847259430362222e-06, "epoch": 0.1256949480299734, "percentage": 12.57, "elapsed_time": "4:29:06", "remaining_time": "1 day, 7:12:20"} +{"current_steps": 261, "total_steps": 2069, "loss": 0.6016, "lr": 9.845292486262664e-06, "epoch": 0.126178390137781, "percentage": 12.61, "elapsed_time": "4:30:05", "remaining_time": "1 day, 7:11:01"} +{"current_steps": 262, "total_steps": 2069, "loss": 0.5807, "lr": 9.843313157204999e-06, "epoch": 0.12666183224558858, "percentage": 12.66, "elapsed_time": "4:31:05", "remaining_time": "1 day, 7:09:44"} +{"current_steps": 263, "total_steps": 2069, "loss": 0.5858, "lr": 9.841321448248552e-06, "epoch": 0.12714527435339618, "percentage": 12.71, "elapsed_time": "4:32:05", "remaining_time": "1 day, 7:08:25"} +{"current_steps": 264, "total_steps": 2069, "loss": 0.5847, "lr": 9.839317364484295e-06, "epoch": 0.12762871646120377, "percentage": 12.76, "elapsed_time": "4:33:03", "remaining_time": "1 day, 7:06:53"} +{"current_steps": 265, "total_steps": 2069, "loss": 0.5888, "lr": 9.837300911034824e-06, "epoch": 0.12811215856901137, "percentage": 12.81, "elapsed_time": "4:34:07", "remaining_time": "1 day, 7:06:09"} +{"current_steps": 266, "total_steps": 2069, "loss": 0.5928, "lr": 9.83527209305436e-06, "epoch": 0.12859560067681894, "percentage": 12.86, "elapsed_time": "4:35:06", "remaining_time": "1 day, 7:04:46"} +{"current_steps": 267, "total_steps": 2069, "loss": 0.5872, "lr": 9.83323091572872e-06, "epoch": 0.12907904278462654, "percentage": 12.9, "elapsed_time": "4:36:10", "remaining_time": "1 day, 7:03:58"} +{"current_steps": 268, "total_steps": 2069, "loss": 0.5805, "lr": 9.831177384275323e-06, "epoch": 0.12956248489243413, "percentage": 12.95, "elapsed_time": 
"4:37:12", "remaining_time": "1 day, 7:02:50"} +{"current_steps": 269, "total_steps": 2069, "loss": 0.5837, "lr": 9.829111503943159e-06, "epoch": 0.13004592700024173, "percentage": 13.0, "elapsed_time": "4:38:11", "remaining_time": "1 day, 7:01:33"} +{"current_steps": 270, "total_steps": 2069, "loss": 0.5539, "lr": 9.827033280012783e-06, "epoch": 0.1305293691080493, "percentage": 13.05, "elapsed_time": "4:39:16", "remaining_time": "1 day, 7:00:48"} +{"current_steps": 271, "total_steps": 2069, "loss": 0.5881, "lr": 9.824942717796304e-06, "epoch": 0.1310128112158569, "percentage": 13.1, "elapsed_time": "4:40:21", "remaining_time": "1 day, 7:00:05"} +{"current_steps": 272, "total_steps": 2069, "loss": 0.6032, "lr": 9.822839822637369e-06, "epoch": 0.1314962533236645, "percentage": 13.15, "elapsed_time": "4:41:24", "remaining_time": "1 day, 6:59:11"} +{"current_steps": 273, "total_steps": 2069, "loss": 0.5842, "lr": 9.820724599911147e-06, "epoch": 0.1319796954314721, "percentage": 13.19, "elapsed_time": "4:42:28", "remaining_time": "1 day, 6:58:20"} +{"current_steps": 274, "total_steps": 2069, "loss": 0.585, "lr": 9.818597055024315e-06, "epoch": 0.13246313753927969, "percentage": 13.24, "elapsed_time": "4:43:23", "remaining_time": "1 day, 6:56:29"} +{"current_steps": 275, "total_steps": 2069, "loss": 0.5779, "lr": 9.816457193415055e-06, "epoch": 0.13294657964708725, "percentage": 13.29, "elapsed_time": "4:44:24", "remaining_time": "1 day, 6:55:25"} +{"current_steps": 276, "total_steps": 2069, "loss": 0.5798, "lr": 9.81430502055302e-06, "epoch": 0.13343002175489485, "percentage": 13.34, "elapsed_time": "4:45:24", "remaining_time": "1 day, 6:54:06"} +{"current_steps": 277, "total_steps": 2069, "loss": 0.5836, "lr": 9.812140541939338e-06, "epoch": 0.13391346386270245, "percentage": 13.39, "elapsed_time": "4:46:28", "remaining_time": "1 day, 6:53:19"} +{"current_steps": 278, "total_steps": 2069, "loss": 0.5733, "lr": 9.809963763106593e-06, "epoch": 0.13439690597051004, 
"percentage": 13.44, "elapsed_time": "4:47:29", "remaining_time": "1 day, 6:52:11"} +{"current_steps": 279, "total_steps": 2069, "loss": 0.58, "lr": 9.807774689618806e-06, "epoch": 0.1348803480783176, "percentage": 13.48, "elapsed_time": "4:48:32", "remaining_time": "1 day, 6:51:15"} +{"current_steps": 280, "total_steps": 2069, "loss": 0.5911, "lr": 9.805573327071428e-06, "epoch": 0.1353637901861252, "percentage": 13.53, "elapsed_time": "4:49:33", "remaining_time": "1 day, 6:50:07"} +{"current_steps": 281, "total_steps": 2069, "loss": 0.5737, "lr": 9.803359681091313e-06, "epoch": 0.1358472322939328, "percentage": 13.58, "elapsed_time": "4:50:33", "remaining_time": "1 day, 6:48:51"} +{"current_steps": 282, "total_steps": 2069, "loss": 0.593, "lr": 9.801133757336726e-06, "epoch": 0.1363306744017404, "percentage": 13.63, "elapsed_time": "4:51:34", "remaining_time": "1 day, 6:47:37"} +{"current_steps": 283, "total_steps": 2069, "loss": 0.5818, "lr": 9.798895561497299e-06, "epoch": 0.13681411650954797, "percentage": 13.68, "elapsed_time": "4:52:35", "remaining_time": "1 day, 6:46:32"} +{"current_steps": 284, "total_steps": 2069, "loss": 0.6024, "lr": 9.796645099294049e-06, "epoch": 0.13729755861735557, "percentage": 13.73, "elapsed_time": "4:53:36", "remaining_time": "1 day, 6:45:22"} +{"current_steps": 285, "total_steps": 2069, "loss": 0.5837, "lr": 9.794382376479334e-06, "epoch": 0.13778100072516317, "percentage": 13.77, "elapsed_time": "4:54:38", "remaining_time": "1 day, 6:44:23"} +{"current_steps": 286, "total_steps": 2069, "loss": 0.5781, "lr": 9.792107398836859e-06, "epoch": 0.13826444283297076, "percentage": 13.82, "elapsed_time": "4:55:39", "remaining_time": "1 day, 6:43:11"} +{"current_steps": 287, "total_steps": 2069, "loss": 0.5821, "lr": 9.789820172181648e-06, "epoch": 0.13874788494077833, "percentage": 13.87, "elapsed_time": "4:56:41", "remaining_time": "1 day, 6:42:09"} +{"current_steps": 288, "total_steps": 2069, "loss": 1.0972, "lr": 
9.787520702360035e-06, "epoch": 0.13923132704858593, "percentage": 13.92, "elapsed_time": "4:57:42", "remaining_time": "1 day, 6:41:03"} +{"current_steps": 289, "total_steps": 2069, "loss": 0.5803, "lr": 9.785208995249655e-06, "epoch": 0.13971476915639353, "percentage": 13.97, "elapsed_time": "4:58:41", "remaining_time": "1 day, 6:39:42"} +{"current_steps": 290, "total_steps": 2069, "loss": 0.563, "lr": 9.782885056759413e-06, "epoch": 0.14019821126420112, "percentage": 14.02, "elapsed_time": "4:59:46", "remaining_time": "1 day, 6:38:58"} +{"current_steps": 291, "total_steps": 2069, "loss": 0.5872, "lr": 9.780548892829486e-06, "epoch": 0.1406816533720087, "percentage": 14.06, "elapsed_time": "5:00:51", "remaining_time": "1 day, 6:38:16"} +{"current_steps": 292, "total_steps": 2069, "loss": 0.5782, "lr": 9.778200509431297e-06, "epoch": 0.1411650954798163, "percentage": 14.11, "elapsed_time": "5:01:56", "remaining_time": "1 day, 6:37:30"} +{"current_steps": 293, "total_steps": 2069, "loss": 0.5804, "lr": 9.775839912567502e-06, "epoch": 0.14164853758762389, "percentage": 14.16, "elapsed_time": "5:02:58", "remaining_time": "1 day, 6:36:27"} +{"current_steps": 294, "total_steps": 2069, "loss": 0.5831, "lr": 9.773467108271978e-06, "epoch": 0.14213197969543148, "percentage": 14.21, "elapsed_time": "5:03:58", "remaining_time": "1 day, 6:35:16"} +{"current_steps": 295, "total_steps": 2069, "loss": 0.5597, "lr": 9.771082102609803e-06, "epoch": 0.14261542180323905, "percentage": 14.26, "elapsed_time": "5:05:03", "remaining_time": "1 day, 6:34:31"} +{"current_steps": 296, "total_steps": 2069, "loss": 0.5779, "lr": 9.768684901677245e-06, "epoch": 0.14309886391104665, "percentage": 14.31, "elapsed_time": "5:06:04", "remaining_time": "1 day, 6:33:23"} +{"current_steps": 297, "total_steps": 2069, "loss": 0.5849, "lr": 9.766275511601742e-06, "epoch": 0.14358230601885424, "percentage": 14.35, "elapsed_time": "5:07:05", "remaining_time": "1 day, 6:32:14"} +{"current_steps": 298, 
"total_steps": 2069, "loss": 0.5915, "lr": 9.763853938541887e-06, "epoch": 0.14406574812666184, "percentage": 14.4, "elapsed_time": "5:08:07", "remaining_time": "1 day, 6:31:08"} +{"current_steps": 299, "total_steps": 2069, "loss": 0.5816, "lr": 9.76142018868742e-06, "epoch": 0.1445491902344694, "percentage": 14.45, "elapsed_time": "5:09:09", "remaining_time": "1 day, 6:30:10"} +{"current_steps": 300, "total_steps": 2069, "loss": 0.5578, "lr": 9.7589742682592e-06, "epoch": 0.145032632342277, "percentage": 14.5, "elapsed_time": "5:10:11", "remaining_time": "1 day, 6:29:07"} +{"current_steps": 301, "total_steps": 2069, "loss": 0.5833, "lr": 9.756516183509198e-06, "epoch": 0.1455160744500846, "percentage": 14.55, "elapsed_time": "5:11:11", "remaining_time": "1 day, 6:27:53"} +{"current_steps": 302, "total_steps": 2069, "loss": 0.581, "lr": 9.754045940720471e-06, "epoch": 0.1459995165578922, "percentage": 14.6, "elapsed_time": "5:12:13", "remaining_time": "1 day, 6:26:51"} +{"current_steps": 303, "total_steps": 2069, "loss": 0.5879, "lr": 9.751563546207167e-06, "epoch": 0.14648295866569977, "percentage": 14.64, "elapsed_time": "5:13:15", "remaining_time": "1 day, 6:25:46"} +{"current_steps": 304, "total_steps": 2069, "loss": 0.557, "lr": 9.749069006314481e-06, "epoch": 0.14696640077350737, "percentage": 14.69, "elapsed_time": "5:14:21", "remaining_time": "1 day, 6:25:06"} +{"current_steps": 305, "total_steps": 2069, "loss": 0.5236, "lr": 9.74656232741866e-06, "epoch": 0.14744984288131496, "percentage": 14.74, "elapsed_time": "5:15:26", "remaining_time": "1 day, 6:24:21"} +{"current_steps": 306, "total_steps": 2069, "loss": 0.5827, "lr": 9.744043515926975e-06, "epoch": 0.14793328498912256, "percentage": 14.79, "elapsed_time": "5:16:23", "remaining_time": "1 day, 6:22:51"} +{"current_steps": 307, "total_steps": 2069, "loss": 0.5741, "lr": 9.741512578277715e-06, "epoch": 0.14841672709693013, "percentage": 14.84, "elapsed_time": "5:17:26", "remaining_time": "1 day, 
6:21:55"} +{"current_steps": 308, "total_steps": 2069, "loss": 0.587, "lr": 9.738969520940158e-06, "epoch": 0.14890016920473773, "percentage": 14.89, "elapsed_time": "5:18:28", "remaining_time": "1 day, 6:20:56"} +{"current_steps": 309, "total_steps": 2069, "loss": 0.5836, "lr": 9.736414350414564e-06, "epoch": 0.14938361131254532, "percentage": 14.93, "elapsed_time": "5:19:30", "remaining_time": "1 day, 6:19:52"} +{"current_steps": 310, "total_steps": 2069, "loss": 0.583, "lr": 9.733847073232156e-06, "epoch": 0.14986705342035292, "percentage": 14.98, "elapsed_time": "5:20:32", "remaining_time": "1 day, 6:18:51"} +{"current_steps": 311, "total_steps": 2069, "loss": 0.5433, "lr": 9.7312676959551e-06, "epoch": 0.15035049552816052, "percentage": 15.03, "elapsed_time": "5:21:36", "remaining_time": "1 day, 6:17:56"} +{"current_steps": 312, "total_steps": 2069, "loss": 0.5859, "lr": 9.72867622517649e-06, "epoch": 0.15083393763596809, "percentage": 15.08, "elapsed_time": "5:22:37", "remaining_time": "1 day, 6:16:51"} +{"current_steps": 313, "total_steps": 2069, "loss": 0.5759, "lr": 9.726072667520338e-06, "epoch": 0.15131737974377568, "percentage": 15.13, "elapsed_time": "5:23:43", "remaining_time": "1 day, 6:16:08"} +{"current_steps": 314, "total_steps": 2069, "loss": 0.5883, "lr": 9.723457029641547e-06, "epoch": 0.15180082185158328, "percentage": 15.18, "elapsed_time": "5:24:45", "remaining_time": "1 day, 6:15:08"} +{"current_steps": 315, "total_steps": 2069, "loss": 0.5723, "lr": 9.720829318225897e-06, "epoch": 0.15228426395939088, "percentage": 15.22, "elapsed_time": "5:25:49", "remaining_time": "1 day, 6:14:15"} +{"current_steps": 316, "total_steps": 2069, "loss": 0.5748, "lr": 9.718189539990029e-06, "epoch": 0.15276770606719844, "percentage": 15.27, "elapsed_time": "5:26:49", "remaining_time": "1 day, 6:13:02"} +{"current_steps": 317, "total_steps": 2069, "loss": 0.5831, "lr": 9.715537701681431e-06, "epoch": 0.15325114817500604, "percentage": 15.32, "elapsed_time": 
"5:27:52", "remaining_time": "1 day, 6:12:06"} +{"current_steps": 318, "total_steps": 2069, "loss": 0.5505, "lr": 9.712873810078415e-06, "epoch": 0.15373459028281364, "percentage": 15.37, "elapsed_time": "5:28:58", "remaining_time": "1 day, 6:11:24"} +{"current_steps": 319, "total_steps": 2069, "loss": 0.5789, "lr": 9.710197871990101e-06, "epoch": 0.15421803239062123, "percentage": 15.42, "elapsed_time": "5:29:54", "remaining_time": "1 day, 6:09:50"} +{"current_steps": 320, "total_steps": 2069, "loss": 0.5699, "lr": 9.707509894256406e-06, "epoch": 0.1547014744984288, "percentage": 15.47, "elapsed_time": "5:30:53", "remaining_time": "1 day, 6:08:30"} +{"current_steps": 321, "total_steps": 2069, "loss": 0.5841, "lr": 9.704809883748012e-06, "epoch": 0.1551849166062364, "percentage": 15.51, "elapsed_time": "5:31:56", "remaining_time": "1 day, 6:07:37"} +{"current_steps": 322, "total_steps": 2069, "loss": 0.5791, "lr": 9.70209784736637e-06, "epoch": 0.155668358714044, "percentage": 15.56, "elapsed_time": "5:33:01", "remaining_time": "1 day, 6:06:50"} +{"current_steps": 323, "total_steps": 2069, "loss": 0.5789, "lr": 9.699373792043658e-06, "epoch": 0.1561518008218516, "percentage": 15.61, "elapsed_time": "5:34:02", "remaining_time": "1 day, 6:05:40"} +{"current_steps": 324, "total_steps": 2069, "loss": 0.5791, "lr": 9.696637724742785e-06, "epoch": 0.15663524292965916, "percentage": 15.66, "elapsed_time": "5:35:02", "remaining_time": "1 day, 6:04:25"} +{"current_steps": 325, "total_steps": 2069, "loss": 0.5664, "lr": 9.693889652457359e-06, "epoch": 0.15711868503746676, "percentage": 15.71, "elapsed_time": "5:36:03", "remaining_time": "1 day, 6:03:21"} +{"current_steps": 326, "total_steps": 2069, "loss": 0.5777, "lr": 9.691129582211671e-06, "epoch": 0.15760212714527436, "percentage": 15.76, "elapsed_time": "5:37:04", "remaining_time": "1 day, 6:02:12"} +{"current_steps": 327, "total_steps": 2069, "loss": 0.5843, "lr": 9.688357521060685e-06, "epoch": 0.15808556925308195, 
"percentage": 15.8, "elapsed_time": "5:38:07", "remaining_time": "1 day, 6:01:15"} +{"current_steps": 328, "total_steps": 2069, "loss": 0.578, "lr": 9.685573476090015e-06, "epoch": 0.15856901136088952, "percentage": 15.85, "elapsed_time": "5:39:13", "remaining_time": "1 day, 6:00:35"} +{"current_steps": 329, "total_steps": 2069, "loss": 0.5859, "lr": 9.6827774544159e-06, "epoch": 0.15905245346869712, "percentage": 15.9, "elapsed_time": "5:40:13", "remaining_time": "1 day, 5:59:22"} +{"current_steps": 330, "total_steps": 2069, "loss": 0.5871, "lr": 9.6799694631852e-06, "epoch": 0.15953589557650472, "percentage": 15.95, "elapsed_time": "5:41:11", "remaining_time": "1 day, 5:57:58"} +{"current_steps": 331, "total_steps": 2069, "loss": 0.5841, "lr": 9.677149509575365e-06, "epoch": 0.1600193376843123, "percentage": 16.0, "elapsed_time": "5:42:12", "remaining_time": "1 day, 5:56:53"} +{"current_steps": 332, "total_steps": 2069, "loss": 0.5762, "lr": 9.674317600794426e-06, "epoch": 0.16050277979211988, "percentage": 16.05, "elapsed_time": "5:43:15", "remaining_time": "1 day, 5:55:53"} +{"current_steps": 333, "total_steps": 2069, "loss": 0.5685, "lr": 9.67147374408097e-06, "epoch": 0.16098622189992748, "percentage": 16.09, "elapsed_time": "5:44:10", "remaining_time": "1 day, 5:54:14"} +{"current_steps": 334, "total_steps": 2069, "loss": 0.5856, "lr": 9.66861794670412e-06, "epoch": 0.16146966400773508, "percentage": 16.14, "elapsed_time": "5:45:13", "remaining_time": "1 day, 5:53:18"} +{"current_steps": 335, "total_steps": 2069, "loss": 0.5789, "lr": 9.665750215963528e-06, "epoch": 0.16195310611554267, "percentage": 16.19, "elapsed_time": "5:46:18", "remaining_time": "1 day, 5:52:31"} +{"current_steps": 336, "total_steps": 2069, "loss": 0.5702, "lr": 9.662870559189344e-06, "epoch": 0.16243654822335024, "percentage": 16.24, "elapsed_time": "5:47:19", "remaining_time": "1 day, 5:51:25"} +{"current_steps": 337, "total_steps": 2069, "loss": 0.5742, "lr": 9.6599789837422e-06, 
"epoch": 0.16291999033115784, "percentage": 16.29, "elapsed_time": "5:48:21", "remaining_time": "1 day, 5:50:25"} +{"current_steps": 338, "total_steps": 2069, "loss": 0.5752, "lr": 9.657075497013202e-06, "epoch": 0.16340343243896543, "percentage": 16.34, "elapsed_time": "5:49:24", "remaining_time": "1 day, 5:49:23"} +{"current_steps": 339, "total_steps": 2069, "loss": 0.5854, "lr": 9.654160106423891e-06, "epoch": 0.16388687454677303, "percentage": 16.38, "elapsed_time": "5:50:23", "remaining_time": "1 day, 5:48:08"} +{"current_steps": 340, "total_steps": 2069, "loss": 0.5764, "lr": 9.651232819426242e-06, "epoch": 0.1643703166545806, "percentage": 16.43, "elapsed_time": "5:51:23", "remaining_time": "1 day, 5:46:54"} +{"current_steps": 341, "total_steps": 2069, "loss": 0.5619, "lr": 9.648293643502636e-06, "epoch": 0.1648537587623882, "percentage": 16.48, "elapsed_time": "5:52:26", "remaining_time": "1 day, 5:45:56"} +{"current_steps": 342, "total_steps": 2069, "loss": 0.5833, "lr": 9.645342586165845e-06, "epoch": 0.1653372008701958, "percentage": 16.53, "elapsed_time": "5:53:30", "remaining_time": "1 day, 5:45:04"} +{"current_steps": 343, "total_steps": 2069, "loss": 0.5381, "lr": 9.642379654959006e-06, "epoch": 0.1658206429780034, "percentage": 16.58, "elapsed_time": "5:54:35", "remaining_time": "1 day, 5:44:19"} +{"current_steps": 344, "total_steps": 2069, "loss": 0.5674, "lr": 9.639404857455614e-06, "epoch": 0.166304085085811, "percentage": 16.63, "elapsed_time": "5:55:36", "remaining_time": "1 day, 5:43:12"} +{"current_steps": 345, "total_steps": 2069, "loss": 0.5705, "lr": 9.63641820125949e-06, "epoch": 0.16678752719361856, "percentage": 16.67, "elapsed_time": "5:56:36", "remaining_time": "1 day, 5:41:59"} +{"current_steps": 346, "total_steps": 2069, "loss": 0.555, "lr": 9.633419694004767e-06, "epoch": 0.16727096930142615, "percentage": 16.72, "elapsed_time": "5:57:41", "remaining_time": "1 day, 5:41:12"} +{"current_steps": 347, "total_steps": 2069, "loss": 
0.5741, "lr": 9.63040934335587e-06, "epoch": 0.16775441140923375, "percentage": 16.77, "elapsed_time": "5:58:42", "remaining_time": "1 day, 5:40:07"} +{"current_steps": 348, "total_steps": 2069, "loss": 0.5775, "lr": 9.627387157007502e-06, "epoch": 0.16823785351704135, "percentage": 16.82, "elapsed_time": "5:59:45", "remaining_time": "1 day, 5:39:10"} +{"current_steps": 349, "total_steps": 2069, "loss": 0.5724, "lr": 9.624353142684611e-06, "epoch": 0.16872129562484892, "percentage": 16.87, "elapsed_time": "6:00:45", "remaining_time": "1 day, 5:37:58"} +{"current_steps": 350, "total_steps": 2069, "loss": 0.5794, "lr": 9.621307308142385e-06, "epoch": 0.1692047377326565, "percentage": 16.92, "elapsed_time": "6:01:50", "remaining_time": "1 day, 5:37:10"} +{"current_steps": 351, "total_steps": 2069, "loss": 0.5764, "lr": 9.618249661166218e-06, "epoch": 0.1696881798404641, "percentage": 16.96, "elapsed_time": "6:02:52", "remaining_time": "1 day, 5:36:08"} +{"current_steps": 352, "total_steps": 2069, "loss": 0.5804, "lr": 9.615180209571709e-06, "epoch": 0.1701716219482717, "percentage": 17.01, "elapsed_time": "6:03:55", "remaining_time": "1 day, 5:35:11"} +{"current_steps": 353, "total_steps": 2069, "loss": 0.5581, "lr": 9.612098961204617e-06, "epoch": 0.17065506405607928, "percentage": 17.06, "elapsed_time": "6:04:59", "remaining_time": "1 day, 5:34:19"} +{"current_steps": 354, "total_steps": 2069, "loss": 0.5618, "lr": 9.609005923940865e-06, "epoch": 0.17113850616388687, "percentage": 17.11, "elapsed_time": "6:06:00", "remaining_time": "1 day, 5:33:09"} +{"current_steps": 355, "total_steps": 2069, "loss": 0.5694, "lr": 9.605901105686503e-06, "epoch": 0.17162194827169447, "percentage": 17.16, "elapsed_time": "6:07:05", "remaining_time": "1 day, 5:32:21"} +{"current_steps": 356, "total_steps": 2069, "loss": 0.5897, "lr": 9.602784514377701e-06, "epoch": 0.17210539037950207, "percentage": 17.21, "elapsed_time": "6:08:09", "remaining_time": "1 day, 5:31:31"} 
+{"current_steps": 357, "total_steps": 2069, "loss": 0.5724, "lr": 9.599656157980715e-06, "epoch": 0.17258883248730963, "percentage": 17.25, "elapsed_time": "6:09:08", "remaining_time": "1 day, 5:30:12"} +{"current_steps": 358, "total_steps": 2069, "loss": 0.577, "lr": 9.596516044491873e-06, "epoch": 0.17307227459511723, "percentage": 17.3, "elapsed_time": "6:10:08", "remaining_time": "1 day, 5:29:02"} +{"current_steps": 359, "total_steps": 2069, "loss": 0.5834, "lr": 9.593364181937563e-06, "epoch": 0.17355571670292483, "percentage": 17.35, "elapsed_time": "6:11:11", "remaining_time": "1 day, 5:28:03"} +{"current_steps": 360, "total_steps": 2069, "loss": 0.5848, "lr": 9.590200578374198e-06, "epoch": 0.17403915881073242, "percentage": 17.4, "elapsed_time": "6:12:12", "remaining_time": "1 day, 5:26:55"} +{"current_steps": 361, "total_steps": 2069, "loss": 0.5629, "lr": 9.587025241888202e-06, "epoch": 0.17452260091854, "percentage": 17.45, "elapsed_time": "6:13:14", "remaining_time": "1 day, 5:25:57"} +{"current_steps": 362, "total_steps": 2069, "loss": 0.5619, "lr": 9.583838180595993e-06, "epoch": 0.1750060430263476, "percentage": 17.5, "elapsed_time": "6:14:18", "remaining_time": "1 day, 5:25:02"} +{"current_steps": 363, "total_steps": 2069, "loss": 0.5788, "lr": 9.580639402643957e-06, "epoch": 0.1754894851341552, "percentage": 17.54, "elapsed_time": "6:15:16", "remaining_time": "1 day, 5:23:41"} +{"current_steps": 364, "total_steps": 2069, "loss": 0.5758, "lr": 9.577428916208426e-06, "epoch": 0.17597292724196278, "percentage": 17.59, "elapsed_time": "6:16:22", "remaining_time": "1 day, 5:22:58"} +{"current_steps": 365, "total_steps": 2069, "loss": 0.5739, "lr": 9.574206729495662e-06, "epoch": 0.17645636934977035, "percentage": 17.64, "elapsed_time": "6:17:20", "remaining_time": "1 day, 5:21:38"} +{"current_steps": 366, "total_steps": 2069, "loss": 0.5646, "lr": 9.570972850741839e-06, "epoch": 0.17693981145757795, "percentage": 17.69, "elapsed_time": "6:18:18", 
"remaining_time": "1 day, 5:20:17"} +{"current_steps": 367, "total_steps": 2069, "loss": 0.5809, "lr": 9.567727288213005e-06, "epoch": 0.17742325356538555, "percentage": 17.74, "elapsed_time": "6:19:22", "remaining_time": "1 day, 5:19:23"} +{"current_steps": 368, "total_steps": 2069, "loss": 0.5745, "lr": 9.564470050205084e-06, "epoch": 0.17790669567319314, "percentage": 17.79, "elapsed_time": "6:20:24", "remaining_time": "1 day, 5:18:20"} +{"current_steps": 369, "total_steps": 2069, "loss": 0.5759, "lr": 9.561201145043835e-06, "epoch": 0.1783901377810007, "percentage": 17.83, "elapsed_time": "6:21:28", "remaining_time": "1 day, 5:17:29"} +{"current_steps": 370, "total_steps": 2069, "loss": 0.5716, "lr": 9.557920581084848e-06, "epoch": 0.1788735798888083, "percentage": 17.88, "elapsed_time": "6:22:28", "remaining_time": "1 day, 5:16:16"} +{"current_steps": 371, "total_steps": 2069, "loss": 0.5681, "lr": 9.554628366713506e-06, "epoch": 0.1793570219966159, "percentage": 17.93, "elapsed_time": "6:23:33", "remaining_time": "1 day, 5:15:28"} +{"current_steps": 372, "total_steps": 2069, "loss": 0.5674, "lr": 9.551324510344972e-06, "epoch": 0.1798404641044235, "percentage": 17.98, "elapsed_time": "6:24:34", "remaining_time": "1 day, 5:14:23"} +{"current_steps": 373, "total_steps": 2069, "loss": 0.5759, "lr": 9.548009020424172e-06, "epoch": 0.18032390621223107, "percentage": 18.03, "elapsed_time": "6:25:37", "remaining_time": "1 day, 5:13:22"} +{"current_steps": 374, "total_steps": 2069, "loss": 0.5761, "lr": 9.544681905425767e-06, "epoch": 0.18080734832003867, "percentage": 18.08, "elapsed_time": "6:26:35", "remaining_time": "1 day, 5:12:03"} +{"current_steps": 375, "total_steps": 2069, "loss": 0.5846, "lr": 9.541343173854128e-06, "epoch": 0.18129079042784627, "percentage": 18.12, "elapsed_time": "6:27:33", "remaining_time": "1 day, 5:10:45"} +{"current_steps": 376, "total_steps": 2069, "loss": 0.5655, "lr": 9.537992834243323e-06, "epoch": 0.18177423253565386, 
"percentage": 18.17, "elapsed_time": "6:28:37", "remaining_time": "1 day, 5:09:50"} +{"current_steps": 377, "total_steps": 2069, "loss": 0.578, "lr": 9.53463089515709e-06, "epoch": 0.18225767464346146, "percentage": 18.22, "elapsed_time": "6:29:38", "remaining_time": "1 day, 5:08:43"} +{"current_steps": 378, "total_steps": 2069, "loss": 0.5683, "lr": 9.531257365188818e-06, "epoch": 0.18274111675126903, "percentage": 18.27, "elapsed_time": "6:30:41", "remaining_time": "1 day, 5:07:46"} +{"current_steps": 379, "total_steps": 2069, "loss": 0.5112, "lr": 9.527872252961518e-06, "epoch": 0.18322455885907662, "percentage": 18.32, "elapsed_time": "6:31:48", "remaining_time": "1 day, 5:07:06"} +{"current_steps": 380, "total_steps": 2069, "loss": 0.5799, "lr": 9.524475567127813e-06, "epoch": 0.18370800096688422, "percentage": 18.37, "elapsed_time": "6:32:47", "remaining_time": "1 day, 5:05:52"} +{"current_steps": 381, "total_steps": 2069, "loss": 0.5601, "lr": 9.521067316369903e-06, "epoch": 0.18419144307469182, "percentage": 18.41, "elapsed_time": "6:33:48", "remaining_time": "1 day, 5:04:43"} +{"current_steps": 382, "total_steps": 2069, "loss": 0.5399, "lr": 9.517647509399555e-06, "epoch": 0.1846748851824994, "percentage": 18.46, "elapsed_time": "6:34:53", "remaining_time": "1 day, 5:03:54"} +{"current_steps": 383, "total_steps": 2069, "loss": 0.5754, "lr": 9.514216154958067e-06, "epoch": 0.18515832729030698, "percentage": 18.51, "elapsed_time": "6:35:53", "remaining_time": "1 day, 5:02:44"} +{"current_steps": 384, "total_steps": 2069, "loss": 0.5623, "lr": 9.510773261816261e-06, "epoch": 0.18564176939811458, "percentage": 18.56, "elapsed_time": "6:36:58", "remaining_time": "1 day, 5:01:54"} +{"current_steps": 385, "total_steps": 2069, "loss": 0.5774, "lr": 9.507318838774448e-06, "epoch": 0.18612521150592218, "percentage": 18.61, "elapsed_time": "6:38:00", "remaining_time": "1 day, 5:00:54"} +{"current_steps": 386, "total_steps": 2069, "loss": 0.5698, "lr": 
9.50385289466241e-06, "epoch": 0.18660865361372975, "percentage": 18.66, "elapsed_time": "6:39:01", "remaining_time": "1 day, 4:59:47"} +{"current_steps": 387, "total_steps": 2069, "loss": 0.5634, "lr": 9.500375438339384e-06, "epoch": 0.18709209572153734, "percentage": 18.7, "elapsed_time": "6:39:59", "remaining_time": "1 day, 4:58:26"} +{"current_steps": 388, "total_steps": 2069, "loss": 0.5642, "lr": 9.496886478694025e-06, "epoch": 0.18757553782934494, "percentage": 18.75, "elapsed_time": "6:41:05", "remaining_time": "1 day, 4:57:41"} +{"current_steps": 389, "total_steps": 2069, "loss": 0.5763, "lr": 9.493386024644396e-06, "epoch": 0.18805897993715254, "percentage": 18.8, "elapsed_time": "6:42:06", "remaining_time": "1 day, 4:56:37"} +{"current_steps": 390, "total_steps": 2069, "loss": 0.5667, "lr": 9.48987408513794e-06, "epoch": 0.1885424220449601, "percentage": 18.85, "elapsed_time": "6:43:07", "remaining_time": "1 day, 4:55:29"} +{"current_steps": 391, "total_steps": 2069, "loss": 0.5633, "lr": 9.486350669151455e-06, "epoch": 0.1890258641527677, "percentage": 18.9, "elapsed_time": "6:44:11", "remaining_time": "1 day, 4:54:35"} +{"current_steps": 392, "total_steps": 2069, "loss": 0.5705, "lr": 9.482815785691082e-06, "epoch": 0.1895093062605753, "percentage": 18.95, "elapsed_time": "6:45:11", "remaining_time": "1 day, 4:53:27"} +{"current_steps": 393, "total_steps": 2069, "loss": 0.5703, "lr": 9.47926944379226e-06, "epoch": 0.1899927483683829, "percentage": 18.99, "elapsed_time": "6:46:16", "remaining_time": "1 day, 4:52:38"} +{"current_steps": 394, "total_steps": 2069, "loss": 0.5583, "lr": 9.475711652519732e-06, "epoch": 0.19047619047619047, "percentage": 19.04, "elapsed_time": "6:47:19", "remaining_time": "1 day, 4:51:40"} +{"current_steps": 395, "total_steps": 2069, "loss": 0.5674, "lr": 9.472142420967496e-06, "epoch": 0.19095963258399806, "percentage": 19.09, "elapsed_time": "6:48:24", "remaining_time": "1 day, 4:50:49"} +{"current_steps": 396, 
"total_steps": 2069, "loss": 0.578, "lr": 9.468561758258795e-06, "epoch": 0.19144307469180566, "percentage": 19.14, "elapsed_time": "6:49:26", "remaining_time": "1 day, 4:49:45"} +{"current_steps": 397, "total_steps": 2069, "loss": 0.582, "lr": 9.464969673546092e-06, "epoch": 0.19192651679961326, "percentage": 19.19, "elapsed_time": "6:50:25", "remaining_time": "1 day, 4:48:31"} +{"current_steps": 398, "total_steps": 2069, "loss": 0.5762, "lr": 9.461366176011047e-06, "epoch": 0.19240995890742082, "percentage": 19.24, "elapsed_time": "6:51:23", "remaining_time": "1 day, 4:47:13"} +{"current_steps": 399, "total_steps": 2069, "loss": 0.5786, "lr": 9.457751274864486e-06, "epoch": 0.19289340101522842, "percentage": 19.28, "elapsed_time": "6:52:22", "remaining_time": "1 day, 4:45:56"} +{"current_steps": 400, "total_steps": 2069, "loss": 0.531, "lr": 9.454124979346392e-06, "epoch": 0.19337684312303602, "percentage": 19.33, "elapsed_time": "6:53:25", "remaining_time": "1 day, 4:44:59"} +{"current_steps": 401, "total_steps": 2069, "loss": 0.5735, "lr": 9.450487298725866e-06, "epoch": 0.19386028523084362, "percentage": 19.38, "elapsed_time": "6:54:25", "remaining_time": "1 day, 4:43:51"} +{"current_steps": 402, "total_steps": 2069, "loss": 0.5736, "lr": 9.446838242301113e-06, "epoch": 0.19434372733865118, "percentage": 19.43, "elapsed_time": "6:55:29", "remaining_time": "1 day, 4:42:55"} +{"current_steps": 403, "total_steps": 2069, "loss": 0.5682, "lr": 9.443177819399416e-06, "epoch": 0.19482716944645878, "percentage": 19.48, "elapsed_time": "6:56:31", "remaining_time": "1 day, 4:41:53"} +{"current_steps": 404, "total_steps": 2069, "loss": 0.5457, "lr": 9.439506039377111e-06, "epoch": 0.19531061155426638, "percentage": 19.53, "elapsed_time": "6:57:34", "remaining_time": "1 day, 4:40:58"} +{"current_steps": 405, "total_steps": 2069, "loss": 0.5452, "lr": 9.435822911619564e-06, "epoch": 0.19579405366207397, "percentage": 19.57, "elapsed_time": "6:58:37", "remaining_time": "1 
day, 4:39:59"} +{"current_steps": 406, "total_steps": 2069, "loss": 0.5569, "lr": 9.432128445541147e-06, "epoch": 0.19627749576988154, "percentage": 19.62, "elapsed_time": "6:59:39", "remaining_time": "1 day, 4:38:55"} +{"current_steps": 407, "total_steps": 2069, "loss": 0.5791, "lr": 9.42842265058521e-06, "epoch": 0.19676093787768914, "percentage": 19.67, "elapsed_time": "7:00:41", "remaining_time": "1 day, 4:37:53"} +{"current_steps": 408, "total_steps": 2069, "loss": 0.572, "lr": 9.424705536224065e-06, "epoch": 0.19724437998549674, "percentage": 19.72, "elapsed_time": "7:01:37", "remaining_time": "1 day, 4:36:27"} +{"current_steps": 409, "total_steps": 2069, "loss": 0.577, "lr": 9.420977111958957e-06, "epoch": 0.19772782209330433, "percentage": 19.77, "elapsed_time": "7:02:36", "remaining_time": "1 day, 4:35:14"} +{"current_steps": 410, "total_steps": 2069, "loss": 0.5673, "lr": 9.41723738732004e-06, "epoch": 0.1982112642011119, "percentage": 19.82, "elapsed_time": "7:03:38", "remaining_time": "1 day, 4:34:13"} +{"current_steps": 411, "total_steps": 2069, "loss": 0.5805, "lr": 9.41348637186635e-06, "epoch": 0.1986947063089195, "percentage": 19.86, "elapsed_time": "7:04:41", "remaining_time": "1 day, 4:33:12"} +{"current_steps": 412, "total_steps": 2069, "loss": 0.5811, "lr": 9.409724075185782e-06, "epoch": 0.1991781484167271, "percentage": 19.91, "elapsed_time": "7:05:42", "remaining_time": "1 day, 4:32:08"} +{"current_steps": 413, "total_steps": 2069, "loss": 0.5539, "lr": 9.405950506895074e-06, "epoch": 0.1996615905245347, "percentage": 19.96, "elapsed_time": "7:06:44", "remaining_time": "1 day, 4:31:04"} +{"current_steps": 414, "total_steps": 2069, "loss": 0.5754, "lr": 9.40216567663977e-06, "epoch": 0.2001450326323423, "percentage": 20.01, "elapsed_time": "7:07:43", "remaining_time": "1 day, 4:29:51"} +{"current_steps": 415, "total_steps": 2069, "loss": 0.508, "lr": 9.398369594094198e-06, "epoch": 0.20062847474014986, "percentage": 20.06, "elapsed_time": 
"7:08:54", "remaining_time": "1 day, 4:29:25"} +{"current_steps": 416, "total_steps": 2069, "loss": 0.5681, "lr": 9.394562268961454e-06, "epoch": 0.20111191684795746, "percentage": 20.11, "elapsed_time": "7:09:52", "remaining_time": "1 day, 4:28:07"} +{"current_steps": 417, "total_steps": 2069, "loss": 0.575, "lr": 9.390743710973366e-06, "epoch": 0.20159535895576505, "percentage": 20.15, "elapsed_time": "7:11:58", "remaining_time": "1 day, 4:31:19"} +{"current_steps": 418, "total_steps": 2069, "loss": 0.57, "lr": 9.386913929890478e-06, "epoch": 0.20207880106357265, "percentage": 20.2, "elapsed_time": "7:13:00", "remaining_time": "1 day, 4:30:17"} +{"current_steps": 419, "total_steps": 2069, "loss": 0.5644, "lr": 9.383072935502018e-06, "epoch": 0.20256224317138022, "percentage": 20.25, "elapsed_time": "7:14:07", "remaining_time": "1 day, 4:29:35"} +{"current_steps": 420, "total_steps": 2069, "loss": 0.564, "lr": 9.379220737625877e-06, "epoch": 0.20304568527918782, "percentage": 20.3, "elapsed_time": "7:15:08", "remaining_time": "1 day, 4:28:27"} +{"current_steps": 421, "total_steps": 2069, "loss": 0.5602, "lr": 9.375357346108583e-06, "epoch": 0.2035291273869954, "percentage": 20.35, "elapsed_time": "7:16:10", "remaining_time": "1 day, 4:27:22"} +{"current_steps": 422, "total_steps": 2069, "loss": 0.5695, "lr": 9.371482770825277e-06, "epoch": 0.204012569494803, "percentage": 20.4, "elapsed_time": "7:17:12", "remaining_time": "1 day, 4:26:21"} +{"current_steps": 423, "total_steps": 2069, "loss": 0.5661, "lr": 9.367597021679686e-06, "epoch": 0.20449601160261058, "percentage": 20.44, "elapsed_time": "7:18:17", "remaining_time": "1 day, 4:25:29"} +{"current_steps": 424, "total_steps": 2069, "loss": 0.5582, "lr": 9.363700108604096e-06, "epoch": 0.20497945371041817, "percentage": 20.49, "elapsed_time": "7:19:16", "remaining_time": "1 day, 4:24:14"} +{"current_steps": 425, "total_steps": 2069, "loss": 0.5645, "lr": 9.359792041559334e-06, "epoch": 0.20546289581822577, 
"percentage": 20.54, "elapsed_time": "7:20:19", "remaining_time": "1 day, 4:23:17"} +{"current_steps": 426, "total_steps": 2069, "loss": 0.5677, "lr": 9.35587283053473e-06, "epoch": 0.20594633792603337, "percentage": 20.59, "elapsed_time": "7:21:20", "remaining_time": "1 day, 4:22:08"} +{"current_steps": 427, "total_steps": 2069, "loss": 0.5435, "lr": 9.351942485548109e-06, "epoch": 0.20642978003384094, "percentage": 20.64, "elapsed_time": "7:22:27", "remaining_time": "1 day, 4:21:24"} +{"current_steps": 428, "total_steps": 2069, "loss": 0.5599, "lr": 9.348001016645744e-06, "epoch": 0.20691322214164853, "percentage": 20.69, "elapsed_time": "7:23:26", "remaining_time": "1 day, 4:20:10"} +{"current_steps": 429, "total_steps": 2069, "loss": 0.541, "lr": 9.344048433902351e-06, "epoch": 0.20739666424945613, "percentage": 20.73, "elapsed_time": "7:24:32", "remaining_time": "1 day, 4:19:24"} +{"current_steps": 430, "total_steps": 2069, "loss": 0.5366, "lr": 9.340084747421048e-06, "epoch": 0.20788010635726373, "percentage": 20.78, "elapsed_time": "7:25:36", "remaining_time": "1 day, 4:18:29"} +{"current_steps": 431, "total_steps": 2069, "loss": 0.5571, "lr": 9.336109967333337e-06, "epoch": 0.2083635484650713, "percentage": 20.83, "elapsed_time": "7:26:35", "remaining_time": "1 day, 4:17:17"} +{"current_steps": 432, "total_steps": 2069, "loss": 0.5516, "lr": 9.332124103799075e-06, "epoch": 0.2088469905728789, "percentage": 20.88, "elapsed_time": "7:27:35", "remaining_time": "1 day, 4:16:04"} +{"current_steps": 433, "total_steps": 2069, "loss": 0.5679, "lr": 9.328127167006457e-06, "epoch": 0.2093304326806865, "percentage": 20.93, "elapsed_time": "7:28:38", "remaining_time": "1 day, 4:15:05"} +{"current_steps": 434, "total_steps": 2069, "loss": 0.5659, "lr": 9.324119167171967e-06, "epoch": 0.2098138747884941, "percentage": 20.98, "elapsed_time": "7:29:40", "remaining_time": "1 day, 4:14:02"} +{"current_steps": 435, "total_steps": 2069, "loss": 0.5753, "lr": 
9.320100114540382e-06, "epoch": 0.21029731689630166, "percentage": 21.02, "elapsed_time": "7:30:38", "remaining_time": "1 day, 4:12:44"} +{"current_steps": 436, "total_steps": 2069, "loss": 0.558, "lr": 9.316070019384722e-06, "epoch": 0.21078075900410925, "percentage": 21.07, "elapsed_time": "7:31:38", "remaining_time": "1 day, 4:11:34"} +{"current_steps": 437, "total_steps": 2069, "loss": 0.5637, "lr": 9.312028892006233e-06, "epoch": 0.21126420111191685, "percentage": 21.12, "elapsed_time": "7:32:42", "remaining_time": "1 day, 4:10:39"} +{"current_steps": 438, "total_steps": 2069, "loss": 0.5603, "lr": 9.307976742734366e-06, "epoch": 0.21174764321972445, "percentage": 21.17, "elapsed_time": "7:33:42", "remaining_time": "1 day, 4:09:30"} +{"current_steps": 439, "total_steps": 2069, "loss": 0.5583, "lr": 9.30391358192674e-06, "epoch": 0.21223108532753202, "percentage": 21.22, "elapsed_time": "7:34:45", "remaining_time": "1 day, 4:08:32"} +{"current_steps": 440, "total_steps": 2069, "loss": 0.5614, "lr": 9.299839419969119e-06, "epoch": 0.2127145274353396, "percentage": 21.27, "elapsed_time": "7:35:49", "remaining_time": "1 day, 4:07:35"} +{"current_steps": 441, "total_steps": 2069, "loss": 0.5732, "lr": 9.295754267275393e-06, "epoch": 0.2131979695431472, "percentage": 21.31, "elapsed_time": "7:36:49", "remaining_time": "1 day, 4:06:24"} +{"current_steps": 442, "total_steps": 2069, "loss": 0.5451, "lr": 9.291658134287537e-06, "epoch": 0.2136814116509548, "percentage": 21.36, "elapsed_time": "7:37:45", "remaining_time": "1 day, 4:04:59"} +{"current_steps": 443, "total_steps": 2069, "loss": 0.5486, "lr": 9.287551031475604e-06, "epoch": 0.21416485375876237, "percentage": 21.41, "elapsed_time": "7:38:47", "remaining_time": "1 day, 4:03:59"} +{"current_steps": 444, "total_steps": 2069, "loss": 0.5568, "lr": 9.283432969337672e-06, "epoch": 0.21464829586656997, "percentage": 21.46, "elapsed_time": "7:39:50", "remaining_time": "1 day, 4:03:00"} +{"current_steps": 445, 
"total_steps": 2069, "loss": 0.5561, "lr": 9.279303958399846e-06, "epoch": 0.21513173797437757, "percentage": 21.51, "elapsed_time": "7:40:54", "remaining_time": "1 day, 4:02:04"} +{"current_steps": 446, "total_steps": 2069, "loss": 0.5653, "lr": 9.275164009216205e-06, "epoch": 0.21561518008218516, "percentage": 21.56, "elapsed_time": "7:41:57", "remaining_time": "1 day, 4:01:05"} +{"current_steps": 447, "total_steps": 2069, "loss": 0.5359, "lr": 9.271013132368799e-06, "epoch": 0.21609862218999276, "percentage": 21.6, "elapsed_time": "7:42:57", "remaining_time": "1 day, 3:59:54"} +{"current_steps": 448, "total_steps": 2069, "loss": 0.5627, "lr": 9.266851338467598e-06, "epoch": 0.21658206429780033, "percentage": 21.65, "elapsed_time": "7:43:59", "remaining_time": "1 day, 3:58:51"} +{"current_steps": 449, "total_steps": 2069, "loss": 0.5372, "lr": 9.262678638150486e-06, "epoch": 0.21706550640560793, "percentage": 21.7, "elapsed_time": "7:45:05", "remaining_time": "1 day, 3:58:03"} +{"current_steps": 450, "total_steps": 2069, "loss": 0.583, "lr": 9.258495042083222e-06, "epoch": 0.21754894851341552, "percentage": 21.75, "elapsed_time": "7:46:07", "remaining_time": "1 day, 3:56:59"} +{"current_steps": 451, "total_steps": 2069, "loss": 0.5641, "lr": 9.254300560959413e-06, "epoch": 0.21803239062122312, "percentage": 21.8, "elapsed_time": "7:47:11", "remaining_time": "1 day, 3:56:05"} +{"current_steps": 452, "total_steps": 2069, "loss": 0.5692, "lr": 9.25009520550049e-06, "epoch": 0.2185158327290307, "percentage": 21.85, "elapsed_time": "7:48:13", "remaining_time": "1 day, 3:55:03"} +{"current_steps": 453, "total_steps": 2069, "loss": 0.5732, "lr": 9.245878986455684e-06, "epoch": 0.2189992748368383, "percentage": 21.89, "elapsed_time": "7:49:16", "remaining_time": "1 day, 3:54:01"} +{"current_steps": 454, "total_steps": 2069, "loss": 0.5684, "lr": 9.241651914601986e-06, "epoch": 0.21948271694464588, "percentage": 21.94, "elapsed_time": "7:50:18", "remaining_time": "1 day, 
3:53:02"} +{"current_steps": 455, "total_steps": 2069, "loss": 0.5728, "lr": 9.237414000744134e-06, "epoch": 0.21996615905245348, "percentage": 21.99, "elapsed_time": "7:51:19", "remaining_time": "1 day, 3:51:53"} +{"current_steps": 456, "total_steps": 2069, "loss": 0.5543, "lr": 9.23316525571458e-06, "epoch": 0.22044960116026105, "percentage": 22.04, "elapsed_time": "7:52:25", "remaining_time": "1 day, 3:51:04"} +{"current_steps": 457, "total_steps": 2069, "loss": 0.5109, "lr": 9.228905690373456e-06, "epoch": 0.22093304326806865, "percentage": 22.09, "elapsed_time": "7:53:31", "remaining_time": "1 day, 3:50:15"} +{"current_steps": 458, "total_steps": 2069, "loss": 0.5613, "lr": 9.224635315608554e-06, "epoch": 0.22141648537587624, "percentage": 22.14, "elapsed_time": "7:54:30", "remaining_time": "1 day, 3:49:03"} +{"current_steps": 459, "total_steps": 2069, "loss": 0.5758, "lr": 9.2203541423353e-06, "epoch": 0.22189992748368384, "percentage": 22.18, "elapsed_time": "7:55:29", "remaining_time": "1 day, 3:47:52"} +{"current_steps": 460, "total_steps": 2069, "loss": 0.5656, "lr": 9.216062181496712e-06, "epoch": 0.2223833695914914, "percentage": 22.23, "elapsed_time": "7:56:33", "remaining_time": "1 day, 3:46:55"} +{"current_steps": 461, "total_steps": 2069, "loss": 0.5643, "lr": 9.211759444063392e-06, "epoch": 0.222866811699299, "percentage": 22.28, "elapsed_time": "7:57:36", "remaining_time": "1 day, 3:45:56"} +{"current_steps": 462, "total_steps": 2069, "loss": 0.5645, "lr": 9.207445941033483e-06, "epoch": 0.2233502538071066, "percentage": 22.33, "elapsed_time": "7:58:41", "remaining_time": "1 day, 3:45:02"} +{"current_steps": 463, "total_steps": 2069, "loss": 0.5622, "lr": 9.203121683432646e-06, "epoch": 0.2238336959149142, "percentage": 22.38, "elapsed_time": "7:59:41", "remaining_time": "1 day, 3:43:53"} +{"current_steps": 464, "total_steps": 2069, "loss": 0.5686, "lr": 9.19878668231403e-06, "epoch": 0.22431713802272177, "percentage": 22.43, "elapsed_time": 
"8:00:45", "remaining_time": "1 day, 3:42:56"} +{"current_steps": 465, "total_steps": 2069, "loss": 0.5617, "lr": 9.19444094875825e-06, "epoch": 0.22480058013052936, "percentage": 22.47, "elapsed_time": "8:01:45", "remaining_time": "1 day, 3:41:47"} +{"current_steps": 466, "total_steps": 2069, "loss": 0.5733, "lr": 9.190084493873353e-06, "epoch": 0.22528402223833696, "percentage": 22.52, "elapsed_time": "8:02:45", "remaining_time": "1 day, 3:40:37"} +{"current_steps": 467, "total_steps": 2069, "loss": 0.5632, "lr": 9.185717328794784e-06, "epoch": 0.22576746434614456, "percentage": 22.57, "elapsed_time": "8:03:46", "remaining_time": "1 day, 3:39:32"} +{"current_steps": 468, "total_steps": 2069, "loss": 0.5684, "lr": 9.18133946468537e-06, "epoch": 0.22625090645395213, "percentage": 22.62, "elapsed_time": "8:04:46", "remaining_time": "1 day, 3:38:23"} +{"current_steps": 469, "total_steps": 2069, "loss": 0.5559, "lr": 9.176950912735287e-06, "epoch": 0.22673434856175972, "percentage": 22.67, "elapsed_time": "8:05:46", "remaining_time": "1 day, 3:37:13"} +{"current_steps": 470, "total_steps": 2069, "loss": 0.5731, "lr": 9.172551684162025e-06, "epoch": 0.22721779066956732, "percentage": 22.72, "elapsed_time": "8:06:47", "remaining_time": "1 day, 3:36:06"} +{"current_steps": 471, "total_steps": 2069, "loss": 0.5671, "lr": 9.16814179021037e-06, "epoch": 0.22770123277737492, "percentage": 22.76, "elapsed_time": "8:07:47", "remaining_time": "1 day, 3:34:57"} +{"current_steps": 472, "total_steps": 2069, "loss": 0.5661, "lr": 9.163721242152362e-06, "epoch": 0.2281846748851825, "percentage": 22.81, "elapsed_time": "8:08:48", "remaining_time": "1 day, 3:33:53"} +{"current_steps": 473, "total_steps": 2069, "loss": 0.5627, "lr": 9.159290051287282e-06, "epoch": 0.22866811699299008, "percentage": 22.86, "elapsed_time": "8:09:48", "remaining_time": "1 day, 3:32:43"} +{"current_steps": 474, "total_steps": 2069, "loss": 0.5615, "lr": 9.154848228941607e-06, "epoch": 0.22915155910079768, 
"percentage": 22.91, "elapsed_time": "8:10:49", "remaining_time": "1 day, 3:31:37"} +{"current_steps": 475, "total_steps": 2069, "loss": 0.5645, "lr": 9.150395786468998e-06, "epoch": 0.22963500120860528, "percentage": 22.96, "elapsed_time": "8:11:48", "remaining_time": "1 day, 3:30:22"} +{"current_steps": 476, "total_steps": 2069, "loss": 0.5647, "lr": 9.14593273525025e-06, "epoch": 0.23011844331641285, "percentage": 23.01, "elapsed_time": "8:12:54", "remaining_time": "1 day, 3:29:36"} +{"current_steps": 477, "total_steps": 2069, "loss": 0.5729, "lr": 9.14145908669329e-06, "epoch": 0.23060188542422044, "percentage": 23.05, "elapsed_time": "8:13:57", "remaining_time": "1 day, 3:28:34"} +{"current_steps": 478, "total_steps": 2069, "loss": 0.5587, "lr": 9.136974852233118e-06, "epoch": 0.23108532753202804, "percentage": 23.1, "elapsed_time": "8:14:58", "remaining_time": "1 day, 3:27:30"} +{"current_steps": 479, "total_steps": 2069, "loss": 0.5646, "lr": 9.132480043331801e-06, "epoch": 0.23156876963983564, "percentage": 23.15, "elapsed_time": "8:15:59", "remaining_time": "1 day, 3:26:25"} +{"current_steps": 480, "total_steps": 2069, "loss": 0.5655, "lr": 9.127974671478432e-06, "epoch": 0.23205221174764323, "percentage": 23.2, "elapsed_time": "8:17:03", "remaining_time": "1 day, 3:25:28"} +{"current_steps": 481, "total_steps": 2069, "loss": 0.5608, "lr": 9.123458748189105e-06, "epoch": 0.2325356538554508, "percentage": 23.25, "elapsed_time": "8:18:08", "remaining_time": "1 day, 3:24:36"} +{"current_steps": 482, "total_steps": 2069, "loss": 0.5254, "lr": 9.118932285006886e-06, "epoch": 0.2330190959632584, "percentage": 23.3, "elapsed_time": "8:19:09", "remaining_time": "1 day, 3:23:30"} +{"current_steps": 483, "total_steps": 2069, "loss": 0.5751, "lr": 9.114395293501775e-06, "epoch": 0.233502538071066, "percentage": 23.34, "elapsed_time": "8:20:10", "remaining_time": "1 day, 3:22:22"} +{"current_steps": 484, "total_steps": 2069, "loss": 0.5603, "lr": 9.10984778527069e-06, 
"epoch": 0.2339859801788736, "percentage": 23.39, "elapsed_time": "8:21:12", "remaining_time": "1 day, 3:21:22"} +{"current_steps": 485, "total_steps": 2069, "loss": 0.5703, "lr": 9.10528977193743e-06, "epoch": 0.23446942228668116, "percentage": 23.44, "elapsed_time": "8:22:15", "remaining_time": "1 day, 3:20:20"} +{"current_steps": 486, "total_steps": 2069, "loss": 0.5635, "lr": 9.100721265152644e-06, "epoch": 0.23495286439448876, "percentage": 23.49, "elapsed_time": "8:23:15", "remaining_time": "1 day, 3:19:12"} +{"current_steps": 487, "total_steps": 2069, "loss": 0.5721, "lr": 9.096142276593802e-06, "epoch": 0.23543630650229636, "percentage": 23.54, "elapsed_time": "8:24:14", "remaining_time": "1 day, 3:17:59"} +{"current_steps": 488, "total_steps": 2069, "loss": 0.5502, "lr": 9.09155281796517e-06, "epoch": 0.23591974861010395, "percentage": 23.59, "elapsed_time": "8:25:17", "remaining_time": "1 day, 3:17:02"} +{"current_steps": 489, "total_steps": 2069, "loss": 0.5628, "lr": 9.086952900997774e-06, "epoch": 0.23640319071791152, "percentage": 23.63, "elapsed_time": "8:26:17", "remaining_time": "1 day, 3:15:53"} +{"current_steps": 490, "total_steps": 2069, "loss": 0.5649, "lr": 9.082342537449369e-06, "epoch": 0.23688663282571912, "percentage": 23.68, "elapsed_time": "8:27:21", "remaining_time": "1 day, 3:14:57"} +{"current_steps": 491, "total_steps": 2069, "loss": 0.5363, "lr": 9.07772173910442e-06, "epoch": 0.23737007493352671, "percentage": 23.73, "elapsed_time": "8:28:21", "remaining_time": "1 day, 3:13:48"} +{"current_steps": 492, "total_steps": 2069, "loss": 0.5679, "lr": 9.073090517774057e-06, "epoch": 0.2378535170413343, "percentage": 23.78, "elapsed_time": "8:29:18", "remaining_time": "1 day, 3:12:29"} +{"current_steps": 493, "total_steps": 2069, "loss": 0.5598, "lr": 9.068448885296057e-06, "epoch": 0.23833695914914188, "percentage": 23.83, "elapsed_time": "8:30:22", "remaining_time": "1 day, 3:11:33"} +{"current_steps": 494, "total_steps": 2069, "loss": 
0.5606, "lr": 9.063796853534808e-06, "epoch": 0.23882040125694948, "percentage": 23.88, "elapsed_time": "8:31:26", "remaining_time": "1 day, 3:10:35"} +{"current_steps": 495, "total_steps": 2069, "loss": 0.5614, "lr": 9.059134434381274e-06, "epoch": 0.23930384336475707, "percentage": 23.92, "elapsed_time": "8:32:27", "remaining_time": "1 day, 3:09:31"} +{"current_steps": 496, "total_steps": 2069, "loss": 0.5637, "lr": 9.054461639752976e-06, "epoch": 0.23978728547256467, "percentage": 23.97, "elapsed_time": "8:33:27", "remaining_time": "1 day, 3:08:23"} +{"current_steps": 497, "total_steps": 2069, "loss": 0.5718, "lr": 9.049778481593954e-06, "epoch": 0.24027072758037224, "percentage": 24.02, "elapsed_time": "8:34:29", "remaining_time": "1 day, 3:07:20"} +{"current_steps": 498, "total_steps": 2069, "loss": 0.5651, "lr": 9.045084971874738e-06, "epoch": 0.24075416968817984, "percentage": 24.07, "elapsed_time": "8:35:28", "remaining_time": "1 day, 3:06:06"} +{"current_steps": 499, "total_steps": 2069, "loss": 0.565, "lr": 9.040381122592317e-06, "epoch": 0.24123761179598743, "percentage": 24.12, "elapsed_time": "8:36:27", "remaining_time": "1 day, 3:04:56"} +{"current_steps": 500, "total_steps": 2069, "loss": 0.5593, "lr": 9.035666945770107e-06, "epoch": 0.24172105390379503, "percentage": 24.17, "elapsed_time": "8:37:30", "remaining_time": "1 day, 3:03:55"} +{"current_steps": 501, "total_steps": 2069, "loss": 0.5199, "lr": 9.030942453457928e-06, "epoch": 0.2422044960116026, "percentage": 24.21, "elapsed_time": "8:38:34", "remaining_time": "1 day, 3:02:59"} +{"current_steps": 502, "total_steps": 2069, "loss": 0.5548, "lr": 9.02620765773196e-06, "epoch": 0.2426879381194102, "percentage": 24.26, "elapsed_time": "8:39:36", "remaining_time": "1 day, 3:01:56"} +{"current_steps": 503, "total_steps": 2069, "loss": 0.5611, "lr": 9.02146257069472e-06, "epoch": 0.2431713802272178, "percentage": 24.31, "elapsed_time": "8:40:39", "remaining_time": "1 day, 3:00:59"} +{"current_steps": 
504, "total_steps": 2069, "loss": 0.5577, "lr": 9.01670720447504e-06, "epoch": 0.2436548223350254, "percentage": 24.36, "elapsed_time": "8:41:39", "remaining_time": "1 day, 2:59:49"} +{"current_steps": 505, "total_steps": 2069, "loss": 0.5608, "lr": 9.011941571228015e-06, "epoch": 0.24413826444283296, "percentage": 24.41, "elapsed_time": "8:42:37", "remaining_time": "1 day, 2:58:33"} +{"current_steps": 506, "total_steps": 2069, "loss": 0.5315, "lr": 9.007165683134986e-06, "epoch": 0.24462170655064056, "percentage": 24.46, "elapsed_time": "8:43:41", "remaining_time": "1 day, 2:57:37"} +{"current_steps": 507, "total_steps": 2069, "loss": 0.5613, "lr": 9.00237955240351e-06, "epoch": 0.24510514865844815, "percentage": 24.5, "elapsed_time": "8:44:38", "remaining_time": "1 day, 2:56:19"} +{"current_steps": 508, "total_steps": 2069, "loss": 0.5764, "lr": 8.997583191267326e-06, "epoch": 0.24558859076625575, "percentage": 24.55, "elapsed_time": "8:45:38", "remaining_time": "1 day, 2:55:13"} +{"current_steps": 509, "total_steps": 2069, "loss": 0.5704, "lr": 8.992776611986313e-06, "epoch": 0.24607203287406332, "percentage": 24.6, "elapsed_time": "8:46:39", "remaining_time": "1 day, 2:54:07"} +{"current_steps": 510, "total_steps": 2069, "loss": 0.5573, "lr": 8.987959826846479e-06, "epoch": 0.24655547498187091, "percentage": 24.65, "elapsed_time": "8:47:39", "remaining_time": "1 day, 2:52:59"} +{"current_steps": 511, "total_steps": 2069, "loss": 0.5583, "lr": 8.983132848159916e-06, "epoch": 0.2470389170896785, "percentage": 24.7, "elapsed_time": "8:48:43", "remaining_time": "1 day, 2:52:03"} +{"current_steps": 512, "total_steps": 2069, "loss": 0.5699, "lr": 8.978295688264768e-06, "epoch": 0.2475223591974861, "percentage": 24.75, "elapsed_time": "8:49:46", "remaining_time": "1 day, 2:51:02"} +{"current_steps": 513, "total_steps": 2069, "loss": 0.5641, "lr": 8.973448359525207e-06, "epoch": 0.2480058013052937, "percentage": 24.79, "elapsed_time": "8:50:49", "remaining_time": "1 
day, 2:50:04"} +{"current_steps": 514, "total_steps": 2069, "loss": 0.5649, "lr": 8.968590874331395e-06, "epoch": 0.24848924341310127, "percentage": 24.84, "elapsed_time": "8:51:53", "remaining_time": "1 day, 2:49:07"} +{"current_steps": 515, "total_steps": 2069, "loss": 0.5533, "lr": 8.963723245099456e-06, "epoch": 0.24897268552090887, "percentage": 24.89, "elapsed_time": "8:52:54", "remaining_time": "1 day, 2:48:03"} +{"current_steps": 516, "total_steps": 2069, "loss": 0.5571, "lr": 8.958845484271443e-06, "epoch": 0.24945612762871647, "percentage": 24.94, "elapsed_time": "8:54:01", "remaining_time": "1 day, 2:47:14"} +{"current_steps": 517, "total_steps": 2069, "loss": 0.5612, "lr": 8.953957604315306e-06, "epoch": 0.24993956973652406, "percentage": 24.99, "elapsed_time": "8:55:03", "remaining_time": "1 day, 2:46:13"} +{"current_steps": 518, "total_steps": 2069, "loss": 0.5532, "lr": 8.949059617724859e-06, "epoch": 0.25042301184433163, "percentage": 25.04, "elapsed_time": "8:56:05", "remaining_time": "1 day, 2:45:10"} +{"current_steps": 519, "total_steps": 2069, "loss": 0.5314, "lr": 8.944151537019752e-06, "epoch": 0.25090645395213923, "percentage": 25.08, "elapsed_time": "8:57:09", "remaining_time": "1 day, 2:44:13"} +{"current_steps": 520, "total_steps": 2069, "loss": 0.561, "lr": 8.939233374745432e-06, "epoch": 0.2513898960599468, "percentage": 25.13, "elapsed_time": "8:58:11", "remaining_time": "1 day, 2:43:10"} +{"current_steps": 521, "total_steps": 2069, "loss": 0.5229, "lr": 8.934305143473123e-06, "epoch": 0.2518733381677544, "percentage": 25.18, "elapsed_time": "8:59:17", "remaining_time": "1 day, 2:42:20"} +{"current_steps": 522, "total_steps": 2069, "loss": 0.5584, "lr": 8.929366855799777e-06, "epoch": 0.252356780275562, "percentage": 25.23, "elapsed_time": "9:00:20", "remaining_time": "1 day, 2:41:21"} +{"current_steps": 523, "total_steps": 2069, "loss": 0.5722, "lr": 8.924418524348058e-06, "epoch": 0.2528402223833696, "percentage": 25.28, 
"elapsed_time": "9:01:21", "remaining_time": "1 day, 2:40:16"} +{"current_steps": 524, "total_steps": 2069, "loss": 0.5527, "lr": 8.919460161766299e-06, "epoch": 0.25332366449117716, "percentage": 25.33, "elapsed_time": "9:02:22", "remaining_time": "1 day, 2:39:11"} +{"current_steps": 525, "total_steps": 2069, "loss": 0.565, "lr": 8.914491780728471e-06, "epoch": 0.25380710659898476, "percentage": 25.37, "elapsed_time": "9:03:22", "remaining_time": "1 day, 2:38:01"} +{"current_steps": 526, "total_steps": 2069, "loss": 0.5562, "lr": 8.909513393934162e-06, "epoch": 0.25429054870679235, "percentage": 25.42, "elapsed_time": "9:04:23", "remaining_time": "1 day, 2:36:55"} +{"current_steps": 527, "total_steps": 2069, "loss": 0.5536, "lr": 8.904525014108529e-06, "epoch": 0.25477399081459995, "percentage": 25.47, "elapsed_time": "9:05:23", "remaining_time": "1 day, 2:35:48"} +{"current_steps": 528, "total_steps": 2069, "loss": 0.5612, "lr": 8.899526654002268e-06, "epoch": 0.25525743292240755, "percentage": 25.52, "elapsed_time": "9:06:28", "remaining_time": "1 day, 2:34:55"} +{"current_steps": 529, "total_steps": 2069, "loss": 0.5578, "lr": 8.894518326391595e-06, "epoch": 0.25574087503021514, "percentage": 25.57, "elapsed_time": "9:07:28", "remaining_time": "1 day, 2:33:46"} +{"current_steps": 530, "total_steps": 2069, "loss": 0.5554, "lr": 8.889500044078199e-06, "epoch": 0.25622431713802274, "percentage": 25.62, "elapsed_time": "9:08:30", "remaining_time": "1 day, 2:32:44"} +{"current_steps": 531, "total_steps": 2069, "loss": 0.5466, "lr": 8.88447181988921e-06, "epoch": 0.25670775924583034, "percentage": 25.66, "elapsed_time": "9:09:34", "remaining_time": "1 day, 2:31:49"} +{"current_steps": 532, "total_steps": 2069, "loss": 0.5232, "lr": 8.87943366667718e-06, "epoch": 0.2571912013536379, "percentage": 25.71, "elapsed_time": "9:10:41", "remaining_time": "1 day, 2:30:58"} +{"current_steps": 533, "total_steps": 2069, "loss": 0.5575, "lr": 8.87438559732003e-06, "epoch": 
0.2576746434614455, "percentage": 25.76, "elapsed_time": "9:11:40", "remaining_time": "1 day, 2:29:49"} +{"current_steps": 534, "total_steps": 2069, "loss": 0.5584, "lr": 8.869327624721033e-06, "epoch": 0.25815808556925307, "percentage": 25.81, "elapsed_time": "9:12:45", "remaining_time": "1 day, 2:28:54"} +{"current_steps": 535, "total_steps": 2069, "loss": 0.5557, "lr": 8.864259761808778e-06, "epoch": 0.25864152767706067, "percentage": 25.86, "elapsed_time": "9:13:45", "remaining_time": "1 day, 2:27:47"} +{"current_steps": 536, "total_steps": 2069, "loss": 0.5672, "lr": 8.859182021537126e-06, "epoch": 0.25912496978486826, "percentage": 25.91, "elapsed_time": "9:14:50", "remaining_time": "1 day, 2:26:52"} +{"current_steps": 537, "total_steps": 2069, "loss": 0.5513, "lr": 8.854094416885192e-06, "epoch": 0.25960841189267586, "percentage": 25.95, "elapsed_time": "9:15:51", "remaining_time": "1 day, 2:25:48"} +{"current_steps": 538, "total_steps": 2069, "loss": 0.5542, "lr": 8.848996960857308e-06, "epoch": 0.26009185400048346, "percentage": 26.0, "elapsed_time": "9:16:52", "remaining_time": "1 day, 2:24:42"} +{"current_steps": 539, "total_steps": 2069, "loss": 0.5503, "lr": 8.843889666482977e-06, "epoch": 0.26057529610829105, "percentage": 26.05, "elapsed_time": "9:17:53", "remaining_time": "1 day, 2:23:38"} +{"current_steps": 540, "total_steps": 2069, "loss": 0.5245, "lr": 8.838772546816857e-06, "epoch": 0.2610587382160986, "percentage": 26.1, "elapsed_time": "9:18:59", "remaining_time": "1 day, 2:22:45"} +{"current_steps": 541, "total_steps": 2069, "loss": 0.563, "lr": 8.833645614938716e-06, "epoch": 0.2615421803239062, "percentage": 26.15, "elapsed_time": "9:20:05", "remaining_time": "1 day, 2:21:54"} +{"current_steps": 542, "total_steps": 2069, "loss": 0.5214, "lr": 8.82850888395341e-06, "epoch": 0.2620256224317138, "percentage": 26.2, "elapsed_time": "9:21:13", "remaining_time": "1 day, 2:21:10"} +{"current_steps": 543, "total_steps": 2069, "loss": 0.5539, "lr": 
8.823362366990833e-06, "epoch": 0.2625090645395214, "percentage": 26.24, "elapsed_time": "9:22:14", "remaining_time": "1 day, 2:20:03"} +{"current_steps": 544, "total_steps": 2069, "loss": 0.5432, "lr": 8.818206077205899e-06, "epoch": 0.262992506647329, "percentage": 26.29, "elapsed_time": "9:23:15", "remaining_time": "1 day, 2:18:58"} +{"current_steps": 545, "total_steps": 2069, "loss": 0.5432, "lr": 8.8130400277785e-06, "epoch": 0.2634759487551366, "percentage": 26.34, "elapsed_time": "9:24:16", "remaining_time": "1 day, 2:17:54"} +{"current_steps": 546, "total_steps": 2069, "loss": 0.5609, "lr": 8.807864231913475e-06, "epoch": 0.2639593908629442, "percentage": 26.39, "elapsed_time": "9:25:18", "remaining_time": "1 day, 2:16:52"} +{"current_steps": 547, "total_steps": 2069, "loss": 0.5608, "lr": 8.802678702840575e-06, "epoch": 0.2644428329707518, "percentage": 26.44, "elapsed_time": "9:26:18", "remaining_time": "1 day, 2:15:43"} +{"current_steps": 548, "total_steps": 2069, "loss": 0.5487, "lr": 8.79748345381443e-06, "epoch": 0.26492627507855937, "percentage": 26.49, "elapsed_time": "9:27:19", "remaining_time": "1 day, 2:14:37"} +{"current_steps": 549, "total_steps": 2069, "loss": 0.549, "lr": 8.792278498114517e-06, "epoch": 0.2654097171863669, "percentage": 26.53, "elapsed_time": "9:28:23", "remaining_time": "1 day, 2:13:41"} +{"current_steps": 550, "total_steps": 2069, "loss": 0.5564, "lr": 8.78706384904512e-06, "epoch": 0.2658931592941745, "percentage": 26.58, "elapsed_time": "9:29:23", "remaining_time": "1 day, 2:12:33"} +{"current_steps": 551, "total_steps": 2069, "loss": 0.5546, "lr": 8.7818395199353e-06, "epoch": 0.2663766014019821, "percentage": 26.63, "elapsed_time": "9:30:27", "remaining_time": "1 day, 2:11:36"} +{"current_steps": 552, "total_steps": 2069, "loss": 0.5512, "lr": 8.77660552413887e-06, "epoch": 0.2668600435097897, "percentage": 26.68, "elapsed_time": "9:31:29", "remaining_time": "1 day, 2:10:35"} +{"current_steps": 553, "total_steps": 2069, 
"loss": 0.5631, "lr": 8.77136187503434e-06, "epoch": 0.2673434856175973, "percentage": 26.73, "elapsed_time": "9:32:27", "remaining_time": "1 day, 2:09:20"} +{"current_steps": 554, "total_steps": 2069, "loss": 0.5222, "lr": 8.766108586024904e-06, "epoch": 0.2678269277254049, "percentage": 26.78, "elapsed_time": "9:33:26", "remaining_time": "1 day, 2:08:11"} +{"current_steps": 555, "total_steps": 2069, "loss": 0.5485, "lr": 8.760845670538387e-06, "epoch": 0.2683103698332125, "percentage": 26.82, "elapsed_time": "9:34:29", "remaining_time": "1 day, 2:07:10"} +{"current_steps": 556, "total_steps": 2069, "loss": 0.5624, "lr": 8.755573142027228e-06, "epoch": 0.2687938119410201, "percentage": 26.87, "elapsed_time": "9:35:32", "remaining_time": "1 day, 2:06:09"} +{"current_steps": 557, "total_steps": 2069, "loss": 0.5562, "lr": 8.750291013968432e-06, "epoch": 0.26927725404882763, "percentage": 26.92, "elapsed_time": "9:36:33", "remaining_time": "1 day, 2:05:06"} +{"current_steps": 558, "total_steps": 2069, "loss": 0.5669, "lr": 8.744999299863549e-06, "epoch": 0.2697606961566352, "percentage": 26.97, "elapsed_time": "9:37:32", "remaining_time": "1 day, 2:03:55"} +{"current_steps": 559, "total_steps": 2069, "loss": 0.557, "lr": 8.739698013238625e-06, "epoch": 0.2702441382644428, "percentage": 27.02, "elapsed_time": "9:38:34", "remaining_time": "1 day, 2:02:51"} +{"current_steps": 560, "total_steps": 2069, "loss": 0.5202, "lr": 8.734387167644171e-06, "epoch": 0.2707275803722504, "percentage": 27.07, "elapsed_time": "9:39:35", "remaining_time": "1 day, 2:01:48"} +{"current_steps": 561, "total_steps": 2069, "loss": 0.5605, "lr": 8.729066776655144e-06, "epoch": 0.271211022480058, "percentage": 27.11, "elapsed_time": "9:40:35", "remaining_time": "1 day, 2:00:40"} +{"current_steps": 562, "total_steps": 2069, "loss": 0.5193, "lr": 8.723736853870888e-06, "epoch": 0.2716944645878656, "percentage": 27.16, "elapsed_time": "9:41:35", "remaining_time": "1 day, 1:59:31"} 
+{"current_steps": 563, "total_steps": 2069, "loss": 0.5583, "lr": 8.718397412915114e-06, "epoch": 0.2721779066956732, "percentage": 27.21, "elapsed_time": "9:42:41", "remaining_time": "1 day, 1:58:40"} +{"current_steps": 564, "total_steps": 2069, "loss": 0.5365, "lr": 8.713048467435865e-06, "epoch": 0.2726613488034808, "percentage": 27.26, "elapsed_time": "9:43:39", "remaining_time": "1 day, 1:57:27"} +{"current_steps": 565, "total_steps": 2069, "loss": 0.5638, "lr": 8.707690031105478e-06, "epoch": 0.27314479091128835, "percentage": 27.31, "elapsed_time": "9:44:37", "remaining_time": "1 day, 1:56:15"} +{"current_steps": 566, "total_steps": 2069, "loss": 0.5375, "lr": 8.702322117620547e-06, "epoch": 0.27362823301909595, "percentage": 27.36, "elapsed_time": "9:45:41", "remaining_time": "1 day, 1:55:17"} +{"current_steps": 567, "total_steps": 2069, "loss": 0.5502, "lr": 8.696944740701891e-06, "epoch": 0.27411167512690354, "percentage": 27.4, "elapsed_time": "9:46:44", "remaining_time": "1 day, 1:54:16"} +{"current_steps": 568, "total_steps": 2069, "loss": 0.549, "lr": 8.69155791409452e-06, "epoch": 0.27459511723471114, "percentage": 27.45, "elapsed_time": "9:47:47", "remaining_time": "1 day, 1:53:17"} +{"current_steps": 569, "total_steps": 2069, "loss": 0.5479, "lr": 8.686161651567596e-06, "epoch": 0.27507855934251874, "percentage": 27.5, "elapsed_time": "9:48:47", "remaining_time": "1 day, 1:52:11"} +{"current_steps": 570, "total_steps": 2069, "loss": 0.517, "lr": 8.6807559669144e-06, "epoch": 0.27556200145032633, "percentage": 27.55, "elapsed_time": "9:49:52", "remaining_time": "1 day, 1:51:16"} +{"current_steps": 571, "total_steps": 2069, "loss": 0.5449, "lr": 8.6753408739523e-06, "epoch": 0.27604544355813393, "percentage": 27.6, "elapsed_time": "9:50:53", "remaining_time": "1 day, 1:50:11"} +{"current_steps": 572, "total_steps": 2069, "loss": 0.5516, "lr": 8.669916386522708e-06, "epoch": 0.2765288856659415, "percentage": 27.65, "elapsed_time": "9:51:59", 
"remaining_time": "1 day, 1:49:18"} +{"current_steps": 573, "total_steps": 2069, "loss": 0.5527, "lr": 8.664482518491053e-06, "epoch": 0.27701232777374907, "percentage": 27.69, "elapsed_time": "9:53:02", "remaining_time": "1 day, 1:48:18"} +{"current_steps": 574, "total_steps": 2069, "loss": 0.5528, "lr": 8.659039283746738e-06, "epoch": 0.27749576988155666, "percentage": 27.74, "elapsed_time": "9:54:00", "remaining_time": "1 day, 1:47:06"} +{"current_steps": 575, "total_steps": 2069, "loss": 0.5428, "lr": 8.653586696203111e-06, "epoch": 0.27797921198936426, "percentage": 27.79, "elapsed_time": "9:54:59", "remaining_time": "1 day, 1:45:57"} +{"current_steps": 576, "total_steps": 2069, "loss": 0.5566, "lr": 8.648124769797424e-06, "epoch": 0.27846265409717186, "percentage": 27.84, "elapsed_time": "9:55:55", "remaining_time": "1 day, 1:44:38"} +{"current_steps": 577, "total_steps": 2069, "loss": 0.5517, "lr": 8.6426535184908e-06, "epoch": 0.27894609620497945, "percentage": 27.89, "elapsed_time": "9:57:01", "remaining_time": "1 day, 1:43:47"} +{"current_steps": 578, "total_steps": 2069, "loss": 0.5537, "lr": 8.637172956268203e-06, "epoch": 0.27942953831278705, "percentage": 27.94, "elapsed_time": "9:58:02", "remaining_time": "1 day, 1:42:42"} +{"current_steps": 579, "total_steps": 2069, "loss": 0.5455, "lr": 8.631683097138386e-06, "epoch": 0.27991298042059465, "percentage": 27.98, "elapsed_time": "9:59:07", "remaining_time": "1 day, 1:41:47"} +{"current_steps": 580, "total_steps": 2069, "loss": 0.5216, "lr": 8.626183955133876e-06, "epoch": 0.28039642252840224, "percentage": 28.03, "elapsed_time": "10:00:08", "remaining_time": "1 day, 1:40:42"} +{"current_steps": 581, "total_steps": 2069, "loss": 0.5483, "lr": 8.620675544310921e-06, "epoch": 0.2808798646362098, "percentage": 28.08, "elapsed_time": "10:01:11", "remaining_time": "1 day, 1:39:41"} +{"current_steps": 582, "total_steps": 2069, "loss": 0.546, "lr": 8.615157878749462e-06, "epoch": 0.2813633067440174, 
"percentage": 28.13, "elapsed_time": "10:02:13", "remaining_time": "1 day, 1:38:40"} +{"current_steps": 583, "total_steps": 2069, "loss": 0.5521, "lr": 8.609630972553098e-06, "epoch": 0.281846748851825, "percentage": 28.18, "elapsed_time": "10:03:12", "remaining_time": "1 day, 1:37:30"} +{"current_steps": 584, "total_steps": 2069, "loss": 0.5586, "lr": 8.604094839849047e-06, "epoch": 0.2823301909596326, "percentage": 28.23, "elapsed_time": "10:04:12", "remaining_time": "1 day, 1:36:24"} +{"current_steps": 585, "total_steps": 2069, "loss": 0.5384, "lr": 8.598549494788111e-06, "epoch": 0.2828136330674402, "percentage": 28.27, "elapsed_time": "10:05:16", "remaining_time": "1 day, 1:35:26"} +{"current_steps": 586, "total_steps": 2069, "loss": 0.5368, "lr": 8.592994951544637e-06, "epoch": 0.28329707517524777, "percentage": 28.32, "elapsed_time": "10:06:18", "remaining_time": "1 day, 1:34:23"} +{"current_steps": 587, "total_steps": 2069, "loss": 0.5475, "lr": 8.587431224316488e-06, "epoch": 0.28378051728305537, "percentage": 28.37, "elapsed_time": "10:07:18", "remaining_time": "1 day, 1:33:16"} +{"current_steps": 588, "total_steps": 2069, "loss": 0.5212, "lr": 8.581858327324996e-06, "epoch": 0.28426395939086296, "percentage": 28.42, "elapsed_time": "10:08:20", "remaining_time": "1 day, 1:32:13"} +{"current_steps": 589, "total_steps": 2069, "loss": 0.553, "lr": 8.576276274814936e-06, "epoch": 0.28474740149867056, "percentage": 28.47, "elapsed_time": "10:09:24", "remaining_time": "1 day, 1:31:15"} +{"current_steps": 590, "total_steps": 2069, "loss": 0.5216, "lr": 8.570685081054487e-06, "epoch": 0.2852308436064781, "percentage": 28.52, "elapsed_time": "10:10:29", "remaining_time": "1 day, 1:30:22"} +{"current_steps": 591, "total_steps": 2069, "loss": 0.5505, "lr": 8.565084760335188e-06, "epoch": 0.2857142857142857, "percentage": 28.56, "elapsed_time": "10:11:34", "remaining_time": "1 day, 1:29:27"} +{"current_steps": 592, "total_steps": 2069, "loss": 0.551, "lr": 
8.559475326971907e-06, "epoch": 0.2861977278220933, "percentage": 28.61, "elapsed_time": "10:12:36", "remaining_time": "1 day, 1:28:25"} +{"current_steps": 593, "total_steps": 2069, "loss": 0.5421, "lr": 8.553856795302815e-06, "epoch": 0.2866811699299009, "percentage": 28.66, "elapsed_time": "10:13:37", "remaining_time": "1 day, 1:27:20"} +{"current_steps": 594, "total_steps": 2069, "loss": 0.5519, "lr": 8.548229179689325e-06, "epoch": 0.2871646120377085, "percentage": 28.71, "elapsed_time": "10:14:41", "remaining_time": "1 day, 1:26:21"} +{"current_steps": 595, "total_steps": 2069, "loss": 0.5537, "lr": 8.54259249451608e-06, "epoch": 0.2876480541455161, "percentage": 28.76, "elapsed_time": "10:15:42", "remaining_time": "1 day, 1:25:18"} +{"current_steps": 596, "total_steps": 2069, "loss": 0.564, "lr": 8.536946754190903e-06, "epoch": 0.2881314962533237, "percentage": 28.81, "elapsed_time": "10:16:47", "remaining_time": "1 day, 1:24:22"} +{"current_steps": 597, "total_steps": 2069, "loss": 0.5452, "lr": 8.531291973144755e-06, "epoch": 0.2886149383611313, "percentage": 28.85, "elapsed_time": "10:17:49", "remaining_time": "1 day, 1:23:20"} +{"current_steps": 598, "total_steps": 2069, "loss": 0.5509, "lr": 8.52562816583172e-06, "epoch": 0.2890983804689388, "percentage": 28.9, "elapsed_time": "10:18:54", "remaining_time": "1 day, 1:22:25"} +{"current_steps": 599, "total_steps": 2069, "loss": 0.5428, "lr": 8.519955346728939e-06, "epoch": 0.2895818225767464, "percentage": 28.95, "elapsed_time": "10:19:55", "remaining_time": "1 day, 1:21:20"} +{"current_steps": 600, "total_steps": 2069, "loss": 0.5205, "lr": 8.5142735303366e-06, "epoch": 0.290065264684554, "percentage": 29.0, "elapsed_time": "10:20:58", "remaining_time": "1 day, 1:20:20"} +{"current_steps": 601, "total_steps": 2069, "loss": 0.5476, "lr": 8.50858273117788e-06, "epoch": 0.2905487067923616, "percentage": 29.05, "elapsed_time": "10:22:00", "remaining_time": "1 day, 1:19:17"} +{"current_steps": 602, 
"total_steps": 2069, "loss": 0.545, "lr": 8.502882963798923e-06, "epoch": 0.2910321489001692, "percentage": 29.1, "elapsed_time": "10:22:59", "remaining_time": "1 day, 1:18:08"} +{"current_steps": 603, "total_steps": 2069, "loss": 0.5515, "lr": 8.497174242768792e-06, "epoch": 0.2915155910079768, "percentage": 29.14, "elapsed_time": "10:23:58", "remaining_time": "1 day, 1:16:59"} +{"current_steps": 604, "total_steps": 2069, "loss": 0.5453, "lr": 8.49145658267944e-06, "epoch": 0.2919990331157844, "percentage": 29.19, "elapsed_time": "10:24:58", "remaining_time": "1 day, 1:15:52"} +{"current_steps": 605, "total_steps": 2069, "loss": 0.5452, "lr": 8.485729998145665e-06, "epoch": 0.292482475223592, "percentage": 29.24, "elapsed_time": "10:26:01", "remaining_time": "1 day, 1:14:52"} +{"current_steps": 606, "total_steps": 2069, "loss": 0.5536, "lr": 8.479994503805079e-06, "epoch": 0.29296591733139954, "percentage": 29.29, "elapsed_time": "10:26:59", "remaining_time": "1 day, 1:13:40"} +{"current_steps": 607, "total_steps": 2069, "loss": 0.5216, "lr": 8.474250114318066e-06, "epoch": 0.29344935943920714, "percentage": 29.34, "elapsed_time": "10:28:01", "remaining_time": "1 day, 1:12:39"} +{"current_steps": 608, "total_steps": 2069, "loss": 0.5582, "lr": 8.468496844367752e-06, "epoch": 0.29393280154701473, "percentage": 29.39, "elapsed_time": "10:29:03", "remaining_time": "1 day, 1:11:35"} +{"current_steps": 609, "total_steps": 2069, "loss": 0.5511, "lr": 8.462734708659959e-06, "epoch": 0.29441624365482233, "percentage": 29.43, "elapsed_time": "10:30:06", "remaining_time": "1 day, 1:10:37"} +{"current_steps": 610, "total_steps": 2069, "loss": 0.5279, "lr": 8.456963721923166e-06, "epoch": 0.2948996857626299, "percentage": 29.48, "elapsed_time": "10:31:13", "remaining_time": "1 day, 1:09:47"} +{"current_steps": 611, "total_steps": 2069, "loss": 0.5546, "lr": 8.451183898908484e-06, "epoch": 0.2953831278704375, "percentage": 29.53, "elapsed_time": "10:32:16", "remaining_time": 
"1 day, 1:08:45"} +{"current_steps": 612, "total_steps": 2069, "loss": 0.5221, "lr": 8.445395254389605e-06, "epoch": 0.2958665699782451, "percentage": 29.58, "elapsed_time": "10:33:22", "remaining_time": "1 day, 1:07:54"} +{"current_steps": 613, "total_steps": 2069, "loss": 0.5489, "lr": 8.439597803162773e-06, "epoch": 0.2963500120860527, "percentage": 29.63, "elapsed_time": "10:34:21", "remaining_time": "1 day, 1:06:43"} +{"current_steps": 614, "total_steps": 2069, "loss": 0.5457, "lr": 8.433791560046737e-06, "epoch": 0.29683345419386026, "percentage": 29.68, "elapsed_time": "10:35:23", "remaining_time": "1 day, 1:05:42"} +{"current_steps": 615, "total_steps": 2069, "loss": 0.5553, "lr": 8.427976539882725e-06, "epoch": 0.29731689630166785, "percentage": 29.72, "elapsed_time": "10:36:22", "remaining_time": "1 day, 1:04:31"} +{"current_steps": 616, "total_steps": 2069, "loss": 0.5435, "lr": 8.422152757534395e-06, "epoch": 0.29780033840947545, "percentage": 29.77, "elapsed_time": "10:37:19", "remaining_time": "1 day, 1:03:18"} +{"current_steps": 617, "total_steps": 2069, "loss": 0.5526, "lr": 8.416320227887805e-06, "epoch": 0.29828378051728305, "percentage": 29.82, "elapsed_time": "10:38:15", "remaining_time": "1 day, 1:02:02"} +{"current_steps": 618, "total_steps": 2069, "loss": 0.5542, "lr": 8.410478965851371e-06, "epoch": 0.29876722262509064, "percentage": 29.87, "elapsed_time": "10:39:18", "remaining_time": "1 day, 1:01:02"} +{"current_steps": 619, "total_steps": 2069, "loss": 0.5546, "lr": 8.404628986355832e-06, "epoch": 0.29925066473289824, "percentage": 29.92, "elapsed_time": "10:40:20", "remaining_time": "1 day, 0:59:59"} +{"current_steps": 620, "total_steps": 2069, "loss": 0.5566, "lr": 8.398770304354203e-06, "epoch": 0.29973410684070584, "percentage": 29.97, "elapsed_time": "10:41:22", "remaining_time": "1 day, 0:58:58"} +{"current_steps": 621, "total_steps": 2069, "loss": 0.5508, "lr": 8.39290293482175e-06, "epoch": 0.30021754894851344, "percentage": 
30.01, "elapsed_time": "10:42:26", "remaining_time": "1 day, 0:58:00"} +{"current_steps": 622, "total_steps": 2069, "loss": 0.5568, "lr": 8.387026892755942e-06, "epoch": 0.30070099105632103, "percentage": 30.06, "elapsed_time": "10:43:27", "remaining_time": "1 day, 0:56:54"} +{"current_steps": 623, "total_steps": 2069, "loss": 0.5489, "lr": 8.381142193176414e-06, "epoch": 0.3011844331641286, "percentage": 30.11, "elapsed_time": "10:44:26", "remaining_time": "1 day, 0:55:45"} +{"current_steps": 624, "total_steps": 2069, "loss": 0.5554, "lr": 8.375248851124937e-06, "epoch": 0.30166787527193617, "percentage": 30.16, "elapsed_time": "10:45:28", "remaining_time": "1 day, 0:54:43"} +{"current_steps": 625, "total_steps": 2069, "loss": 0.5466, "lr": 8.369346881665364e-06, "epoch": 0.30215131737974377, "percentage": 30.21, "elapsed_time": "10:47:34", "remaining_time": "1 day, 0:56:09"} +{"current_steps": 626, "total_steps": 2069, "loss": 0.5644, "lr": 8.363436299883604e-06, "epoch": 0.30263475948755136, "percentage": 30.26, "elapsed_time": "10:48:36", "remaining_time": "1 day, 0:55:07"} +{"current_steps": 627, "total_steps": 2069, "loss": 0.5493, "lr": 8.357517120887586e-06, "epoch": 0.30311820159535896, "percentage": 30.3, "elapsed_time": "10:49:38", "remaining_time": "1 day, 0:54:04"} +{"current_steps": 628, "total_steps": 2069, "loss": 0.5523, "lr": 8.351589359807204e-06, "epoch": 0.30360164370316656, "percentage": 30.35, "elapsed_time": "10:50:43", "remaining_time": "1 day, 0:53:07"} +{"current_steps": 629, "total_steps": 2069, "loss": 0.5348, "lr": 8.345653031794292e-06, "epoch": 0.30408508581097415, "percentage": 30.4, "elapsed_time": "10:51:45", "remaining_time": "1 day, 0:52:06"} +{"current_steps": 630, "total_steps": 2069, "loss": 0.554, "lr": 8.339708152022586e-06, "epoch": 0.30456852791878175, "percentage": 30.45, "elapsed_time": "10:52:49", "remaining_time": "1 day, 0:51:09"} +{"current_steps": 631, "total_steps": 2069, "loss": 0.5489, "lr": 
8.333754735687677e-06, "epoch": 0.3050519700265893, "percentage": 30.5, "elapsed_time": "10:53:48", "remaining_time": "1 day, 0:49:59"} +{"current_steps": 632, "total_steps": 2069, "loss": 0.5508, "lr": 8.327792798006977e-06, "epoch": 0.3055354121343969, "percentage": 30.55, "elapsed_time": "10:54:52", "remaining_time": "1 day, 0:49:01"} +{"current_steps": 633, "total_steps": 2069, "loss": 0.5505, "lr": 8.321822354219677e-06, "epoch": 0.3060188542422045, "percentage": 30.59, "elapsed_time": "10:55:52", "remaining_time": "1 day, 0:47:54"} +{"current_steps": 634, "total_steps": 2069, "loss": 0.5574, "lr": 8.315843419586717e-06, "epoch": 0.3065022963500121, "percentage": 30.64, "elapsed_time": "10:56:57", "remaining_time": "1 day, 0:46:57"} +{"current_steps": 635, "total_steps": 2069, "loss": 0.5281, "lr": 8.309856009390732e-06, "epoch": 0.3069857384578197, "percentage": 30.69, "elapsed_time": "10:58:00", "remaining_time": "1 day, 0:45:56"} +{"current_steps": 636, "total_steps": 2069, "loss": 0.5607, "lr": 8.303860138936027e-06, "epoch": 0.3074691805656273, "percentage": 30.74, "elapsed_time": "10:58:57", "remaining_time": "1 day, 0:44:43"} +{"current_steps": 637, "total_steps": 2069, "loss": 0.5565, "lr": 8.297855823548528e-06, "epoch": 0.3079526226734349, "percentage": 30.79, "elapsed_time": "10:59:58", "remaining_time": "1 day, 0:43:38"} +{"current_steps": 638, "total_steps": 2069, "loss": 0.5485, "lr": 8.291843078575752e-06, "epoch": 0.30843606478124247, "percentage": 30.84, "elapsed_time": "11:00:58", "remaining_time": "1 day, 0:42:32"} +{"current_steps": 639, "total_steps": 2069, "loss": 0.5456, "lr": 8.285821919386758e-06, "epoch": 0.30891950688905, "percentage": 30.88, "elapsed_time": "11:01:56", "remaining_time": "1 day, 0:41:21"} +{"current_steps": 640, "total_steps": 2069, "loss": 0.5602, "lr": 8.279792361372114e-06, "epoch": 0.3094029489968576, "percentage": 30.93, "elapsed_time": "11:02:58", "remaining_time": "1 day, 0:40:17"} +{"current_steps": 641, 
"total_steps": 2069, "loss": 0.5536, "lr": 8.273754419943856e-06, "epoch": 0.3098863911046652, "percentage": 30.98, "elapsed_time": "11:04:02", "remaining_time": "1 day, 0:39:19"} +{"current_steps": 642, "total_steps": 2069, "loss": 0.5477, "lr": 8.267708110535449e-06, "epoch": 0.3103698332124728, "percentage": 31.03, "elapsed_time": "11:05:03", "remaining_time": "1 day, 0:38:15"} +{"current_steps": 643, "total_steps": 2069, "loss": 0.571, "lr": 8.26165344860175e-06, "epoch": 0.3108532753202804, "percentage": 31.08, "elapsed_time": "11:06:02", "remaining_time": "1 day, 0:37:06"} +{"current_steps": 644, "total_steps": 2069, "loss": 0.546, "lr": 8.255590449618958e-06, "epoch": 0.311336717428088, "percentage": 31.13, "elapsed_time": "11:07:03", "remaining_time": "1 day, 0:36:02"} +{"current_steps": 645, "total_steps": 2069, "loss": 0.5446, "lr": 8.24951912908459e-06, "epoch": 0.3118201595358956, "percentage": 31.17, "elapsed_time": "11:08:10", "remaining_time": "1 day, 0:35:08"} +{"current_steps": 646, "total_steps": 2069, "loss": 0.5352, "lr": 8.243439502517432e-06, "epoch": 0.3123036016437032, "percentage": 31.22, "elapsed_time": "11:09:11", "remaining_time": "1 day, 0:34:04"} +{"current_steps": 647, "total_steps": 2069, "loss": 0.5298, "lr": 8.237351585457499e-06, "epoch": 0.31278704375151073, "percentage": 31.27, "elapsed_time": "11:10:15", "remaining_time": "1 day, 0:33:06"} +{"current_steps": 648, "total_steps": 2069, "loss": 0.5387, "lr": 8.231255393465993e-06, "epoch": 0.3132704858593183, "percentage": 31.32, "elapsed_time": "11:11:15", "remaining_time": "1 day, 0:32:00"} +{"current_steps": 649, "total_steps": 2069, "loss": 0.5156, "lr": 8.225150942125278e-06, "epoch": 0.3137539279671259, "percentage": 31.37, "elapsed_time": "11:12:19", "remaining_time": "1 day, 0:31:02"} +{"current_steps": 650, "total_steps": 2069, "loss": 0.552, "lr": 8.21903824703882e-06, "epoch": 0.3142373700749335, "percentage": 31.42, "elapsed_time": "11:13:19", "remaining_time": "1 day, 
0:29:55"} +{"current_steps": 651, "total_steps": 2069, "loss": 0.5498, "lr": 8.21291732383116e-06, "epoch": 0.3147208121827411, "percentage": 31.46, "elapsed_time": "11:14:19", "remaining_time": "1 day, 0:28:48"} +{"current_steps": 652, "total_steps": 2069, "loss": 0.5327, "lr": 8.206788188147874e-06, "epoch": 0.3152042542905487, "percentage": 31.51, "elapsed_time": "11:15:22", "remaining_time": "1 day, 0:27:47"} +{"current_steps": 653, "total_steps": 2069, "loss": 0.5523, "lr": 8.200650855655525e-06, "epoch": 0.3156876963983563, "percentage": 31.56, "elapsed_time": "11:16:23", "remaining_time": "1 day, 0:26:43"} +{"current_steps": 654, "total_steps": 2069, "loss": 0.5428, "lr": 8.19450534204163e-06, "epoch": 0.3161711385061639, "percentage": 31.61, "elapsed_time": "11:17:21", "remaining_time": "1 day, 0:25:32"} +{"current_steps": 655, "total_steps": 2069, "loss": 0.5511, "lr": 8.188351663014615e-06, "epoch": 0.3166545806139715, "percentage": 31.66, "elapsed_time": "11:18:21", "remaining_time": "1 day, 0:24:24"} +{"current_steps": 656, "total_steps": 2069, "loss": 0.5515, "lr": 8.182189834303783e-06, "epoch": 0.31713802272177904, "percentage": 31.71, "elapsed_time": "11:19:24", "remaining_time": "1 day, 0:23:25"} +{"current_steps": 657, "total_steps": 2069, "loss": 0.5425, "lr": 8.176019871659263e-06, "epoch": 0.31762146482958664, "percentage": 31.75, "elapsed_time": "11:20:25", "remaining_time": "1 day, 0:22:20"} +{"current_steps": 658, "total_steps": 2069, "loss": 0.5192, "lr": 8.169841790851976e-06, "epoch": 0.31810490693739424, "percentage": 31.8, "elapsed_time": "11:21:28", "remaining_time": "1 day, 0:21:20"} +{"current_steps": 659, "total_steps": 2069, "loss": 0.5516, "lr": 8.163655607673594e-06, "epoch": 0.31858834904520184, "percentage": 31.85, "elapsed_time": "11:22:33", "remaining_time": "1 day, 0:20:24"} +{"current_steps": 660, "total_steps": 2069, "loss": 0.5398, "lr": 8.157461337936506e-06, "epoch": 0.31907179115300943, "percentage": 31.9, 
"elapsed_time": "11:23:36", "remaining_time": "1 day, 0:19:23"} +{"current_steps": 661, "total_steps": 2069, "loss": 0.5501, "lr": 8.151258997473757e-06, "epoch": 0.31955523326081703, "percentage": 31.95, "elapsed_time": "11:24:37", "remaining_time": "1 day, 0:18:18"} +{"current_steps": 662, "total_steps": 2069, "loss": 0.5473, "lr": 8.145048602139031e-06, "epoch": 0.3200386753686246, "percentage": 32.0, "elapsed_time": "11:25:39", "remaining_time": "1 day, 0:17:17"} +{"current_steps": 663, "total_steps": 2069, "loss": 0.5481, "lr": 8.138830167806601e-06, "epoch": 0.3205221174764322, "percentage": 32.04, "elapsed_time": "11:26:40", "remaining_time": "1 day, 0:16:13"} +{"current_steps": 664, "total_steps": 2069, "loss": 0.5563, "lr": 8.132603710371287e-06, "epoch": 0.32100555958423976, "percentage": 32.09, "elapsed_time": "11:27:40", "remaining_time": "1 day, 0:15:05"} +{"current_steps": 665, "total_steps": 2069, "loss": 0.5418, "lr": 8.126369245748413e-06, "epoch": 0.32148900169204736, "percentage": 32.14, "elapsed_time": "11:28:42", "remaining_time": "1 day, 0:14:03"} +{"current_steps": 666, "total_steps": 2069, "loss": 0.549, "lr": 8.120126789873775e-06, "epoch": 0.32197244379985496, "percentage": 32.19, "elapsed_time": "11:29:42", "remaining_time": "1 day, 0:12:57"} +{"current_steps": 667, "total_steps": 2069, "loss": 0.5515, "lr": 8.113876358703593e-06, "epoch": 0.32245588590766255, "percentage": 32.24, "elapsed_time": "11:30:42", "remaining_time": "1 day, 0:11:51"} +{"current_steps": 668, "total_steps": 2069, "loss": 0.5529, "lr": 8.10761796821447e-06, "epoch": 0.32293932801547015, "percentage": 32.29, "elapsed_time": "11:31:43", "remaining_time": "1 day, 0:10:45"} +{"current_steps": 669, "total_steps": 2069, "loss": 0.5507, "lr": 8.10135163440336e-06, "epoch": 0.32342277012327775, "percentage": 32.33, "elapsed_time": "11:32:44", "remaining_time": "1 day, 0:09:41"} +{"current_steps": 670, "total_steps": 2069, "loss": 0.5363, "lr": 8.095077373287517e-06, 
"epoch": 0.32390621223108534, "percentage": 32.38, "elapsed_time": "11:33:44", "remaining_time": "1 day, 0:08:33"} +{"current_steps": 671, "total_steps": 2069, "loss": 0.5443, "lr": 8.088795200904457e-06, "epoch": 0.32438965433889294, "percentage": 32.43, "elapsed_time": "11:34:47", "remaining_time": "1 day, 0:07:33"} +{"current_steps": 672, "total_steps": 2069, "loss": 0.5547, "lr": 8.08250513331192e-06, "epoch": 0.3248730964467005, "percentage": 32.48, "elapsed_time": "11:35:47", "remaining_time": "1 day, 0:06:28"} +{"current_steps": 673, "total_steps": 2069, "loss": 0.552, "lr": 8.076207186587826e-06, "epoch": 0.3253565385545081, "percentage": 32.53, "elapsed_time": "11:36:46", "remaining_time": "1 day, 0:05:18"} +{"current_steps": 674, "total_steps": 2069, "loss": 0.5449, "lr": 8.069901376830232e-06, "epoch": 0.3258399806623157, "percentage": 32.58, "elapsed_time": "11:37:50", "remaining_time": "1 day, 0:04:20"} +{"current_steps": 675, "total_steps": 2069, "loss": 0.5544, "lr": 8.063587720157298e-06, "epoch": 0.3263234227701233, "percentage": 32.62, "elapsed_time": "11:38:50", "remaining_time": "1 day, 0:03:15"} +{"current_steps": 676, "total_steps": 2069, "loss": 0.5388, "lr": 8.057266232707239e-06, "epoch": 0.32680686487793087, "percentage": 32.67, "elapsed_time": "11:39:56", "remaining_time": "1 day, 0:02:19"} +{"current_steps": 677, "total_steps": 2069, "loss": 0.5523, "lr": 8.050936930638285e-06, "epoch": 0.32729030698573847, "percentage": 32.72, "elapsed_time": "11:40:59", "remaining_time": "1 day, 0:01:20"} +{"current_steps": 678, "total_steps": 2069, "loss": 0.5498, "lr": 8.044599830128643e-06, "epoch": 0.32777374909354606, "percentage": 32.77, "elapsed_time": "11:42:03", "remaining_time": "1 day, 0:00:22"} +{"current_steps": 679, "total_steps": 2069, "loss": 0.5378, "lr": 8.038254947376454e-06, "epoch": 0.32825719120135366, "percentage": 32.82, "elapsed_time": "11:43:08", "remaining_time": "23:59:25"} +{"current_steps": 680, "total_steps": 2069, 
"loss": 0.5541, "lr": 8.03190229859975e-06, "epoch": 0.3287406333091612, "percentage": 32.87, "elapsed_time": "11:44:13", "remaining_time": "23:58:28"} +{"current_steps": 681, "total_steps": 2069, "loss": 0.5505, "lr": 8.02554190003641e-06, "epoch": 0.3292240754169688, "percentage": 32.91, "elapsed_time": "11:45:14", "remaining_time": "23:57:24"} +{"current_steps": 682, "total_steps": 2069, "loss": 0.5563, "lr": 8.019173767944128e-06, "epoch": 0.3297075175247764, "percentage": 32.96, "elapsed_time": "11:46:15", "remaining_time": "23:56:21"} +{"current_steps": 683, "total_steps": 2069, "loss": 0.5241, "lr": 8.012797918600363e-06, "epoch": 0.330190959632584, "percentage": 33.01, "elapsed_time": "11:47:22", "remaining_time": "23:55:28"} +{"current_steps": 684, "total_steps": 2069, "loss": 0.5251, "lr": 8.006414368302297e-06, "epoch": 0.3306744017403916, "percentage": 33.06, "elapsed_time": "11:48:26", "remaining_time": "23:54:28"} +{"current_steps": 685, "total_steps": 2069, "loss": 0.5449, "lr": 8.000023133366804e-06, "epoch": 0.3311578438481992, "percentage": 33.11, "elapsed_time": "11:49:27", "remaining_time": "23:53:25"} +{"current_steps": 686, "total_steps": 2069, "loss": 0.5401, "lr": 7.99362423013039e-06, "epoch": 0.3316412859560068, "percentage": 33.16, "elapsed_time": "11:50:28", "remaining_time": "23:52:21"} +{"current_steps": 687, "total_steps": 2069, "loss": 0.5381, "lr": 7.98721767494917e-06, "epoch": 0.3321247280638144, "percentage": 33.2, "elapsed_time": "11:51:31", "remaining_time": "23:51:20"} +{"current_steps": 688, "total_steps": 2069, "loss": 0.5542, "lr": 7.980803484198817e-06, "epoch": 0.332608170171622, "percentage": 33.25, "elapsed_time": "11:52:33", "remaining_time": "23:50:17"} +{"current_steps": 689, "total_steps": 2069, "loss": 0.5394, "lr": 7.974381674274517e-06, "epoch": 0.3330916122794295, "percentage": 33.3, "elapsed_time": "11:53:31", "remaining_time": "23:49:06"} +{"current_steps": 690, "total_steps": 2069, "loss": 0.5478, "lr": 
7.967952261590936e-06, "epoch": 0.3335750543872371, "percentage": 33.35, "elapsed_time": "11:54:35", "remaining_time": "23:48:08"} +{"current_steps": 691, "total_steps": 2069, "loss": 0.5387, "lr": 7.961515262582168e-06, "epoch": 0.3340584964950447, "percentage": 33.4, "elapsed_time": "11:55:37", "remaining_time": "23:47:07"} +{"current_steps": 692, "total_steps": 2069, "loss": 0.5488, "lr": 7.955070693701704e-06, "epoch": 0.3345419386028523, "percentage": 33.45, "elapsed_time": "11:56:39", "remaining_time": "23:46:04"} +{"current_steps": 693, "total_steps": 2069, "loss": 0.5161, "lr": 7.94861857142238e-06, "epoch": 0.3350253807106599, "percentage": 33.49, "elapsed_time": "11:57:45", "remaining_time": "23:45:09"} +{"current_steps": 694, "total_steps": 2069, "loss": 0.5504, "lr": 7.942158912236339e-06, "epoch": 0.3355088228184675, "percentage": 33.54, "elapsed_time": "11:58:40", "remaining_time": "23:43:52"} +{"current_steps": 695, "total_steps": 2069, "loss": 0.5525, "lr": 7.935691732654995e-06, "epoch": 0.3359922649262751, "percentage": 33.59, "elapsed_time": "11:59:43", "remaining_time": "23:42:52"} +{"current_steps": 696, "total_steps": 2069, "loss": 0.5549, "lr": 7.929217049208977e-06, "epoch": 0.3364757070340827, "percentage": 33.64, "elapsed_time": "12:00:41", "remaining_time": "23:41:42"} +{"current_steps": 697, "total_steps": 2069, "loss": 0.5543, "lr": 7.922734878448099e-06, "epoch": 0.33695914914189024, "percentage": 33.69, "elapsed_time": "12:01:38", "remaining_time": "23:40:30"} +{"current_steps": 698, "total_steps": 2069, "loss": 0.5456, "lr": 7.916245236941311e-06, "epoch": 0.33744259124969783, "percentage": 33.74, "elapsed_time": "12:02:40", "remaining_time": "23:39:28"} +{"current_steps": 699, "total_steps": 2069, "loss": 0.5436, "lr": 7.90974814127666e-06, "epoch": 0.33792603335750543, "percentage": 33.78, "elapsed_time": "12:03:38", "remaining_time": "23:38:16"} +{"current_steps": 700, "total_steps": 2069, "loss": 0.5569, "lr": 
7.903243608061246e-06, "epoch": 0.338409475465313, "percentage": 33.83, "elapsed_time": "12:04:39", "remaining_time": "23:37:14"} +{"current_steps": 701, "total_steps": 2069, "loss": 0.5497, "lr": 7.89673165392118e-06, "epoch": 0.3388929175731206, "percentage": 33.88, "elapsed_time": "12:05:46", "remaining_time": "23:36:20"} +{"current_steps": 702, "total_steps": 2069, "loss": 0.5489, "lr": 7.890212295501542e-06, "epoch": 0.3393763596809282, "percentage": 33.93, "elapsed_time": "12:06:48", "remaining_time": "23:35:17"} +{"current_steps": 703, "total_steps": 2069, "loss": 0.5438, "lr": 7.883685549466337e-06, "epoch": 0.3398598017887358, "percentage": 33.98, "elapsed_time": "12:07:51", "remaining_time": "23:34:17"} +{"current_steps": 704, "total_steps": 2069, "loss": 0.5506, "lr": 7.877151432498456e-06, "epoch": 0.3403432438965434, "percentage": 34.03, "elapsed_time": "12:08:53", "remaining_time": "23:33:16"} +{"current_steps": 705, "total_steps": 2069, "loss": 0.536, "lr": 7.870609961299627e-06, "epoch": 0.34082668600435095, "percentage": 34.07, "elapsed_time": "12:09:56", "remaining_time": "23:32:15"} +{"current_steps": 706, "total_steps": 2069, "loss": 0.5539, "lr": 7.864061152590376e-06, "epoch": 0.34131012811215855, "percentage": 34.12, "elapsed_time": "12:10:56", "remaining_time": "23:31:09"} +{"current_steps": 707, "total_steps": 2069, "loss": 0.5461, "lr": 7.857505023109989e-06, "epoch": 0.34179357021996615, "percentage": 34.17, "elapsed_time": "12:11:58", "remaining_time": "23:30:06"} +{"current_steps": 708, "total_steps": 2069, "loss": 0.5371, "lr": 7.850941589616458e-06, "epoch": 0.34227701232777374, "percentage": 34.22, "elapsed_time": "12:13:05", "remaining_time": "23:29:13"} +{"current_steps": 709, "total_steps": 2069, "loss": 0.5557, "lr": 7.844370868886452e-06, "epoch": 0.34276045443558134, "percentage": 34.27, "elapsed_time": "12:14:07", "remaining_time": "23:28:12"} +{"current_steps": 710, "total_steps": 2069, "loss": 0.5459, "lr": 
7.83779287771526e-06, "epoch": 0.34324389654338894, "percentage": 34.32, "elapsed_time": "12:15:11", "remaining_time": "23:27:12"} +{"current_steps": 711, "total_steps": 2069, "loss": 0.5466, "lr": 7.831207632916757e-06, "epoch": 0.34372733865119653, "percentage": 34.36, "elapsed_time": "12:16:12", "remaining_time": "23:26:07"} +{"current_steps": 712, "total_steps": 2069, "loss": 0.519, "lr": 7.824615151323363e-06, "epoch": 0.34421078075900413, "percentage": 34.41, "elapsed_time": "12:17:14", "remaining_time": "23:25:07"} +{"current_steps": 713, "total_steps": 2069, "loss": 0.5485, "lr": 7.818015449785987e-06, "epoch": 0.3446942228668117, "percentage": 34.46, "elapsed_time": "12:18:16", "remaining_time": "23:24:03"} +{"current_steps": 714, "total_steps": 2069, "loss": 0.5453, "lr": 7.811408545174001e-06, "epoch": 0.34517766497461927, "percentage": 34.51, "elapsed_time": "12:19:15", "remaining_time": "23:22:56"} +{"current_steps": 715, "total_steps": 2069, "loss": 0.5504, "lr": 7.804794454375189e-06, "epoch": 0.34566110708242687, "percentage": 34.56, "elapsed_time": "12:20:14", "remaining_time": "23:21:48"} +{"current_steps": 716, "total_steps": 2069, "loss": 0.5425, "lr": 7.798173194295693e-06, "epoch": 0.34614454919023446, "percentage": 34.61, "elapsed_time": "12:21:14", "remaining_time": "23:20:41"} +{"current_steps": 717, "total_steps": 2069, "loss": 0.5402, "lr": 7.791544781859993e-06, "epoch": 0.34662799129804206, "percentage": 34.65, "elapsed_time": "12:22:18", "remaining_time": "23:19:44"} +{"current_steps": 718, "total_steps": 2069, "loss": 0.5448, "lr": 7.784909234010843e-06, "epoch": 0.34711143340584966, "percentage": 34.7, "elapsed_time": "12:23:21", "remaining_time": "23:18:43"} +{"current_steps": 719, "total_steps": 2069, "loss": 0.5532, "lr": 7.778266567709239e-06, "epoch": 0.34759487551365725, "percentage": 34.75, "elapsed_time": "12:24:23", "remaining_time": "23:17:41"} +{"current_steps": 720, "total_steps": 2069, "loss": 0.5403, "lr": 
7.771616799934372e-06, "epoch": 0.34807831762146485, "percentage": 34.8, "elapsed_time": "12:25:24", "remaining_time": "23:16:35"} +{"current_steps": 721, "total_steps": 2069, "loss": 0.5484, "lr": 7.764959947683581e-06, "epoch": 0.34856175972927245, "percentage": 34.85, "elapsed_time": "12:26:22", "remaining_time": "23:15:26"} +{"current_steps": 722, "total_steps": 2069, "loss": 0.5367, "lr": 7.758296027972324e-06, "epoch": 0.34904520183708, "percentage": 34.9, "elapsed_time": "12:27:25", "remaining_time": "23:14:26"} +{"current_steps": 723, "total_steps": 2069, "loss": 0.5221, "lr": 7.751625057834107e-06, "epoch": 0.3495286439448876, "percentage": 34.94, "elapsed_time": "12:28:32", "remaining_time": "23:13:32"} +{"current_steps": 724, "total_steps": 2069, "loss": 0.552, "lr": 7.744947054320475e-06, "epoch": 0.3500120860526952, "percentage": 34.99, "elapsed_time": "12:29:29", "remaining_time": "23:12:21"} +{"current_steps": 725, "total_steps": 2069, "loss": 0.5516, "lr": 7.73826203450094e-06, "epoch": 0.3504955281605028, "percentage": 35.04, "elapsed_time": "12:30:27", "remaining_time": "23:11:11"} +{"current_steps": 726, "total_steps": 2069, "loss": 0.5385, "lr": 7.731570015462953e-06, "epoch": 0.3509789702683104, "percentage": 35.09, "elapsed_time": "12:31:25", "remaining_time": "23:10:02"} +{"current_steps": 727, "total_steps": 2069, "loss": 0.5512, "lr": 7.724871014311853e-06, "epoch": 0.35146241237611797, "percentage": 35.14, "elapsed_time": "12:32:26", "remaining_time": "23:08:57"} +{"current_steps": 728, "total_steps": 2069, "loss": 0.5436, "lr": 7.718165048170827e-06, "epoch": 0.35194585448392557, "percentage": 35.19, "elapsed_time": "12:33:26", "remaining_time": "23:07:51"} +{"current_steps": 729, "total_steps": 2069, "loss": 0.5439, "lr": 7.711452134180865e-06, "epoch": 0.35242929659173317, "percentage": 35.23, "elapsed_time": "12:34:29", "remaining_time": "23:06:51"} +{"current_steps": 730, "total_steps": 2069, "loss": 0.535, "lr": 
7.704732289500717e-06, "epoch": 0.3529127386995407, "percentage": 35.28, "elapsed_time": "12:35:30", "remaining_time": "23:05:47"} +{"current_steps": 731, "total_steps": 2069, "loss": 0.5438, "lr": 7.698005531306844e-06, "epoch": 0.3533961808073483, "percentage": 35.33, "elapsed_time": "12:36:30", "remaining_time": "23:04:41"} +{"current_steps": 732, "total_steps": 2069, "loss": 0.5412, "lr": 7.691271876793387e-06, "epoch": 0.3538796229151559, "percentage": 35.38, "elapsed_time": "12:37:32", "remaining_time": "23:03:38"} +{"current_steps": 733, "total_steps": 2069, "loss": 0.5326, "lr": 7.684531343172108e-06, "epoch": 0.3543630650229635, "percentage": 35.43, "elapsed_time": "12:38:37", "remaining_time": "23:02:42"} +{"current_steps": 734, "total_steps": 2069, "loss": 0.5352, "lr": 7.677783947672352e-06, "epoch": 0.3548465071307711, "percentage": 35.48, "elapsed_time": "12:39:42", "remaining_time": "23:01:44"} +{"current_steps": 735, "total_steps": 2069, "loss": 0.5083, "lr": 7.67102970754101e-06, "epoch": 0.3553299492385787, "percentage": 35.52, "elapsed_time": "12:40:45", "remaining_time": "23:00:45"} +{"current_steps": 736, "total_steps": 2069, "loss": 0.5493, "lr": 7.664268640042459e-06, "epoch": 0.3558133913463863, "percentage": 35.57, "elapsed_time": "12:41:46", "remaining_time": "22:59:41"} +{"current_steps": 737, "total_steps": 2069, "loss": 0.5415, "lr": 7.657500762458536e-06, "epoch": 0.3562968334541939, "percentage": 35.62, "elapsed_time": "12:42:50", "remaining_time": "22:58:43"} +{"current_steps": 738, "total_steps": 2069, "loss": 0.5402, "lr": 7.65072609208848e-06, "epoch": 0.3567802755620014, "percentage": 35.67, "elapsed_time": "12:43:53", "remaining_time": "22:57:42"} +{"current_steps": 739, "total_steps": 2069, "loss": 0.5523, "lr": 7.643944646248898e-06, "epoch": 0.357263717669809, "percentage": 35.72, "elapsed_time": "12:44:56", "remaining_time": "22:56:41"} +{"current_steps": 740, "total_steps": 2069, "loss": 0.5472, "lr": 7.637156442273705e-06, 
"epoch": 0.3577471597776166, "percentage": 35.77, "elapsed_time": "12:45:59", "remaining_time": "22:55:40"} +{"current_steps": 741, "total_steps": 2069, "loss": 0.5409, "lr": 7.630361497514104e-06, "epoch": 0.3582306018854242, "percentage": 35.81, "elapsed_time": "12:46:56", "remaining_time": "22:54:29"} +{"current_steps": 742, "total_steps": 2069, "loss": 0.548, "lr": 7.6235598293385184e-06, "epoch": 0.3587140439932318, "percentage": 35.86, "elapsed_time": "12:47:57", "remaining_time": "22:53:24"} +{"current_steps": 743, "total_steps": 2069, "loss": 0.5061, "lr": 7.616751455132561e-06, "epoch": 0.3591974861010394, "percentage": 35.91, "elapsed_time": "12:49:01", "remaining_time": "22:52:26"} +{"current_steps": 744, "total_steps": 2069, "loss": 0.5408, "lr": 7.6099363922989845e-06, "epoch": 0.359680928208847, "percentage": 35.96, "elapsed_time": "12:50:02", "remaining_time": "22:51:22"} +{"current_steps": 745, "total_steps": 2069, "loss": 0.5419, "lr": 7.60311465825764e-06, "epoch": 0.3601643703166546, "percentage": 36.01, "elapsed_time": "12:51:02", "remaining_time": "22:50:17"} +{"current_steps": 746, "total_steps": 2069, "loss": 0.5474, "lr": 7.596286270445429e-06, "epoch": 0.36064781242446214, "percentage": 36.06, "elapsed_time": "12:52:03", "remaining_time": "22:49:12"} +{"current_steps": 747, "total_steps": 2069, "loss": 0.5481, "lr": 7.5894512463162595e-06, "epoch": 0.36113125453226974, "percentage": 36.1, "elapsed_time": "12:53:09", "remaining_time": "22:48:18"} +{"current_steps": 748, "total_steps": 2069, "loss": 0.5483, "lr": 7.5826096033410056e-06, "epoch": 0.36161469664007734, "percentage": 36.15, "elapsed_time": "12:54:12", "remaining_time": "22:47:17"} +{"current_steps": 749, "total_steps": 2069, "loss": 0.5375, "lr": 7.575761359007459e-06, "epoch": 0.36209813874788493, "percentage": 36.2, "elapsed_time": "12:55:14", "remaining_time": "22:46:14"} +{"current_steps": 750, "total_steps": 2069, "loss": 0.5406, "lr": 7.568906530820281e-06, "epoch": 
0.36258158085569253, "percentage": 36.25, "elapsed_time": "12:56:16", "remaining_time": "22:45:12"} +{"current_steps": 751, "total_steps": 2069, "loss": 0.547, "lr": 7.562045136300969e-06, "epoch": 0.36306502296350013, "percentage": 36.3, "elapsed_time": "12:57:15", "remaining_time": "22:44:05"} +{"current_steps": 752, "total_steps": 2069, "loss": 0.5372, "lr": 7.555177192987797e-06, "epoch": 0.3635484650713077, "percentage": 36.35, "elapsed_time": "12:58:20", "remaining_time": "22:43:07"} +{"current_steps": 753, "total_steps": 2069, "loss": 0.5484, "lr": 7.5483027184357825e-06, "epoch": 0.3640319071791153, "percentage": 36.39, "elapsed_time": "12:59:21", "remaining_time": "22:42:03"} +{"current_steps": 754, "total_steps": 2069, "loss": 0.4914, "lr": 7.541421730216638e-06, "epoch": 0.3645153492869229, "percentage": 36.44, "elapsed_time": "13:00:31", "remaining_time": "22:41:15"} +{"current_steps": 755, "total_steps": 2069, "loss": 0.5362, "lr": 7.534534245918723e-06, "epoch": 0.36499879139473046, "percentage": 36.49, "elapsed_time": "13:01:31", "remaining_time": "22:40:10"} +{"current_steps": 756, "total_steps": 2069, "loss": 0.5387, "lr": 7.527640283147003e-06, "epoch": 0.36548223350253806, "percentage": 36.54, "elapsed_time": "13:02:33", "remaining_time": "22:39:07"} +{"current_steps": 757, "total_steps": 2069, "loss": 0.5334, "lr": 7.520739859523001e-06, "epoch": 0.36596567561034565, "percentage": 36.59, "elapsed_time": "13:03:33", "remaining_time": "22:38:02"} +{"current_steps": 758, "total_steps": 2069, "loss": 0.5423, "lr": 7.513832992684758e-06, "epoch": 0.36644911771815325, "percentage": 36.64, "elapsed_time": "13:04:36", "remaining_time": "22:37:00"} +{"current_steps": 759, "total_steps": 2069, "loss": 0.5371, "lr": 7.50691970028678e-06, "epoch": 0.36693255982596085, "percentage": 36.68, "elapsed_time": "13:05:36", "remaining_time": "22:35:56"} +{"current_steps": 760, "total_steps": 2069, "loss": 0.5602, "lr": 7.500000000000001e-06, "epoch": 
0.36741600193376844, "percentage": 36.73, "elapsed_time": "13:06:34", "remaining_time": "22:34:46"} +{"current_steps": 761, "total_steps": 2069, "loss": 0.5399, "lr": 7.493073909511732e-06, "epoch": 0.36789944404157604, "percentage": 36.78, "elapsed_time": "13:07:38", "remaining_time": "22:33:47"} +{"current_steps": 762, "total_steps": 2069, "loss": 0.5465, "lr": 7.486141446525619e-06, "epoch": 0.36838288614938364, "percentage": 36.83, "elapsed_time": "13:08:38", "remaining_time": "22:32:42"} +{"current_steps": 763, "total_steps": 2069, "loss": 0.5412, "lr": 7.479202628761597e-06, "epoch": 0.3688663282571912, "percentage": 36.88, "elapsed_time": "13:09:36", "remaining_time": "22:31:33"} +{"current_steps": 764, "total_steps": 2069, "loss": 0.5429, "lr": 7.472257473955841e-06, "epoch": 0.3693497703649988, "percentage": 36.93, "elapsed_time": "13:10:37", "remaining_time": "22:30:28"} +{"current_steps": 765, "total_steps": 2069, "loss": 0.5358, "lr": 7.465305999860728e-06, "epoch": 0.36983321247280637, "percentage": 36.97, "elapsed_time": "13:11:37", "remaining_time": "22:29:23"} +{"current_steps": 766, "total_steps": 2069, "loss": 0.528, "lr": 7.4583482242447856e-06, "epoch": 0.37031665458061397, "percentage": 37.02, "elapsed_time": "13:12:40", "remaining_time": "22:28:23"} +{"current_steps": 767, "total_steps": 2069, "loss": 0.5466, "lr": 7.45138416489265e-06, "epoch": 0.37080009668842157, "percentage": 37.07, "elapsed_time": "13:13:43", "remaining_time": "22:27:21"} +{"current_steps": 768, "total_steps": 2069, "loss": 0.5315, "lr": 7.444413839605017e-06, "epoch": 0.37128353879622916, "percentage": 37.12, "elapsed_time": "13:14:44", "remaining_time": "22:26:18"} +{"current_steps": 769, "total_steps": 2069, "loss": 0.5443, "lr": 7.437437266198602e-06, "epoch": 0.37176698090403676, "percentage": 37.17, "elapsed_time": "13:15:46", "remaining_time": "22:25:15"} +{"current_steps": 770, "total_steps": 2069, "loss": 0.5417, "lr": 7.430454462506085e-06, "epoch": 
0.37225042301184436, "percentage": 37.22, "elapsed_time": "13:16:47", "remaining_time": "22:24:12"} +{"current_steps": 771, "total_steps": 2069, "loss": 0.5389, "lr": 7.423465446376079e-06, "epoch": 0.3727338651196519, "percentage": 37.26, "elapsed_time": "13:17:50", "remaining_time": "22:23:11"} +{"current_steps": 772, "total_steps": 2069, "loss": 0.538, "lr": 7.416470235673069e-06, "epoch": 0.3732173072274595, "percentage": 37.31, "elapsed_time": "13:18:53", "remaining_time": "22:22:11"} +{"current_steps": 773, "total_steps": 2069, "loss": 0.5293, "lr": 7.40946884827738e-06, "epoch": 0.3737007493352671, "percentage": 37.36, "elapsed_time": "13:19:56", "remaining_time": "22:21:10"} +{"current_steps": 774, "total_steps": 2069, "loss": 0.5402, "lr": 7.402461302085121e-06, "epoch": 0.3741841914430747, "percentage": 37.41, "elapsed_time": "13:20:59", "remaining_time": "22:20:09"} +{"current_steps": 775, "total_steps": 2069, "loss": 0.5377, "lr": 7.395447615008147e-06, "epoch": 0.3746676335508823, "percentage": 37.46, "elapsed_time": "13:22:03", "remaining_time": "22:19:11"} +{"current_steps": 776, "total_steps": 2069, "loss": 0.5455, "lr": 7.388427804974003e-06, "epoch": 0.3751510756586899, "percentage": 37.51, "elapsed_time": "13:23:06", "remaining_time": "22:18:10"} +{"current_steps": 777, "total_steps": 2069, "loss": 0.5311, "lr": 7.381401889925894e-06, "epoch": 0.3756345177664975, "percentage": 37.55, "elapsed_time": "13:24:08", "remaining_time": "22:17:08"} +{"current_steps": 778, "total_steps": 2069, "loss": 0.5416, "lr": 7.374369887822623e-06, "epoch": 0.3761179598743051, "percentage": 37.6, "elapsed_time": "13:25:09", "remaining_time": "22:16:03"} +{"current_steps": 779, "total_steps": 2069, "loss": 0.5464, "lr": 7.367331816638554e-06, "epoch": 0.3766014019821126, "percentage": 37.65, "elapsed_time": "13:26:12", "remaining_time": "22:15:03"} +{"current_steps": 780, "total_steps": 2069, "loss": 0.5415, "lr": 7.360287694363566e-06, "epoch": 0.3770848440899202, 
"percentage": 37.7, "elapsed_time": "13:27:15", "remaining_time": "22:14:02"} +{"current_steps": 781, "total_steps": 2069, "loss": 0.5388, "lr": 7.353237539002999e-06, "epoch": 0.3775682861977278, "percentage": 37.75, "elapsed_time": "13:28:13", "remaining_time": "22:12:53"} +{"current_steps": 782, "total_steps": 2069, "loss": 0.5513, "lr": 7.346181368577624e-06, "epoch": 0.3780517283055354, "percentage": 37.8, "elapsed_time": "13:29:15", "remaining_time": "22:11:51"} +{"current_steps": 783, "total_steps": 2069, "loss": 0.5393, "lr": 7.3391192011235764e-06, "epoch": 0.378535170413343, "percentage": 37.84, "elapsed_time": "13:30:14", "remaining_time": "22:10:45"} +{"current_steps": 784, "total_steps": 2069, "loss": 0.5509, "lr": 7.3320510546923285e-06, "epoch": 0.3790186125211506, "percentage": 37.89, "elapsed_time": "13:31:16", "remaining_time": "22:09:41"} +{"current_steps": 785, "total_steps": 2069, "loss": 0.5387, "lr": 7.324976947350631e-06, "epoch": 0.3795020546289582, "percentage": 37.94, "elapsed_time": "13:32:19", "remaining_time": "22:08:42"} +{"current_steps": 786, "total_steps": 2069, "loss": 0.5298, "lr": 7.317896897180472e-06, "epoch": 0.3799854967367658, "percentage": 37.99, "elapsed_time": "13:33:22", "remaining_time": "22:07:40"} +{"current_steps": 787, "total_steps": 2069, "loss": 0.5371, "lr": 7.31081092227903e-06, "epoch": 0.3804689388445734, "percentage": 38.04, "elapsed_time": "13:34:24", "remaining_time": "22:06:38"} +{"current_steps": 788, "total_steps": 2069, "loss": 0.5368, "lr": 7.303719040758631e-06, "epoch": 0.38095238095238093, "percentage": 38.09, "elapsed_time": "13:35:24", "remaining_time": "22:05:33"} +{"current_steps": 789, "total_steps": 2069, "loss": 0.5439, "lr": 7.296621270746691e-06, "epoch": 0.38143582306018853, "percentage": 38.13, "elapsed_time": "13:36:26", "remaining_time": "22:04:31"} +{"current_steps": 790, "total_steps": 2069, "loss": 0.5188, "lr": 7.289517630385687e-06, "epoch": 0.3819192651679961, "percentage": 
38.18, "elapsed_time": "13:37:25", "remaining_time": "22:03:23"} +{"current_steps": 791, "total_steps": 2069, "loss": 0.5404, "lr": 7.282408137833093e-06, "epoch": 0.3824027072758037, "percentage": 38.23, "elapsed_time": "13:38:25", "remaining_time": "22:02:19"} +{"current_steps": 792, "total_steps": 2069, "loss": 0.5377, "lr": 7.275292811261346e-06, "epoch": 0.3828861493836113, "percentage": 38.28, "elapsed_time": "13:39:26", "remaining_time": "22:01:14"} +{"current_steps": 793, "total_steps": 2069, "loss": 0.5489, "lr": 7.268171668857794e-06, "epoch": 0.3833695914914189, "percentage": 38.33, "elapsed_time": "13:40:32", "remaining_time": "22:00:19"} +{"current_steps": 794, "total_steps": 2069, "loss": 0.5415, "lr": 7.261044728824652e-06, "epoch": 0.3838530335992265, "percentage": 38.38, "elapsed_time": "13:41:34", "remaining_time": "21:59:17"} +{"current_steps": 795, "total_steps": 2069, "loss": 0.5526, "lr": 7.253912009378953e-06, "epoch": 0.3843364757070341, "percentage": 38.42, "elapsed_time": "13:42:36", "remaining_time": "21:58:15"} +{"current_steps": 796, "total_steps": 2069, "loss": 0.5452, "lr": 7.246773528752501e-06, "epoch": 0.38481991781484165, "percentage": 38.47, "elapsed_time": "13:43:40", "remaining_time": "21:57:15"} +{"current_steps": 797, "total_steps": 2069, "loss": 0.528, "lr": 7.239629305191828e-06, "epoch": 0.38530335992264925, "percentage": 38.52, "elapsed_time": "13:44:41", "remaining_time": "21:56:12"} +{"current_steps": 798, "total_steps": 2069, "loss": 0.5413, "lr": 7.2324793569581474e-06, "epoch": 0.38578680203045684, "percentage": 38.57, "elapsed_time": "13:45:44", "remaining_time": "21:55:10"} +{"current_steps": 799, "total_steps": 2069, "loss": 0.5111, "lr": 7.2253237023273e-06, "epoch": 0.38627024413826444, "percentage": 38.62, "elapsed_time": "13:46:48", "remaining_time": "21:54:12"} +{"current_steps": 800, "total_steps": 2069, "loss": 0.5472, "lr": 7.21816235958972e-06, "epoch": 0.38675368624607204, "percentage": 38.67, 
"elapsed_time": "13:47:46", "remaining_time": "21:53:03"} +{"current_steps": 801, "total_steps": 2069, "loss": 0.5441, "lr": 7.210995347050372e-06, "epoch": 0.38723712835387963, "percentage": 38.71, "elapsed_time": "13:48:51", "remaining_time": "21:52:05"} +{"current_steps": 802, "total_steps": 2069, "loss": 0.5403, "lr": 7.203822683028721e-06, "epoch": 0.38772057046168723, "percentage": 38.76, "elapsed_time": "13:49:53", "remaining_time": "21:51:03"} +{"current_steps": 803, "total_steps": 2069, "loss": 0.5303, "lr": 7.196644385858673e-06, "epoch": 0.3882040125694948, "percentage": 38.81, "elapsed_time": "13:50:53", "remaining_time": "21:49:58"} +{"current_steps": 804, "total_steps": 2069, "loss": 0.5453, "lr": 7.189460473888535e-06, "epoch": 0.38868745467730237, "percentage": 38.86, "elapsed_time": "13:51:52", "remaining_time": "21:48:51"} +{"current_steps": 805, "total_steps": 2069, "loss": 0.5491, "lr": 7.182270965480963e-06, "epoch": 0.38917089678510997, "percentage": 38.91, "elapsed_time": "13:52:57", "remaining_time": "21:47:53"} +{"current_steps": 806, "total_steps": 2069, "loss": 0.5328, "lr": 7.17507587901292e-06, "epoch": 0.38965433889291756, "percentage": 38.96, "elapsed_time": "13:54:01", "remaining_time": "21:46:54"} +{"current_steps": 807, "total_steps": 2069, "loss": 0.5401, "lr": 7.167875232875632e-06, "epoch": 0.39013778100072516, "percentage": 39.0, "elapsed_time": "13:54:59", "remaining_time": "21:45:45"} +{"current_steps": 808, "total_steps": 2069, "loss": 0.5198, "lr": 7.160669045474524e-06, "epoch": 0.39062122310853276, "percentage": 39.05, "elapsed_time": "13:56:06", "remaining_time": "21:44:51"} +{"current_steps": 809, "total_steps": 2069, "loss": 0.5396, "lr": 7.153457335229196e-06, "epoch": 0.39110466521634035, "percentage": 39.1, "elapsed_time": "13:57:09", "remaining_time": "21:43:51"} +{"current_steps": 810, "total_steps": 2069, "loss": 0.5421, "lr": 7.146240120573358e-06, "epoch": 0.39158810732414795, "percentage": 39.15, 
"elapsed_time": "13:58:11", "remaining_time": "21:42:49"} +{"current_steps": 811, "total_steps": 2069, "loss": 0.5495, "lr": 7.1390174199547945e-06, "epoch": 0.39207154943195555, "percentage": 39.2, "elapsed_time": "13:59:07", "remaining_time": "21:41:37"} +{"current_steps": 812, "total_steps": 2069, "loss": 0.5528, "lr": 7.131789251835309e-06, "epoch": 0.3925549915397631, "percentage": 39.25, "elapsed_time": "14:00:10", "remaining_time": "21:40:37"} +{"current_steps": 813, "total_steps": 2069, "loss": 0.546, "lr": 7.124555634690684e-06, "epoch": 0.3930384336475707, "percentage": 39.29, "elapsed_time": "14:01:13", "remaining_time": "21:39:36"} +{"current_steps": 814, "total_steps": 2069, "loss": 0.5164, "lr": 7.117316587010625e-06, "epoch": 0.3935218757553783, "percentage": 39.34, "elapsed_time": "14:02:19", "remaining_time": "21:38:39"} +{"current_steps": 815, "total_steps": 2069, "loss": 0.5405, "lr": 7.110072127298722e-06, "epoch": 0.3940053178631859, "percentage": 39.39, "elapsed_time": "14:03:20", "remaining_time": "21:37:37"} +{"current_steps": 816, "total_steps": 2069, "loss": 0.5474, "lr": 7.1028222740724e-06, "epoch": 0.3944887599709935, "percentage": 39.44, "elapsed_time": "14:04:27", "remaining_time": "21:36:41"} +{"current_steps": 817, "total_steps": 2069, "loss": 0.537, "lr": 7.095567045862867e-06, "epoch": 0.39497220207880107, "percentage": 39.49, "elapsed_time": "14:05:28", "remaining_time": "21:35:37"} +{"current_steps": 818, "total_steps": 2069, "loss": 0.5418, "lr": 7.0883064612150684e-06, "epoch": 0.39545564418660867, "percentage": 39.54, "elapsed_time": "14:06:31", "remaining_time": "21:34:37"} +{"current_steps": 819, "total_steps": 2069, "loss": 0.5421, "lr": 7.081040538687649e-06, "epoch": 0.39593908629441626, "percentage": 39.58, "elapsed_time": "14:07:33", "remaining_time": "21:33:35"} +{"current_steps": 820, "total_steps": 2069, "loss": 0.5322, "lr": 7.073769296852888e-06, "epoch": 0.3964225284022238, "percentage": 39.63, "elapsed_time": 
"14:08:32", "remaining_time": "21:32:29"} +{"current_steps": 821, "total_steps": 2069, "loss": 0.5021, "lr": 7.066492754296668e-06, "epoch": 0.3969059705100314, "percentage": 39.68, "elapsed_time": "14:09:35", "remaining_time": "21:31:28"} +{"current_steps": 822, "total_steps": 2069, "loss": 0.5449, "lr": 7.059210929618416e-06, "epoch": 0.397389412617839, "percentage": 39.73, "elapsed_time": "14:10:36", "remaining_time": "21:30:24"} +{"current_steps": 823, "total_steps": 2069, "loss": 0.5376, "lr": 7.051923841431063e-06, "epoch": 0.3978728547256466, "percentage": 39.78, "elapsed_time": "14:11:36", "remaining_time": "21:29:19"} +{"current_steps": 824, "total_steps": 2069, "loss": 0.5449, "lr": 7.044631508360996e-06, "epoch": 0.3983562968334542, "percentage": 39.83, "elapsed_time": "14:12:37", "remaining_time": "21:28:14"} +{"current_steps": 825, "total_steps": 2069, "loss": 0.5443, "lr": 7.037333949048005e-06, "epoch": 0.3988397389412618, "percentage": 39.87, "elapsed_time": "14:13:34", "remaining_time": "21:27:04"} +{"current_steps": 826, "total_steps": 2069, "loss": 0.4994, "lr": 7.03003118214524e-06, "epoch": 0.3993231810490694, "percentage": 39.92, "elapsed_time": "14:14:39", "remaining_time": "21:26:07"} +{"current_steps": 827, "total_steps": 2069, "loss": 0.5249, "lr": 7.022723226319159e-06, "epoch": 0.399806623156877, "percentage": 39.97, "elapsed_time": "14:15:41", "remaining_time": "21:25:05"} +{"current_steps": 828, "total_steps": 2069, "loss": 0.531, "lr": 7.0154101002494914e-06, "epoch": 0.4002900652646846, "percentage": 40.02, "elapsed_time": "14:16:41", "remaining_time": "21:24:00"} +{"current_steps": 829, "total_steps": 2069, "loss": 0.5331, "lr": 7.008091822629172e-06, "epoch": 0.4007735073724921, "percentage": 40.07, "elapsed_time": "14:17:43", "remaining_time": "21:22:58"} +{"current_steps": 830, "total_steps": 2069, "loss": 0.5302, "lr": 7.00076841216431e-06, "epoch": 0.4012569494802997, "percentage": 40.12, "elapsed_time": "14:18:40", 
"remaining_time": "21:21:48"} +{"current_steps": 831, "total_steps": 2069, "loss": 0.5418, "lr": 6.993439887574133e-06, "epoch": 0.4017403915881073, "percentage": 40.16, "elapsed_time": "14:19:40", "remaining_time": "21:20:43"} +{"current_steps": 832, "total_steps": 2069, "loss": 0.535, "lr": 6.986106267590942e-06, "epoch": 0.4022238336959149, "percentage": 40.21, "elapsed_time": "14:20:45", "remaining_time": "21:19:44"} +{"current_steps": 833, "total_steps": 2069, "loss": 0.5362, "lr": 6.978767570960057e-06, "epoch": 0.4027072758037225, "percentage": 40.26, "elapsed_time": "14:22:57", "remaining_time": "21:20:27"} +{"current_steps": 834, "total_steps": 2069, "loss": 0.5346, "lr": 6.971423816439782e-06, "epoch": 0.4031907179115301, "percentage": 40.31, "elapsed_time": "14:24:02", "remaining_time": "21:19:29"} +{"current_steps": 835, "total_steps": 2069, "loss": 0.541, "lr": 6.964075022801341e-06, "epoch": 0.4036741600193377, "percentage": 40.36, "elapsed_time": "14:25:01", "remaining_time": "21:18:22"} +{"current_steps": 836, "total_steps": 2069, "loss": 0.5441, "lr": 6.956721208828847e-06, "epoch": 0.4041576021271453, "percentage": 40.41, "elapsed_time": "14:26:04", "remaining_time": "21:17:21"} +{"current_steps": 837, "total_steps": 2069, "loss": 0.5416, "lr": 6.949362393319239e-06, "epoch": 0.40464104423495284, "percentage": 40.45, "elapsed_time": "14:27:08", "remaining_time": "21:16:21"} +{"current_steps": 838, "total_steps": 2069, "loss": 0.5438, "lr": 6.941998595082243e-06, "epoch": 0.40512448634276044, "percentage": 40.5, "elapsed_time": "14:28:07", "remaining_time": "21:15:14"} +{"current_steps": 839, "total_steps": 2069, "loss": 0.5322, "lr": 6.934629832940322e-06, "epoch": 0.40560792845056803, "percentage": 40.55, "elapsed_time": "14:29:05", "remaining_time": "21:14:07"} +{"current_steps": 840, "total_steps": 2069, "loss": 0.544, "lr": 6.927256125728624e-06, "epoch": 0.40609137055837563, "percentage": 40.6, "elapsed_time": "14:30:08", "remaining_time": 
"21:13:06"} +{"current_steps": 841, "total_steps": 2069, "loss": 0.5401, "lr": 6.91987749229494e-06, "epoch": 0.4065748126661832, "percentage": 40.65, "elapsed_time": "14:31:06", "remaining_time": "21:11:57"} +{"current_steps": 842, "total_steps": 2069, "loss": 0.5393, "lr": 6.91249395149965e-06, "epoch": 0.4070582547739908, "percentage": 40.7, "elapsed_time": "14:32:06", "remaining_time": "21:10:52"} +{"current_steps": 843, "total_steps": 2069, "loss": 0.5384, "lr": 6.905105522215684e-06, "epoch": 0.4075416968817984, "percentage": 40.74, "elapsed_time": "14:33:09", "remaining_time": "21:09:51"} +{"current_steps": 844, "total_steps": 2069, "loss": 0.5297, "lr": 6.897712223328457e-06, "epoch": 0.408025138989606, "percentage": 40.79, "elapsed_time": "14:34:08", "remaining_time": "21:08:45"} +{"current_steps": 845, "total_steps": 2069, "loss": 0.5386, "lr": 6.89031407373584e-06, "epoch": 0.40850858109741356, "percentage": 40.84, "elapsed_time": "14:35:13", "remaining_time": "21:07:47"} +{"current_steps": 846, "total_steps": 2069, "loss": 0.5429, "lr": 6.8829110923481e-06, "epoch": 0.40899202320522116, "percentage": 40.89, "elapsed_time": "14:36:18", "remaining_time": "21:06:48"} +{"current_steps": 847, "total_steps": 2069, "loss": 0.5339, "lr": 6.875503298087853e-06, "epoch": 0.40947546531302875, "percentage": 40.94, "elapsed_time": "14:37:17", "remaining_time": "21:05:42"} +{"current_steps": 848, "total_steps": 2069, "loss": 0.5392, "lr": 6.868090709890016e-06, "epoch": 0.40995890742083635, "percentage": 40.99, "elapsed_time": "14:38:16", "remaining_time": "21:04:35"} +{"current_steps": 849, "total_steps": 2069, "loss": 0.5046, "lr": 6.8606733467017675e-06, "epoch": 0.41044234952864395, "percentage": 41.03, "elapsed_time": "14:39:18", "remaining_time": "21:03:33"} +{"current_steps": 850, "total_steps": 2069, "loss": 0.5331, "lr": 6.85325122748248e-06, "epoch": 0.41092579163645154, "percentage": 41.08, "elapsed_time": "14:40:19", "remaining_time": "21:02:28"} 
+{"current_steps": 851, "total_steps": 2069, "loss": 0.5409, "lr": 6.845824371203691e-06, "epoch": 0.41140923374425914, "percentage": 41.13, "elapsed_time": "14:41:26", "remaining_time": "21:01:34"} +{"current_steps": 852, "total_steps": 2069, "loss": 0.5371, "lr": 6.838392796849042e-06, "epoch": 0.41189267585206674, "percentage": 41.18, "elapsed_time": "14:42:28", "remaining_time": "21:00:31"} +{"current_steps": 853, "total_steps": 2069, "loss": 0.5304, "lr": 6.830956523414239e-06, "epoch": 0.4123761179598743, "percentage": 41.23, "elapsed_time": "14:43:29", "remaining_time": "20:59:28"} +{"current_steps": 854, "total_steps": 2069, "loss": 0.5316, "lr": 6.8235155699069944e-06, "epoch": 0.4128595600676819, "percentage": 41.28, "elapsed_time": "14:44:30", "remaining_time": "20:58:23"} +{"current_steps": 855, "total_steps": 2069, "loss": 0.5127, "lr": 6.816069955346986e-06, "epoch": 0.41334300217548947, "percentage": 41.32, "elapsed_time": "14:45:32", "remaining_time": "20:57:21"} +{"current_steps": 856, "total_steps": 2069, "loss": 0.5459, "lr": 6.808619698765804e-06, "epoch": 0.41382644428329707, "percentage": 41.37, "elapsed_time": "14:46:34", "remaining_time": "20:56:20"} +{"current_steps": 857, "total_steps": 2069, "loss": 0.5316, "lr": 6.8011648192069045e-06, "epoch": 0.41430988639110466, "percentage": 41.42, "elapsed_time": "14:47:36", "remaining_time": "20:55:16"} +{"current_steps": 858, "total_steps": 2069, "loss": 0.5341, "lr": 6.7937053357255585e-06, "epoch": 0.41479332849891226, "percentage": 41.47, "elapsed_time": "14:48:37", "remaining_time": "20:54:12"} +{"current_steps": 859, "total_steps": 2069, "loss": 0.5392, "lr": 6.786241267388812e-06, "epoch": 0.41527677060671986, "percentage": 41.52, "elapsed_time": "14:49:40", "remaining_time": "20:53:12"} +{"current_steps": 860, "total_steps": 2069, "loss": 0.5259, "lr": 6.778772633275421e-06, "epoch": 0.41576021271452746, "percentage": 41.57, "elapsed_time": "14:50:42", "remaining_time": "20:52:10"} 
+{"current_steps": 861, "total_steps": 2069, "loss": 0.5439, "lr": 6.771299452475818e-06, "epoch": 0.41624365482233505, "percentage": 41.61, "elapsed_time": "14:51:44", "remaining_time": "20:51:07"} +{"current_steps": 862, "total_steps": 2069, "loss": 0.521, "lr": 6.763821744092054e-06, "epoch": 0.4167270969301426, "percentage": 41.66, "elapsed_time": "14:52:45", "remaining_time": "20:50:03"} +{"current_steps": 863, "total_steps": 2069, "loss": 0.5282, "lr": 6.756339527237756e-06, "epoch": 0.4172105390379502, "percentage": 41.71, "elapsed_time": "14:53:46", "remaining_time": "20:49:00"} +{"current_steps": 864, "total_steps": 2069, "loss": 0.5362, "lr": 6.748852821038075e-06, "epoch": 0.4176939811457578, "percentage": 41.76, "elapsed_time": "14:54:48", "remaining_time": "20:47:57"} +{"current_steps": 865, "total_steps": 2069, "loss": 0.5452, "lr": 6.741361644629629e-06, "epoch": 0.4181774232535654, "percentage": 41.81, "elapsed_time": "14:55:45", "remaining_time": "20:46:48"} +{"current_steps": 866, "total_steps": 2069, "loss": 0.5374, "lr": 6.733866017160475e-06, "epoch": 0.418660865361373, "percentage": 41.86, "elapsed_time": "14:56:42", "remaining_time": "20:45:39"} +{"current_steps": 867, "total_steps": 2069, "loss": 0.5368, "lr": 6.7263659577900375e-06, "epoch": 0.4191443074691806, "percentage": 41.9, "elapsed_time": "14:57:41", "remaining_time": "20:44:33"} +{"current_steps": 868, "total_steps": 2069, "loss": 0.5361, "lr": 6.718861485689077e-06, "epoch": 0.4196277495769882, "percentage": 41.95, "elapsed_time": "14:58:39", "remaining_time": "20:43:25"} +{"current_steps": 869, "total_steps": 2069, "loss": 0.5132, "lr": 6.711352620039623e-06, "epoch": 0.42011119168479577, "percentage": 42.0, "elapsed_time": "14:59:43", "remaining_time": "20:42:25"} +{"current_steps": 870, "total_steps": 2069, "loss": 0.5282, "lr": 6.703839380034945e-06, "epoch": 0.4205946337926033, "percentage": 42.05, "elapsed_time": "15:00:43", "remaining_time": "20:41:21"} +{"current_steps": 
871, "total_steps": 2069, "loss": 0.5046, "lr": 6.6963217848794895e-06, "epoch": 0.4210780759004109, "percentage": 42.1, "elapsed_time": "15:01:50", "remaining_time": "20:40:25"} +{"current_steps": 872, "total_steps": 2069, "loss": 0.5365, "lr": 6.6887998537888354e-06, "epoch": 0.4215615180082185, "percentage": 42.15, "elapsed_time": "15:02:52", "remaining_time": "20:39:22"} +{"current_steps": 873, "total_steps": 2069, "loss": 0.5315, "lr": 6.681273605989643e-06, "epoch": 0.4220449601160261, "percentage": 42.19, "elapsed_time": "15:03:55", "remaining_time": "20:38:22"} +{"current_steps": 874, "total_steps": 2069, "loss": 0.5328, "lr": 6.673743060719613e-06, "epoch": 0.4225284022238337, "percentage": 42.24, "elapsed_time": "15:04:58", "remaining_time": "20:37:21"} +{"current_steps": 875, "total_steps": 2069, "loss": 0.5359, "lr": 6.666208237227421e-06, "epoch": 0.4230118443316413, "percentage": 42.29, "elapsed_time": "15:06:02", "remaining_time": "20:36:21"} +{"current_steps": 876, "total_steps": 2069, "loss": 0.5139, "lr": 6.6586691547726855e-06, "epoch": 0.4234952864394489, "percentage": 42.34, "elapsed_time": "15:07:00", "remaining_time": "20:35:13"} +{"current_steps": 877, "total_steps": 2069, "loss": 0.5455, "lr": 6.651125832625908e-06, "epoch": 0.4239787285472565, "percentage": 42.39, "elapsed_time": "15:07:59", "remaining_time": "20:34:07"} +{"current_steps": 878, "total_steps": 2069, "loss": 0.5528, "lr": 6.6435782900684284e-06, "epoch": 0.42446217065506403, "percentage": 42.44, "elapsed_time": "15:08:58", "remaining_time": "20:33:01"} +{"current_steps": 879, "total_steps": 2069, "loss": 0.5391, "lr": 6.636026546392374e-06, "epoch": 0.4249456127628716, "percentage": 42.48, "elapsed_time": "15:09:58", "remaining_time": "20:31:55"} +{"current_steps": 880, "total_steps": 2069, "loss": 0.5309, "lr": 6.628470620900611e-06, "epoch": 0.4254290548706792, "percentage": 42.53, "elapsed_time": "15:10:58", "remaining_time": "20:30:50"} +{"current_steps": 881, 
"total_steps": 2069, "loss": 0.5194, "lr": 6.620910532906692e-06, "epoch": 0.4259124969784868, "percentage": 42.58, "elapsed_time": "15:11:59", "remaining_time": "20:29:46"} +{"current_steps": 882, "total_steps": 2069, "loss": 0.5132, "lr": 6.613346301734813e-06, "epoch": 0.4263959390862944, "percentage": 42.63, "elapsed_time": "15:13:05", "remaining_time": "20:28:50"} +{"current_steps": 883, "total_steps": 2069, "loss": 0.5358, "lr": 6.605777946719757e-06, "epoch": 0.426879381194102, "percentage": 42.68, "elapsed_time": "15:14:08", "remaining_time": "20:27:49"} +{"current_steps": 884, "total_steps": 2069, "loss": 0.74, "lr": 6.59820548720685e-06, "epoch": 0.4273628233019096, "percentage": 42.73, "elapsed_time": "15:15:14", "remaining_time": "20:26:52"} +{"current_steps": 885, "total_steps": 2069, "loss": 0.5401, "lr": 6.590628942551909e-06, "epoch": 0.4278462654097172, "percentage": 42.77, "elapsed_time": "15:16:13", "remaining_time": "20:25:47"} +{"current_steps": 886, "total_steps": 2069, "loss": 0.5384, "lr": 6.583048332121193e-06, "epoch": 0.42832970751752475, "percentage": 42.82, "elapsed_time": "15:17:16", "remaining_time": "20:24:45"} +{"current_steps": 887, "total_steps": 2069, "loss": 0.5018, "lr": 6.5754636752913535e-06, "epoch": 0.42881314962533235, "percentage": 42.87, "elapsed_time": "15:18:23", "remaining_time": "20:23:50"} +{"current_steps": 888, "total_steps": 2069, "loss": 0.5303, "lr": 6.567874991449383e-06, "epoch": 0.42929659173313994, "percentage": 42.92, "elapsed_time": "15:19:23", "remaining_time": "20:22:44"} +{"current_steps": 889, "total_steps": 2069, "loss": 0.5454, "lr": 6.560282299992571e-06, "epoch": 0.42978003384094754, "percentage": 42.97, "elapsed_time": "15:20:29", "remaining_time": "20:21:47"} +{"current_steps": 890, "total_steps": 2069, "loss": 0.5115, "lr": 6.552685620328447e-06, "epoch": 0.43026347594875514, "percentage": 43.02, "elapsed_time": "15:21:33", "remaining_time": "20:20:48"} +{"current_steps": 891, "total_steps": 
2069, "loss": 0.5386, "lr": 6.545084971874738e-06, "epoch": 0.43074691805656273, "percentage": 43.06, "elapsed_time": "15:22:37", "remaining_time": "20:19:48"} +{"current_steps": 892, "total_steps": 2069, "loss": 0.5464, "lr": 6.537480374059313e-06, "epoch": 0.43123036016437033, "percentage": 43.11, "elapsed_time": "15:23:38", "remaining_time": "20:18:44"} +{"current_steps": 893, "total_steps": 2069, "loss": 0.5225, "lr": 6.529871846320138e-06, "epoch": 0.4317138022721779, "percentage": 43.16, "elapsed_time": "15:24:41", "remaining_time": "20:17:44"} +{"current_steps": 894, "total_steps": 2069, "loss": 0.5363, "lr": 6.522259408105223e-06, "epoch": 0.4321972443799855, "percentage": 43.21, "elapsed_time": "15:25:47", "remaining_time": "20:16:46"} +{"current_steps": 895, "total_steps": 2069, "loss": 0.533, "lr": 6.514643078872571e-06, "epoch": 0.43268068648779306, "percentage": 43.26, "elapsed_time": "15:26:51", "remaining_time": "20:15:46"} +{"current_steps": 896, "total_steps": 2069, "loss": 0.5428, "lr": 6.507022878090137e-06, "epoch": 0.43316412859560066, "percentage": 43.31, "elapsed_time": "15:27:56", "remaining_time": "20:14:48"} +{"current_steps": 897, "total_steps": 2069, "loss": 0.5337, "lr": 6.499398825235767e-06, "epoch": 0.43364757070340826, "percentage": 43.35, "elapsed_time": "15:28:56", "remaining_time": "20:13:44"} +{"current_steps": 898, "total_steps": 2069, "loss": 0.5323, "lr": 6.491770939797152e-06, "epoch": 0.43413101281121586, "percentage": 43.4, "elapsed_time": "15:29:57", "remaining_time": "20:12:40"} +{"current_steps": 899, "total_steps": 2069, "loss": 0.5407, "lr": 6.4841392412717864e-06, "epoch": 0.43461445491902345, "percentage": 43.45, "elapsed_time": "15:30:58", "remaining_time": "20:11:37"} +{"current_steps": 900, "total_steps": 2069, "loss": 0.5347, "lr": 6.476503749166903e-06, "epoch": 0.43509789702683105, "percentage": 43.5, "elapsed_time": "15:32:00", "remaining_time": "20:10:34"} +{"current_steps": 901, "total_steps": 2069, "loss": 
0.5295, "lr": 6.4688644829994385e-06, "epoch": 0.43558133913463865, "percentage": 43.55, "elapsed_time": "15:32:58", "remaining_time": "20:09:27"} +{"current_steps": 902, "total_steps": 2069, "loss": 0.5457, "lr": 6.4612214622959705e-06, "epoch": 0.43606478124244624, "percentage": 43.6, "elapsed_time": "15:34:02", "remaining_time": "20:08:27"} +{"current_steps": 903, "total_steps": 2069, "loss": 0.521, "lr": 6.453574706592676e-06, "epoch": 0.4365482233502538, "percentage": 43.64, "elapsed_time": "15:35:02", "remaining_time": "20:07:22"} +{"current_steps": 904, "total_steps": 2069, "loss": 0.5354, "lr": 6.44592423543528e-06, "epoch": 0.4370316654580614, "percentage": 43.69, "elapsed_time": "15:36:05", "remaining_time": "20:06:21"} +{"current_steps": 905, "total_steps": 2069, "loss": 0.544, "lr": 6.4382700683790025e-06, "epoch": 0.437515107565869, "percentage": 43.74, "elapsed_time": "15:37:07", "remaining_time": "20:05:18"} +{"current_steps": 906, "total_steps": 2069, "loss": 0.5192, "lr": 6.4306122249885105e-06, "epoch": 0.4379985496736766, "percentage": 43.79, "elapsed_time": "15:38:10", "remaining_time": "20:04:18"} +{"current_steps": 907, "total_steps": 2069, "loss": 0.526, "lr": 6.422950724837872e-06, "epoch": 0.43848199178148417, "percentage": 43.84, "elapsed_time": "15:39:10", "remaining_time": "20:03:13"} +{"current_steps": 908, "total_steps": 2069, "loss": 0.5088, "lr": 6.415285587510495e-06, "epoch": 0.43896543388929177, "percentage": 43.89, "elapsed_time": "15:40:14", "remaining_time": "20:02:12"} +{"current_steps": 909, "total_steps": 2069, "loss": 0.5291, "lr": 6.407616832599091e-06, "epoch": 0.43944887599709936, "percentage": 43.93, "elapsed_time": "15:41:20", "remaining_time": "20:01:16"} +{"current_steps": 910, "total_steps": 2069, "loss": 0.5349, "lr": 6.399944479705615e-06, "epoch": 0.43993231810490696, "percentage": 43.98, "elapsed_time": "15:42:23", "remaining_time": "20:00:15"} +{"current_steps": 911, "total_steps": 2069, "loss": 0.5356, "lr": 
6.392268548441218e-06, "epoch": 0.4404157602127145, "percentage": 44.03, "elapsed_time": "15:43:28", "remaining_time": "19:59:16"} +{"current_steps": 912, "total_steps": 2069, "loss": 0.5297, "lr": 6.384589058426201e-06, "epoch": 0.4408992023205221, "percentage": 44.08, "elapsed_time": "15:44:30", "remaining_time": "19:58:14"} +{"current_steps": 913, "total_steps": 2069, "loss": 0.531, "lr": 6.3769060292899585e-06, "epoch": 0.4413826444283297, "percentage": 44.13, "elapsed_time": "15:45:35", "remaining_time": "19:57:16"} +{"current_steps": 914, "total_steps": 2069, "loss": 0.5266, "lr": 6.3692194806709326e-06, "epoch": 0.4418660865361373, "percentage": 44.18, "elapsed_time": "15:46:36", "remaining_time": "19:56:12"} +{"current_steps": 915, "total_steps": 2069, "loss": 0.5068, "lr": 6.36152943221656e-06, "epoch": 0.4423495286439449, "percentage": 44.22, "elapsed_time": "15:47:42", "remaining_time": "19:55:15"} +{"current_steps": 916, "total_steps": 2069, "loss": 0.5135, "lr": 6.353835903583225e-06, "epoch": 0.4428329707517525, "percentage": 44.27, "elapsed_time": "15:48:48", "remaining_time": "19:54:17"} +{"current_steps": 917, "total_steps": 2069, "loss": 0.53, "lr": 6.346138914436207e-06, "epoch": 0.4433164128595601, "percentage": 44.32, "elapsed_time": "15:49:50", "remaining_time": "19:53:15"} +{"current_steps": 918, "total_steps": 2069, "loss": 0.5282, "lr": 6.338438484449632e-06, "epoch": 0.4437998549673677, "percentage": 44.37, "elapsed_time": "15:50:53", "remaining_time": "19:52:14"} +{"current_steps": 919, "total_steps": 2069, "loss": 0.5205, "lr": 6.330734633306415e-06, "epoch": 0.4442832970751752, "percentage": 44.42, "elapsed_time": "15:51:55", "remaining_time": "19:51:12"} +{"current_steps": 920, "total_steps": 2069, "loss": 0.5354, "lr": 6.3230273806982254e-06, "epoch": 0.4447667391829828, "percentage": 44.47, "elapsed_time": "15:52:57", "remaining_time": "19:50:10"} +{"current_steps": 921, "total_steps": 2069, "loss": 0.5403, "lr": 
6.31531674632542e-06, "epoch": 0.4452501812907904, "percentage": 44.51, "elapsed_time": "15:53:58", "remaining_time": "19:49:06"} +{"current_steps": 922, "total_steps": 2069, "loss": 0.5285, "lr": 6.307602749897001e-06, "epoch": 0.445733623398598, "percentage": 44.56, "elapsed_time": "15:54:56", "remaining_time": "19:47:58"} +{"current_steps": 923, "total_steps": 2069, "loss": 0.5293, "lr": 6.299885411130566e-06, "epoch": 0.4462170655064056, "percentage": 44.61, "elapsed_time": "15:55:54", "remaining_time": "19:46:52"} +{"current_steps": 924, "total_steps": 2069, "loss": 0.5358, "lr": 6.292164749752256e-06, "epoch": 0.4467005076142132, "percentage": 44.66, "elapsed_time": "15:56:52", "remaining_time": "19:45:44"} +{"current_steps": 925, "total_steps": 2069, "loss": 0.5303, "lr": 6.284440785496701e-06, "epoch": 0.4471839497220208, "percentage": 44.71, "elapsed_time": "15:57:51", "remaining_time": "19:44:38"} +{"current_steps": 926, "total_steps": 2069, "loss": 0.5384, "lr": 6.27671353810698e-06, "epoch": 0.4476673918298284, "percentage": 44.76, "elapsed_time": "15:58:52", "remaining_time": "19:43:35"} +{"current_steps": 927, "total_steps": 2069, "loss": 0.5349, "lr": 6.268983027334557e-06, "epoch": 0.448150833937636, "percentage": 44.8, "elapsed_time": "15:59:55", "remaining_time": "19:42:33"} +{"current_steps": 928, "total_steps": 2069, "loss": 0.5445, "lr": 6.2612492729392396e-06, "epoch": 0.44863427604544354, "percentage": 44.85, "elapsed_time": "16:01:00", "remaining_time": "19:41:34"} +{"current_steps": 929, "total_steps": 2069, "loss": 0.5243, "lr": 6.25351229468913e-06, "epoch": 0.44911771815325113, "percentage": 44.9, "elapsed_time": "16:02:04", "remaining_time": "19:40:35"} +{"current_steps": 930, "total_steps": 2069, "loss": 0.5335, "lr": 6.245772112360568e-06, "epoch": 0.44960116026105873, "percentage": 44.95, "elapsed_time": "16:03:05", "remaining_time": "19:39:31"} +{"current_steps": 931, "total_steps": 2069, "loss": 0.5295, "lr": 
6.2380287457380814e-06, "epoch": 0.4500846023688663, "percentage": 45.0, "elapsed_time": "16:04:03", "remaining_time": "19:38:24"} +{"current_steps": 932, "total_steps": 2069, "loss": 0.5277, "lr": 6.230282214614342e-06, "epoch": 0.4505680444766739, "percentage": 45.05, "elapsed_time": "16:05:04", "remaining_time": "19:37:21"} +{"current_steps": 933, "total_steps": 2069, "loss": 0.5038, "lr": 6.222532538790107e-06, "epoch": 0.4510514865844815, "percentage": 45.09, "elapsed_time": "16:06:05", "remaining_time": "19:36:17"} +{"current_steps": 934, "total_steps": 2069, "loss": 0.5482, "lr": 6.214779738074169e-06, "epoch": 0.4515349286922891, "percentage": 45.14, "elapsed_time": "16:07:02", "remaining_time": "19:35:09"} +{"current_steps": 935, "total_steps": 2069, "loss": 0.5408, "lr": 6.2070238322833165e-06, "epoch": 0.4520183708000967, "percentage": 45.19, "elapsed_time": "16:08:02", "remaining_time": "19:34:04"} +{"current_steps": 936, "total_steps": 2069, "loss": 0.507, "lr": 6.199264841242267e-06, "epoch": 0.45250181290790426, "percentage": 45.24, "elapsed_time": "16:09:09", "remaining_time": "19:33:08"} +{"current_steps": 937, "total_steps": 2069, "loss": 0.5361, "lr": 6.191502784783627e-06, "epoch": 0.45298525501571185, "percentage": 45.29, "elapsed_time": "16:10:08", "remaining_time": "19:32:02"} +{"current_steps": 938, "total_steps": 2069, "loss": 0.5404, "lr": 6.183737682747839e-06, "epoch": 0.45346869712351945, "percentage": 45.34, "elapsed_time": "16:11:11", "remaining_time": "19:31:01"} +{"current_steps": 939, "total_steps": 2069, "loss": 0.475, "lr": 6.17596955498313e-06, "epoch": 0.45395213923132705, "percentage": 45.38, "elapsed_time": "16:12:13", "remaining_time": "19:29:58"} +{"current_steps": 940, "total_steps": 2069, "loss": 0.5293, "lr": 6.16819842134546e-06, "epoch": 0.45443558133913464, "percentage": 45.43, "elapsed_time": "16:13:15", "remaining_time": "19:28:56"} +{"current_steps": 941, "total_steps": 2069, "loss": 0.5315, "lr": 
6.160424301698472e-06, "epoch": 0.45491902344694224, "percentage": 45.48, "elapsed_time": "16:14:18", "remaining_time": "19:27:55"} +{"current_steps": 942, "total_steps": 2069, "loss": 0.5398, "lr": 6.1526472159134454e-06, "epoch": 0.45540246555474984, "percentage": 45.53, "elapsed_time": "16:15:18", "remaining_time": "19:26:50"} +{"current_steps": 943, "total_steps": 2069, "loss": 0.5246, "lr": 6.1448671838692365e-06, "epoch": 0.45588590766255743, "percentage": 45.58, "elapsed_time": "16:16:20", "remaining_time": "19:25:48"} +{"current_steps": 944, "total_steps": 2069, "loss": 0.5392, "lr": 6.1370842254522325e-06, "epoch": 0.456369349770365, "percentage": 45.63, "elapsed_time": "16:17:18", "remaining_time": "19:24:41"} +{"current_steps": 945, "total_steps": 2069, "loss": 0.5216, "lr": 6.129298360556304e-06, "epoch": 0.45685279187817257, "percentage": 45.67, "elapsed_time": "16:18:24", "remaining_time": "19:23:43"} +{"current_steps": 946, "total_steps": 2069, "loss": 0.5341, "lr": 6.1215096090827485e-06, "epoch": 0.45733623398598017, "percentage": 45.72, "elapsed_time": "16:19:24", "remaining_time": "19:22:38"} +{"current_steps": 947, "total_steps": 2069, "loss": 0.5357, "lr": 6.1137179909402445e-06, "epoch": 0.45781967609378776, "percentage": 45.77, "elapsed_time": "16:20:29", "remaining_time": "19:21:41"} +{"current_steps": 948, "total_steps": 2069, "loss": 0.5312, "lr": 6.105923526044794e-06, "epoch": 0.45830311820159536, "percentage": 45.82, "elapsed_time": "16:21:33", "remaining_time": "19:20:40"} +{"current_steps": 949, "total_steps": 2069, "loss": 0.5164, "lr": 6.098126234319679e-06, "epoch": 0.45878656030940296, "percentage": 45.87, "elapsed_time": "16:22:35", "remaining_time": "19:19:38"} +{"current_steps": 950, "total_steps": 2069, "loss": 0.5406, "lr": 6.0903261356954035e-06, "epoch": 0.45927000241721055, "percentage": 45.92, "elapsed_time": "16:23:32", "remaining_time": "19:18:30"} +{"current_steps": 951, "total_steps": 2069, "loss": 0.5369, "lr": 
6.08252325010965e-06, "epoch": 0.45975344452501815, "percentage": 45.96, "elapsed_time": "16:24:32", "remaining_time": "19:17:25"} +{"current_steps": 952, "total_steps": 2069, "loss": 0.5318, "lr": 6.074717597507223e-06, "epoch": 0.4602368866328257, "percentage": 46.01, "elapsed_time": "16:25:35", "remaining_time": "19:16:24"} +{"current_steps": 953, "total_steps": 2069, "loss": 0.5053, "lr": 6.066909197839996e-06, "epoch": 0.4607203287406333, "percentage": 46.06, "elapsed_time": "16:26:39", "remaining_time": "19:15:25"} +{"current_steps": 954, "total_steps": 2069, "loss": 0.5313, "lr": 6.059098071066874e-06, "epoch": 0.4612037708484409, "percentage": 46.11, "elapsed_time": "16:27:43", "remaining_time": "19:14:24"} +{"current_steps": 955, "total_steps": 2069, "loss": 0.5304, "lr": 6.051284237153723e-06, "epoch": 0.4616872129562485, "percentage": 46.16, "elapsed_time": "16:28:43", "remaining_time": "19:13:20"} +{"current_steps": 956, "total_steps": 2069, "loss": 0.5392, "lr": 6.043467716073333e-06, "epoch": 0.4621706550640561, "percentage": 46.21, "elapsed_time": "16:29:41", "remaining_time": "19:12:13"} +{"current_steps": 957, "total_steps": 2069, "loss": 0.5333, "lr": 6.035648527805359e-06, "epoch": 0.4626540971718637, "percentage": 46.25, "elapsed_time": "16:30:38", "remaining_time": "19:11:05"} +{"current_steps": 958, "total_steps": 2069, "loss": 0.5331, "lr": 6.0278266923362805e-06, "epoch": 0.4631375392796713, "percentage": 46.3, "elapsed_time": "16:31:37", "remaining_time": "19:09:59"} +{"current_steps": 959, "total_steps": 2069, "loss": 0.5432, "lr": 6.0200022296593375e-06, "epoch": 0.46362098138747887, "percentage": 46.35, "elapsed_time": "16:32:39", "remaining_time": "19:08:57"} +{"current_steps": 960, "total_steps": 2069, "loss": 0.5323, "lr": 6.012175159774488e-06, "epoch": 0.46410442349528647, "percentage": 46.4, "elapsed_time": "16:33:43", "remaining_time": "19:07:57"} +{"current_steps": 961, "total_steps": 2069, "loss": 0.5299, "lr": 
6.004345502688353e-06, "epoch": 0.464587865603094, "percentage": 46.45, "elapsed_time": "16:34:46", "remaining_time": "19:06:56"} +{"current_steps": 962, "total_steps": 2069, "loss": 0.5385, "lr": 5.996513278414166e-06, "epoch": 0.4650713077109016, "percentage": 46.5, "elapsed_time": "16:35:49", "remaining_time": "19:05:55"} +{"current_steps": 963, "total_steps": 2069, "loss": 0.5303, "lr": 5.988678506971726e-06, "epoch": 0.4655547498187092, "percentage": 46.54, "elapsed_time": "16:36:52", "remaining_time": "19:04:54"} +{"current_steps": 964, "total_steps": 2069, "loss": 0.503, "lr": 5.980841208387338e-06, "epoch": 0.4660381919265168, "percentage": 46.59, "elapsed_time": "16:37:53", "remaining_time": "19:03:50"} +{"current_steps": 965, "total_steps": 2069, "loss": 0.5253, "lr": 5.973001402693769e-06, "epoch": 0.4665216340343244, "percentage": 46.64, "elapsed_time": "16:38:53", "remaining_time": "19:02:46"} +{"current_steps": 966, "total_steps": 2069, "loss": 0.5386, "lr": 5.965159109930196e-06, "epoch": 0.467005076142132, "percentage": 46.69, "elapsed_time": "16:39:55", "remaining_time": "19:01:43"} +{"current_steps": 967, "total_steps": 2069, "loss": 0.529, "lr": 5.957314350142149e-06, "epoch": 0.4674885182499396, "percentage": 46.74, "elapsed_time": "16:40:57", "remaining_time": "19:00:41"} +{"current_steps": 968, "total_steps": 2069, "loss": 0.538, "lr": 5.94946714338147e-06, "epoch": 0.4679719603577472, "percentage": 46.79, "elapsed_time": "16:41:56", "remaining_time": "18:59:36"} +{"current_steps": 969, "total_steps": 2069, "loss": 0.5333, "lr": 5.941617509706247e-06, "epoch": 0.4684554024655547, "percentage": 46.83, "elapsed_time": "16:42:57", "remaining_time": "18:58:32"} +{"current_steps": 970, "total_steps": 2069, "loss": 0.5329, "lr": 5.933765469180779e-06, "epoch": 0.4689388445733623, "percentage": 46.88, "elapsed_time": "16:43:57", "remaining_time": "18:57:28"} +{"current_steps": 971, "total_steps": 2069, "loss": 0.5304, "lr": 5.925911041875514e-06, 
"epoch": 0.4694222866811699, "percentage": 46.93, "elapsed_time": "16:44:59", "remaining_time": "18:56:26"} +{"current_steps": 972, "total_steps": 2069, "loss": 0.5339, "lr": 5.9180542478670025e-06, "epoch": 0.4699057287889775, "percentage": 46.98, "elapsed_time": "16:46:00", "remaining_time": "18:55:22"} +{"current_steps": 973, "total_steps": 2069, "loss": 0.5311, "lr": 5.910195107237842e-06, "epoch": 0.4703891708967851, "percentage": 47.03, "elapsed_time": "16:47:04", "remaining_time": "18:54:22"} +{"current_steps": 974, "total_steps": 2069, "loss": 0.5294, "lr": 5.902333640076627e-06, "epoch": 0.4708726130045927, "percentage": 47.08, "elapsed_time": "16:48:06", "remaining_time": "18:53:20"} +{"current_steps": 975, "total_steps": 2069, "loss": 0.5319, "lr": 5.894469866477905e-06, "epoch": 0.4713560551124003, "percentage": 47.12, "elapsed_time": "16:49:07", "remaining_time": "18:52:16"} +{"current_steps": 976, "total_steps": 2069, "loss": 0.5308, "lr": 5.886603806542114e-06, "epoch": 0.4718394972202079, "percentage": 47.17, "elapsed_time": "16:50:08", "remaining_time": "18:51:14"} +{"current_steps": 977, "total_steps": 2069, "loss": 0.5271, "lr": 5.878735480375537e-06, "epoch": 0.47232293932801545, "percentage": 47.22, "elapsed_time": "16:51:12", "remaining_time": "18:50:14"} +{"current_steps": 978, "total_steps": 2069, "loss": 0.5363, "lr": 5.87086490809025e-06, "epoch": 0.47280638143582304, "percentage": 47.27, "elapsed_time": "16:52:13", "remaining_time": "18:49:10"} +{"current_steps": 979, "total_steps": 2069, "loss": 0.5208, "lr": 5.862992109804071e-06, "epoch": 0.47328982354363064, "percentage": 47.32, "elapsed_time": "16:53:14", "remaining_time": "18:48:07"} +{"current_steps": 980, "total_steps": 2069, "loss": 0.5045, "lr": 5.855117105640503e-06, "epoch": 0.47377326565143824, "percentage": 47.37, "elapsed_time": "16:54:19", "remaining_time": "18:47:08"} +{"current_steps": 981, "total_steps": 2069, "loss": 0.5213, "lr": 5.847239915728695e-06, "epoch": 
0.47425670775924583, "percentage": 47.41, "elapsed_time": "16:55:21", "remaining_time": "18:46:06"} +{"current_steps": 982, "total_steps": 2069, "loss": 0.5416, "lr": 5.839360560203379e-06, "epoch": 0.47474014986705343, "percentage": 47.46, "elapsed_time": "16:56:19", "remaining_time": "18:44:59"} +{"current_steps": 983, "total_steps": 2069, "loss": 0.5397, "lr": 5.831479059204822e-06, "epoch": 0.475223591974861, "percentage": 47.51, "elapsed_time": "16:57:23", "remaining_time": "18:43:59"} +{"current_steps": 984, "total_steps": 2069, "loss": 0.5253, "lr": 5.823595432878775e-06, "epoch": 0.4757070340826686, "percentage": 47.56, "elapsed_time": "16:58:25", "remaining_time": "18:42:57"} +{"current_steps": 985, "total_steps": 2069, "loss": 0.501, "lr": 5.815709701376424e-06, "epoch": 0.47619047619047616, "percentage": 47.61, "elapsed_time": "16:59:34", "remaining_time": "18:42:02"} +{"current_steps": 986, "total_steps": 2069, "loss": 0.5425, "lr": 5.8078218848543326e-06, "epoch": 0.47667391829828376, "percentage": 47.66, "elapsed_time": "17:00:37", "remaining_time": "18:41:01"} +{"current_steps": 987, "total_steps": 2069, "loss": 0.5293, "lr": 5.799932003474398e-06, "epoch": 0.47715736040609136, "percentage": 47.7, "elapsed_time": "17:01:36", "remaining_time": "18:39:55"} +{"current_steps": 988, "total_steps": 2069, "loss": 0.5273, "lr": 5.7920400774037884e-06, "epoch": 0.47764080251389895, "percentage": 47.75, "elapsed_time": "17:02:42", "remaining_time": "18:38:58"} +{"current_steps": 989, "total_steps": 2069, "loss": 0.523, "lr": 5.784146126814909e-06, "epoch": 0.47812424462170655, "percentage": 47.8, "elapsed_time": "17:03:40", "remaining_time": "18:37:52"} +{"current_steps": 990, "total_steps": 2069, "loss": 0.5289, "lr": 5.776250171885329e-06, "epoch": 0.47860768672951415, "percentage": 47.85, "elapsed_time": "17:04:42", "remaining_time": "18:36:49"} +{"current_steps": 991, "total_steps": 2069, "loss": 0.5354, "lr": 5.768352232797748e-06, "epoch": 
0.47909112883732174, "percentage": 47.9, "elapsed_time": "17:05:46", "remaining_time": "18:35:50"} +{"current_steps": 992, "total_steps": 2069, "loss": 0.5346, "lr": 5.760452329739933e-06, "epoch": 0.47957457094512934, "percentage": 47.95, "elapsed_time": "17:06:48", "remaining_time": "18:34:47"} +{"current_steps": 993, "total_steps": 2069, "loss": 0.5381, "lr": 5.752550482904674e-06, "epoch": 0.48005801305293694, "percentage": 47.99, "elapsed_time": "17:07:49", "remaining_time": "18:33:43"} +{"current_steps": 994, "total_steps": 2069, "loss": 0.5347, "lr": 5.744646712489729e-06, "epoch": 0.4805414551607445, "percentage": 48.04, "elapsed_time": "17:08:51", "remaining_time": "18:32:42"} +{"current_steps": 995, "total_steps": 2069, "loss": 0.546, "lr": 5.736741038697771e-06, "epoch": 0.4810248972685521, "percentage": 48.09, "elapsed_time": "17:09:53", "remaining_time": "18:31:40"} +{"current_steps": 996, "total_steps": 2069, "loss": 0.5189, "lr": 5.728833481736339e-06, "epoch": 0.4815083393763597, "percentage": 48.14, "elapsed_time": "17:10:57", "remaining_time": "18:30:39"} +{"current_steps": 997, "total_steps": 2069, "loss": 0.5405, "lr": 5.720924061817786e-06, "epoch": 0.48199178148416727, "percentage": 48.19, "elapsed_time": "17:11:56", "remaining_time": "18:29:34"} +{"current_steps": 998, "total_steps": 2069, "loss": 0.5317, "lr": 5.71301279915923e-06, "epoch": 0.48247522359197487, "percentage": 48.24, "elapsed_time": "17:12:58", "remaining_time": "18:28:32"} +{"current_steps": 999, "total_steps": 2069, "loss": 0.532, "lr": 5.705099713982491e-06, "epoch": 0.48295866569978246, "percentage": 48.28, "elapsed_time": "17:14:00", "remaining_time": "18:27:30"} +{"current_steps": 1000, "total_steps": 2069, "loss": 0.5305, "lr": 5.697184826514058e-06, "epoch": 0.48344210780759006, "percentage": 48.33, "elapsed_time": "17:15:03", "remaining_time": "18:26:28"} +{"current_steps": 1001, "total_steps": 2069, "loss": 0.5385, "lr": 5.689268156985015e-06, "epoch": 
0.48392554991539766, "percentage": 48.38, "elapsed_time": "17:16:03", "remaining_time": "18:25:24"} +{"current_steps": 1002, "total_steps": 2069, "loss": 0.5468, "lr": 5.6813497256310124e-06, "epoch": 0.4844089920232052, "percentage": 48.43, "elapsed_time": "17:17:00", "remaining_time": "18:24:17"} +{"current_steps": 1003, "total_steps": 2069, "loss": 0.5259, "lr": 5.673429552692196e-06, "epoch": 0.4848924341310128, "percentage": 48.48, "elapsed_time": "17:17:58", "remaining_time": "18:23:10"} +{"current_steps": 1004, "total_steps": 2069, "loss": 0.5312, "lr": 5.66550765841317e-06, "epoch": 0.4853758762388204, "percentage": 48.53, "elapsed_time": "17:19:00", "remaining_time": "18:22:08"} +{"current_steps": 1005, "total_steps": 2069, "loss": 0.5234, "lr": 5.6575840630429295e-06, "epoch": 0.485859318346628, "percentage": 48.57, "elapsed_time": "17:20:03", "remaining_time": "18:21:06"} +{"current_steps": 1006, "total_steps": 2069, "loss": 0.5337, "lr": 5.649658786834825e-06, "epoch": 0.4863427604544356, "percentage": 48.62, "elapsed_time": "17:21:04", "remaining_time": "18:20:03"} +{"current_steps": 1007, "total_steps": 2069, "loss": 0.5292, "lr": 5.641731850046503e-06, "epoch": 0.4868262025622432, "percentage": 48.67, "elapsed_time": "17:22:06", "remaining_time": "18:19:01"} +{"current_steps": 1008, "total_steps": 2069, "loss": 0.5033, "lr": 5.633803272939851e-06, "epoch": 0.4873096446700508, "percentage": 48.72, "elapsed_time": "17:23:13", "remaining_time": "18:18:04"} +{"current_steps": 1009, "total_steps": 2069, "loss": 0.5199, "lr": 5.62587307578095e-06, "epoch": 0.4877930867778584, "percentage": 48.77, "elapsed_time": "17:24:18", "remaining_time": "18:17:06"} +{"current_steps": 1010, "total_steps": 2069, "loss": 0.5285, "lr": 5.6179412788400255e-06, "epoch": 0.4882765288856659, "percentage": 48.82, "elapsed_time": "17:25:17", "remaining_time": "18:16:00"} +{"current_steps": 1011, "total_steps": 2069, "loss": 0.5302, "lr": 5.610007902391387e-06, "epoch": 
0.4887599709934735, "percentage": 48.86, "elapsed_time": "17:26:19", "remaining_time": "18:14:57"} +{"current_steps": 1012, "total_steps": 2069, "loss": 0.5319, "lr": 5.602072966713389e-06, "epoch": 0.4892434131012811, "percentage": 48.91, "elapsed_time": "17:27:17", "remaining_time": "18:13:52"} +{"current_steps": 1013, "total_steps": 2069, "loss": 0.533, "lr": 5.594136492088363e-06, "epoch": 0.4897268552090887, "percentage": 48.96, "elapsed_time": "17:28:20", "remaining_time": "18:12:50"} +{"current_steps": 1014, "total_steps": 2069, "loss": 0.5207, "lr": 5.586198498802577e-06, "epoch": 0.4902102973168963, "percentage": 49.01, "elapsed_time": "17:29:24", "remaining_time": "18:11:49"} +{"current_steps": 1015, "total_steps": 2069, "loss": 0.5182, "lr": 5.578259007146183e-06, "epoch": 0.4906937394247039, "percentage": 49.06, "elapsed_time": "17:30:28", "remaining_time": "18:10:50"} +{"current_steps": 1016, "total_steps": 2069, "loss": 0.5335, "lr": 5.570318037413162e-06, "epoch": 0.4911771815325115, "percentage": 49.11, "elapsed_time": "17:31:30", "remaining_time": "18:09:47"} +{"current_steps": 1017, "total_steps": 2069, "loss": 0.498, "lr": 5.562375609901273e-06, "epoch": 0.4916606236403191, "percentage": 49.15, "elapsed_time": "17:32:35", "remaining_time": "18:08:48"} +{"current_steps": 1018, "total_steps": 2069, "loss": 0.535, "lr": 5.5544317449119975e-06, "epoch": 0.49214406574812664, "percentage": 49.2, "elapsed_time": "17:33:37", "remaining_time": "18:07:47"} +{"current_steps": 1019, "total_steps": 2069, "loss": 0.529, "lr": 5.546486462750499e-06, "epoch": 0.49262750785593423, "percentage": 49.25, "elapsed_time": "17:34:35", "remaining_time": "18:06:40"} +{"current_steps": 1020, "total_steps": 2069, "loss": 0.5415, "lr": 5.538539783725556e-06, "epoch": 0.49311094996374183, "percentage": 49.3, "elapsed_time": "17:35:38", "remaining_time": "18:05:39"} +{"current_steps": 1021, "total_steps": 2069, "loss": 0.5237, "lr": 5.530591728149522e-06, "epoch": 
0.4935943920715494, "percentage": 49.35, "elapsed_time": "17:36:43", "remaining_time": "18:04:40"} +{"current_steps": 1022, "total_steps": 2069, "loss": 0.5275, "lr": 5.522642316338268e-06, "epoch": 0.494077834179357, "percentage": 49.4, "elapsed_time": "17:37:48", "remaining_time": "18:03:41"} +{"current_steps": 1023, "total_steps": 2069, "loss": 0.5279, "lr": 5.51469156861113e-06, "epoch": 0.4945612762871646, "percentage": 49.44, "elapsed_time": "17:38:50", "remaining_time": "18:02:38"} +{"current_steps": 1024, "total_steps": 2069, "loss": 0.5261, "lr": 5.50673950529086e-06, "epoch": 0.4950447183949722, "percentage": 49.49, "elapsed_time": "17:39:49", "remaining_time": "18:01:33"} +{"current_steps": 1025, "total_steps": 2069, "loss": 0.5392, "lr": 5.498786146703575e-06, "epoch": 0.4955281605027798, "percentage": 49.54, "elapsed_time": "17:40:50", "remaining_time": "18:00:30"} +{"current_steps": 1026, "total_steps": 2069, "loss": 0.513, "lr": 5.490831513178698e-06, "epoch": 0.4960116026105874, "percentage": 49.59, "elapsed_time": "17:41:51", "remaining_time": "17:59:27"} +{"current_steps": 1027, "total_steps": 2069, "loss": 0.5342, "lr": 5.482875625048916e-06, "epoch": 0.49649504471839495, "percentage": 49.64, "elapsed_time": "17:42:49", "remaining_time": "17:58:21"} +{"current_steps": 1028, "total_steps": 2069, "loss": 0.5371, "lr": 5.474918502650116e-06, "epoch": 0.49697848682620255, "percentage": 49.69, "elapsed_time": "17:43:50", "remaining_time": "17:57:18"} +{"current_steps": 1029, "total_steps": 2069, "loss": 0.5248, "lr": 5.466960166321348e-06, "epoch": 0.49746192893401014, "percentage": 49.73, "elapsed_time": "17:44:52", "remaining_time": "17:56:15"} +{"current_steps": 1030, "total_steps": 2069, "loss": 0.5236, "lr": 5.459000636404759e-06, "epoch": 0.49794537104181774, "percentage": 49.78, "elapsed_time": "17:45:53", "remaining_time": "17:55:12"} +{"current_steps": 1031, "total_steps": 2069, "loss": 0.5342, "lr": 5.451039933245551e-06, "epoch": 
0.49842881314962534, "percentage": 49.83, "elapsed_time": "17:46:52", "remaining_time": "17:54:07"} +{"current_steps": 1032, "total_steps": 2069, "loss": 0.5249, "lr": 5.44307807719192e-06, "epoch": 0.49891225525743294, "percentage": 49.88, "elapsed_time": "17:47:55", "remaining_time": "17:53:05"} +{"current_steps": 1033, "total_steps": 2069, "loss": 0.4997, "lr": 5.435115088595016e-06, "epoch": 0.49939569736524053, "percentage": 49.93, "elapsed_time": "17:48:57", "remaining_time": "17:52:04"} +{"current_steps": 1034, "total_steps": 2069, "loss": 0.5263, "lr": 5.4271509878088755e-06, "epoch": 0.49987913947304813, "percentage": 49.98, "elapsed_time": "17:49:59", "remaining_time": "17:51:01"} +{"current_steps": 1035, "total_steps": 2069, "loss": 0.503, "lr": 5.4191857951903825e-06, "epoch": 0.5003625815808557, "percentage": 50.02, "elapsed_time": "17:51:02", "remaining_time": "17:50:00"} +{"current_steps": 1036, "total_steps": 2069, "loss": 0.5228, "lr": 5.4112195310992144e-06, "epoch": 0.5008460236886633, "percentage": 50.07, "elapsed_time": "17:52:00", "remaining_time": "17:48:53"} +{"current_steps": 1037, "total_steps": 2069, "loss": 0.5295, "lr": 5.403252215897781e-06, "epoch": 0.5013294657964709, "percentage": 50.12, "elapsed_time": "17:52:57", "remaining_time": "17:47:47"} +{"current_steps": 1038, "total_steps": 2069, "loss": 0.5402, "lr": 5.395283869951184e-06, "epoch": 0.5018129079042785, "percentage": 50.17, "elapsed_time": "17:54:02", "remaining_time": "17:46:48"} +{"current_steps": 1039, "total_steps": 2069, "loss": 0.5228, "lr": 5.387314513627156e-06, "epoch": 0.5022963500120861, "percentage": 50.22, "elapsed_time": "17:55:05", "remaining_time": "17:45:46"} +{"current_steps": 1040, "total_steps": 2069, "loss": 0.5302, "lr": 5.379344167296017e-06, "epoch": 0.5027797921198937, "percentage": 50.27, "elapsed_time": "17:56:06", "remaining_time": "17:44:43"} +{"current_steps": 1041, "total_steps": 2069, "loss": 0.5337, "lr": 5.371372851330612e-06, "epoch": 
0.5032632342277013, "percentage": 50.31, "elapsed_time": "17:58:20", "remaining_time": "17:44:52"} +{"current_steps": 1042, "total_steps": 2069, "loss": 0.5348, "lr": 5.3634005861062675e-06, "epoch": 0.5037466763355088, "percentage": 50.36, "elapsed_time": "17:59:21", "remaining_time": "17:43:49"} +{"current_steps": 1043, "total_steps": 2069, "loss": 0.5367, "lr": 5.355427392000736e-06, "epoch": 0.5042301184433164, "percentage": 50.41, "elapsed_time": "18:00:24", "remaining_time": "17:42:47"} +{"current_steps": 1044, "total_steps": 2069, "loss": 0.5236, "lr": 5.347453289394146e-06, "epoch": 0.504713560551124, "percentage": 50.46, "elapsed_time": "18:01:24", "remaining_time": "17:41:43"} +{"current_steps": 1045, "total_steps": 2069, "loss": 0.5374, "lr": 5.339478298668943e-06, "epoch": 0.5051970026589316, "percentage": 50.51, "elapsed_time": "18:02:25", "remaining_time": "17:40:40"} +{"current_steps": 1046, "total_steps": 2069, "loss": 0.529, "lr": 5.331502440209849e-06, "epoch": 0.5056804447667392, "percentage": 50.56, "elapsed_time": "18:03:26", "remaining_time": "17:39:37"} +{"current_steps": 1047, "total_steps": 2069, "loss": 0.5363, "lr": 5.3235257344037996e-06, "epoch": 0.5061638868745467, "percentage": 50.6, "elapsed_time": "18:04:29", "remaining_time": "17:38:35"} +{"current_steps": 1048, "total_steps": 2069, "loss": 0.5335, "lr": 5.3155482016398995e-06, "epoch": 0.5066473289823543, "percentage": 50.65, "elapsed_time": "18:05:30", "remaining_time": "17:37:32"} +{"current_steps": 1049, "total_steps": 2069, "loss": 0.5269, "lr": 5.307569862309363e-06, "epoch": 0.5071307710901619, "percentage": 50.7, "elapsed_time": "18:06:36", "remaining_time": "17:36:34"} +{"current_steps": 1050, "total_steps": 2069, "loss": 0.5269, "lr": 5.29959073680547e-06, "epoch": 0.5076142131979695, "percentage": 50.75, "elapsed_time": "18:07:35", "remaining_time": "17:35:28"} +{"current_steps": 1051, "total_steps": 2069, "loss": 0.5247, "lr": 5.2916108455235084e-06, "epoch": 
0.5080976553057771, "percentage": 50.8, "elapsed_time": "18:08:38", "remaining_time": "17:34:27"} +{"current_steps": 1052, "total_steps": 2069, "loss": 0.5257, "lr": 5.2836302088607235e-06, "epoch": 0.5085810974135847, "percentage": 50.85, "elapsed_time": "18:09:35", "remaining_time": "17:33:20"} +{"current_steps": 1053, "total_steps": 2069, "loss": 0.5326, "lr": 5.275648847216263e-06, "epoch": 0.5090645395213923, "percentage": 50.89, "elapsed_time": "18:10:38", "remaining_time": "17:32:19"} +{"current_steps": 1054, "total_steps": 2069, "loss": 0.5384, "lr": 5.267666780991135e-06, "epoch": 0.5095479816291999, "percentage": 50.94, "elapsed_time": "18:11:38", "remaining_time": "17:31:15"} +{"current_steps": 1055, "total_steps": 2069, "loss": 0.5217, "lr": 5.259684030588141e-06, "epoch": 0.5100314237370075, "percentage": 50.99, "elapsed_time": "18:12:39", "remaining_time": "17:30:11"} +{"current_steps": 1056, "total_steps": 2069, "loss": 0.5292, "lr": 5.251700616411836e-06, "epoch": 0.5105148658448151, "percentage": 51.04, "elapsed_time": "18:13:46", "remaining_time": "17:29:14"} +{"current_steps": 1057, "total_steps": 2069, "loss": 0.5335, "lr": 5.243716558868469e-06, "epoch": 0.5109983079526227, "percentage": 51.09, "elapsed_time": "18:14:46", "remaining_time": "17:28:10"} +{"current_steps": 1058, "total_steps": 2069, "loss": 0.5366, "lr": 5.235731878365935e-06, "epoch": 0.5114817500604303, "percentage": 51.14, "elapsed_time": "18:15:49", "remaining_time": "17:27:08"} +{"current_steps": 1059, "total_steps": 2069, "loss": 0.5343, "lr": 5.22774659531372e-06, "epoch": 0.5119651921682379, "percentage": 51.18, "elapsed_time": "18:16:50", "remaining_time": "17:26:05"} +{"current_steps": 1060, "total_steps": 2069, "loss": 0.5318, "lr": 5.219760730122854e-06, "epoch": 0.5124486342760455, "percentage": 51.23, "elapsed_time": "18:17:55", "remaining_time": "17:25:06"} +{"current_steps": 1061, "total_steps": 2069, "loss": 0.5055, "lr": 5.211774303205849e-06, "epoch": 
0.5129320763838531, "percentage": 51.28, "elapsed_time": "18:19:01", "remaining_time": "17:24:07"} +{"current_steps": 1062, "total_steps": 2069, "loss": 0.5015, "lr": 5.203787334976655e-06, "epoch": 0.5134155184916607, "percentage": 51.33, "elapsed_time": "18:20:02", "remaining_time": "17:23:04"} +{"current_steps": 1063, "total_steps": 2069, "loss": 0.525, "lr": 5.195799845850611e-06, "epoch": 0.5138989605994683, "percentage": 51.38, "elapsed_time": "18:21:01", "remaining_time": "17:21:59"} +{"current_steps": 1064, "total_steps": 2069, "loss": 0.5265, "lr": 5.18781185624438e-06, "epoch": 0.5143824027072758, "percentage": 51.43, "elapsed_time": "18:22:01", "remaining_time": "17:20:54"} +{"current_steps": 1065, "total_steps": 2069, "loss": 0.5311, "lr": 5.179823386575908e-06, "epoch": 0.5148658448150834, "percentage": 51.47, "elapsed_time": "18:23:01", "remaining_time": "17:19:50"} +{"current_steps": 1066, "total_steps": 2069, "loss": 0.5286, "lr": 5.171834457264364e-06, "epoch": 0.515349286922891, "percentage": 51.52, "elapsed_time": "18:24:04", "remaining_time": "17:18:49"} +{"current_steps": 1067, "total_steps": 2069, "loss": 0.5282, "lr": 5.1638450887301006e-06, "epoch": 0.5158327290306985, "percentage": 51.57, "elapsed_time": "18:25:04", "remaining_time": "17:17:45"} +{"current_steps": 1068, "total_steps": 2069, "loss": 0.527, "lr": 5.155855301394585e-06, "epoch": 0.5163161711385061, "percentage": 51.62, "elapsed_time": "18:26:05", "remaining_time": "17:16:42"} +{"current_steps": 1069, "total_steps": 2069, "loss": 0.5289, "lr": 5.147865115680357e-06, "epoch": 0.5167996132463137, "percentage": 51.67, "elapsed_time": "18:27:04", "remaining_time": "17:15:37"} +{"current_steps": 1070, "total_steps": 2069, "loss": 0.531, "lr": 5.139874552010975e-06, "epoch": 0.5172830553541213, "percentage": 51.72, "elapsed_time": "18:28:05", "remaining_time": "17:14:33"} +{"current_steps": 1071, "total_steps": 2069, "loss": 0.5428, "lr": 5.131883630810966e-06, "epoch": 
0.5177664974619289, "percentage": 51.76, "elapsed_time": "18:29:07", "remaining_time": "17:13:31"} +{"current_steps": 1072, "total_steps": 2069, "loss": 0.524, "lr": 5.123892372505768e-06, "epoch": 0.5182499395697365, "percentage": 51.81, "elapsed_time": "18:30:08", "remaining_time": "17:12:28"} +{"current_steps": 1073, "total_steps": 2069, "loss": 0.5337, "lr": 5.11590079752168e-06, "epoch": 0.5187333816775441, "percentage": 51.86, "elapsed_time": "18:31:10", "remaining_time": "17:11:26"} +{"current_steps": 1074, "total_steps": 2069, "loss": 0.5247, "lr": 5.107908926285813e-06, "epoch": 0.5192168237853517, "percentage": 51.91, "elapsed_time": "18:32:12", "remaining_time": "17:10:23"} +{"current_steps": 1075, "total_steps": 2069, "loss": 0.5314, "lr": 5.099916779226032e-06, "epoch": 0.5197002658931593, "percentage": 51.96, "elapsed_time": "18:33:17", "remaining_time": "17:09:23"} +{"current_steps": 1076, "total_steps": 2069, "loss": 0.5267, "lr": 5.091924376770912e-06, "epoch": 0.5201837080009669, "percentage": 52.01, "elapsed_time": "18:34:18", "remaining_time": "17:08:21"} +{"current_steps": 1077, "total_steps": 2069, "loss": 0.5227, "lr": 5.083931739349675e-06, "epoch": 0.5206671501087745, "percentage": 52.05, "elapsed_time": "18:35:21", "remaining_time": "17:07:20"} +{"current_steps": 1078, "total_steps": 2069, "loss": 0.5148, "lr": 5.075938887392149e-06, "epoch": 0.5211505922165821, "percentage": 52.1, "elapsed_time": "18:36:27", "remaining_time": "17:06:21"} +{"current_steps": 1079, "total_steps": 2069, "loss": 0.5168, "lr": 5.0679458413287055e-06, "epoch": 0.5216340343243897, "percentage": 52.15, "elapsed_time": "18:37:27", "remaining_time": "17:05:17"} +{"current_steps": 1080, "total_steps": 2069, "loss": 0.5274, "lr": 5.059952621590216e-06, "epoch": 0.5221174764321972, "percentage": 52.2, "elapsed_time": "18:38:25", "remaining_time": "17:04:11"} +{"current_steps": 1081, "total_steps": 2069, "loss": 0.5251, "lr": 5.051959248607993e-06, "epoch": 
0.5226009185400048, "percentage": 52.25, "elapsed_time": "18:39:27", "remaining_time": "17:03:09"} +{"current_steps": 1082, "total_steps": 2069, "loss": 0.5246, "lr": 5.043965742813744e-06, "epoch": 0.5230843606478124, "percentage": 52.3, "elapsed_time": "18:40:27", "remaining_time": "17:02:05"} +{"current_steps": 1083, "total_steps": 2069, "loss": 0.5299, "lr": 5.035972124639511e-06, "epoch": 0.52356780275562, "percentage": 52.34, "elapsed_time": "18:41:33", "remaining_time": "17:01:05"} +{"current_steps": 1084, "total_steps": 2069, "loss": 0.5273, "lr": 5.02797841451763e-06, "epoch": 0.5240512448634276, "percentage": 52.39, "elapsed_time": "18:42:34", "remaining_time": "17:00:03"} +{"current_steps": 1085, "total_steps": 2069, "loss": 0.5342, "lr": 5.019984632880665e-06, "epoch": 0.5245346869712352, "percentage": 52.44, "elapsed_time": "18:43:32", "remaining_time": "16:58:57"} +{"current_steps": 1086, "total_steps": 2069, "loss": 0.5314, "lr": 5.011990800161369e-06, "epoch": 0.5250181290790428, "percentage": 52.49, "elapsed_time": "18:44:32", "remaining_time": "16:57:53"} +{"current_steps": 1087, "total_steps": 2069, "loss": 0.5291, "lr": 5.00399693679262e-06, "epoch": 0.5255015711868504, "percentage": 52.54, "elapsed_time": "18:45:35", "remaining_time": "16:56:52"} +{"current_steps": 1088, "total_steps": 2069, "loss": 0.4852, "lr": 4.9960030632073815e-06, "epoch": 0.525985013294658, "percentage": 52.59, "elapsed_time": "18:46:45", "remaining_time": "16:55:56"} +{"current_steps": 1089, "total_steps": 2069, "loss": 0.5266, "lr": 4.988009199838632e-06, "epoch": 0.5264684554024656, "percentage": 52.63, "elapsed_time": "18:47:47", "remaining_time": "16:54:54"} +{"current_steps": 1090, "total_steps": 2069, "loss": 0.5128, "lr": 4.980015367119336e-06, "epoch": 0.5269518975102732, "percentage": 52.68, "elapsed_time": "18:48:51", "remaining_time": "16:53:54"} +{"current_steps": 1091, "total_steps": 2069, "loss": 0.5215, "lr": 4.9720215854823716e-06, "epoch": 
0.5274353396180808, "percentage": 52.73, "elapsed_time": "18:49:52", "remaining_time": "16:52:50"} +{"current_steps": 1092, "total_steps": 2069, "loss": 0.529, "lr": 4.96402787536049e-06, "epoch": 0.5279187817258884, "percentage": 52.78, "elapsed_time": "18:50:48", "remaining_time": "16:51:43"} +{"current_steps": 1093, "total_steps": 2069, "loss": 0.5196, "lr": 4.956034257186258e-06, "epoch": 0.528402223833696, "percentage": 52.83, "elapsed_time": "18:51:48", "remaining_time": "16:50:38"} +{"current_steps": 1094, "total_steps": 2069, "loss": 0.527, "lr": 4.9480407513920086e-06, "epoch": 0.5288856659415035, "percentage": 52.88, "elapsed_time": "18:52:48", "remaining_time": "16:49:34"} +{"current_steps": 1095, "total_steps": 2069, "loss": 0.523, "lr": 4.940047378409786e-06, "epoch": 0.5293691080493111, "percentage": 52.92, "elapsed_time": "18:53:46", "remaining_time": "16:48:29"} +{"current_steps": 1096, "total_steps": 2069, "loss": 0.5244, "lr": 4.932054158671295e-06, "epoch": 0.5298525501571187, "percentage": 52.97, "elapsed_time": "18:54:51", "remaining_time": "16:47:29"} +{"current_steps": 1097, "total_steps": 2069, "loss": 0.532, "lr": 4.924061112607853e-06, "epoch": 0.5303359922649262, "percentage": 53.02, "elapsed_time": "18:55:52", "remaining_time": "16:46:26"} +{"current_steps": 1098, "total_steps": 2069, "loss": 0.5226, "lr": 4.9160682606503255e-06, "epoch": 0.5308194343727338, "percentage": 53.07, "elapsed_time": "18:56:51", "remaining_time": "16:45:21"} +{"current_steps": 1099, "total_steps": 2069, "loss": 0.5242, "lr": 4.908075623229089e-06, "epoch": 0.5313028764805414, "percentage": 53.12, "elapsed_time": "18:57:50", "remaining_time": "16:44:17"} +{"current_steps": 1100, "total_steps": 2069, "loss": 0.5082, "lr": 4.900083220773968e-06, "epoch": 0.531786318588349, "percentage": 53.17, "elapsed_time": "18:58:51", "remaining_time": "16:43:14"} +{"current_steps": 1101, "total_steps": 2069, "loss": 0.5162, "lr": 4.892091073714189e-06, "epoch": 
0.5322697606961566, "percentage": 53.21, "elapsed_time": "18:59:54", "remaining_time": "16:42:12"} +{"current_steps": 1102, "total_steps": 2069, "loss": 0.5287, "lr": 4.88409920247832e-06, "epoch": 0.5327532028039642, "percentage": 53.26, "elapsed_time": "19:00:55", "remaining_time": "16:41:09"} +{"current_steps": 1103, "total_steps": 2069, "loss": 0.5224, "lr": 4.876107627494234e-06, "epoch": 0.5332366449117718, "percentage": 53.31, "elapsed_time": "19:01:57", "remaining_time": "16:40:07"} +{"current_steps": 1104, "total_steps": 2069, "loss": 0.519, "lr": 4.868116369189033e-06, "epoch": 0.5337200870195794, "percentage": 53.36, "elapsed_time": "19:03:00", "remaining_time": "16:39:05"} +{"current_steps": 1105, "total_steps": 2069, "loss": 0.5315, "lr": 4.860125447989026e-06, "epoch": 0.534203529127387, "percentage": 53.41, "elapsed_time": "19:04:00", "remaining_time": "16:38:01"} +{"current_steps": 1106, "total_steps": 2069, "loss": 0.5013, "lr": 4.852134884319646e-06, "epoch": 0.5346869712351946, "percentage": 53.46, "elapsed_time": "19:05:06", "remaining_time": "16:37:03"} +{"current_steps": 1107, "total_steps": 2069, "loss": 0.5163, "lr": 4.844144698605418e-06, "epoch": 0.5351704133430022, "percentage": 53.5, "elapsed_time": "19:06:08", "remaining_time": "16:36:00"} +{"current_steps": 1108, "total_steps": 2069, "loss": 0.5216, "lr": 4.836154911269902e-06, "epoch": 0.5356538554508098, "percentage": 53.55, "elapsed_time": "19:07:10", "remaining_time": "16:34:58"} +{"current_steps": 1109, "total_steps": 2069, "loss": 0.5248, "lr": 4.8281655427356375e-06, "epoch": 0.5361372975586174, "percentage": 53.6, "elapsed_time": "19:08:13", "remaining_time": "16:33:57"} +{"current_steps": 1110, "total_steps": 2069, "loss": 0.5302, "lr": 4.820176613424095e-06, "epoch": 0.536620739666425, "percentage": 53.65, "elapsed_time": "19:09:16", "remaining_time": "16:32:55"} +{"current_steps": 1111, "total_steps": 2069, "loss": 0.5214, "lr": 4.812188143755621e-06, "epoch": 
0.5371041817742326, "percentage": 53.7, "elapsed_time": "19:10:13", "remaining_time": "16:31:49"} +{"current_steps": 1112, "total_steps": 2069, "loss": 0.5305, "lr": 4.80420015414939e-06, "epoch": 0.5375876238820402, "percentage": 53.75, "elapsed_time": "19:11:17", "remaining_time": "16:30:48"} +{"current_steps": 1113, "total_steps": 2069, "loss": 0.5237, "lr": 4.796212665023345e-06, "epoch": 0.5380710659898477, "percentage": 53.79, "elapsed_time": "19:12:18", "remaining_time": "16:29:45"} +{"current_steps": 1114, "total_steps": 2069, "loss": 0.5277, "lr": 4.788225696794153e-06, "epoch": 0.5385545080976553, "percentage": 53.84, "elapsed_time": "19:13:16", "remaining_time": "16:28:40"} +{"current_steps": 1115, "total_steps": 2069, "loss": 0.5313, "lr": 4.780239269877147e-06, "epoch": 0.5390379502054629, "percentage": 53.89, "elapsed_time": "19:14:13", "remaining_time": "16:27:33"} +{"current_steps": 1116, "total_steps": 2069, "loss": 0.5231, "lr": 4.7722534046862805e-06, "epoch": 0.5395213923132705, "percentage": 53.94, "elapsed_time": "19:15:14", "remaining_time": "16:26:30"} +{"current_steps": 1117, "total_steps": 2069, "loss": 0.507, "lr": 4.764268121634066e-06, "epoch": 0.540004834421078, "percentage": 53.99, "elapsed_time": "19:16:21", "remaining_time": "16:25:32"} +{"current_steps": 1118, "total_steps": 2069, "loss": 0.5042, "lr": 4.7562834411315324e-06, "epoch": 0.5404882765288856, "percentage": 54.04, "elapsed_time": "19:17:28", "remaining_time": "16:24:35"} +{"current_steps": 1119, "total_steps": 2069, "loss": 0.5258, "lr": 4.748299383588167e-06, "epoch": 0.5409717186366932, "percentage": 54.08, "elapsed_time": "19:18:25", "remaining_time": "16:23:28"} +{"current_steps": 1120, "total_steps": 2069, "loss": 0.5375, "lr": 4.74031596941186e-06, "epoch": 0.5414551607445008, "percentage": 54.13, "elapsed_time": "19:19:27", "remaining_time": "16:22:26"} +{"current_steps": 1121, "total_steps": 2069, "loss": 0.5199, "lr": 4.7323332190088675e-06, "epoch": 
0.5419386028523084, "percentage": 54.18, "elapsed_time": "19:20:27", "remaining_time": "16:21:22"} +{"current_steps": 1122, "total_steps": 2069, "loss": 0.5251, "lr": 4.7243511527837374e-06, "epoch": 0.542422044960116, "percentage": 54.23, "elapsed_time": "19:21:25", "remaining_time": "16:20:16"} +{"current_steps": 1123, "total_steps": 2069, "loss": 0.5308, "lr": 4.716369791139279e-06, "epoch": 0.5429054870679236, "percentage": 54.28, "elapsed_time": "19:22:26", "remaining_time": "16:19:13"} +{"current_steps": 1124, "total_steps": 2069, "loss": 0.5201, "lr": 4.708389154476492e-06, "epoch": 0.5433889291757312, "percentage": 54.33, "elapsed_time": "19:23:31", "remaining_time": "16:18:13"} +{"current_steps": 1125, "total_steps": 2069, "loss": 0.5258, "lr": 4.7004092631945315e-06, "epoch": 0.5438723712835388, "percentage": 54.37, "elapsed_time": "19:24:33", "remaining_time": "16:17:11"} +{"current_steps": 1126, "total_steps": 2069, "loss": 0.5222, "lr": 4.692430137690638e-06, "epoch": 0.5443558133913464, "percentage": 54.42, "elapsed_time": "19:25:27", "remaining_time": "16:16:02"} +{"current_steps": 1127, "total_steps": 2069, "loss": 0.5204, "lr": 4.684451798360102e-06, "epoch": 0.544839255499154, "percentage": 54.47, "elapsed_time": "19:26:29", "remaining_time": "16:15:00"} +{"current_steps": 1128, "total_steps": 2069, "loss": 0.5255, "lr": 4.6764742655962e-06, "epoch": 0.5453226976069616, "percentage": 54.52, "elapsed_time": "19:27:31", "remaining_time": "16:13:58"} +{"current_steps": 1129, "total_steps": 2069, "loss": 0.5275, "lr": 4.6684975597901526e-06, "epoch": 0.5458061397147692, "percentage": 54.57, "elapsed_time": "19:28:34", "remaining_time": "16:12:56"} +{"current_steps": 1130, "total_steps": 2069, "loss": 0.5046, "lr": 4.660521701331058e-06, "epoch": 0.5462895818225767, "percentage": 54.62, "elapsed_time": "19:29:41", "remaining_time": "16:11:58"} +{"current_steps": 1131, "total_steps": 2069, "loss": 0.5284, "lr": 4.652546710605857e-06, "epoch": 
0.5467730239303843, "percentage": 54.66, "elapsed_time": "19:30:44", "remaining_time": "16:10:57"} +{"current_steps": 1132, "total_steps": 2069, "loss": 0.5234, "lr": 4.644572607999267e-06, "epoch": 0.5472564660381919, "percentage": 54.71, "elapsed_time": "19:31:46", "remaining_time": "16:09:55"} +{"current_steps": 1133, "total_steps": 2069, "loss": 0.5149, "lr": 4.636599413893734e-06, "epoch": 0.5477399081459995, "percentage": 54.76, "elapsed_time": "19:32:50", "remaining_time": "16:08:55"} +{"current_steps": 1134, "total_steps": 2069, "loss": 0.5069, "lr": 4.628627148669391e-06, "epoch": 0.5482233502538071, "percentage": 54.81, "elapsed_time": "19:33:59", "remaining_time": "16:07:58"} +{"current_steps": 1135, "total_steps": 2069, "loss": 0.5232, "lr": 4.620655832703984e-06, "epoch": 0.5487067923616147, "percentage": 54.86, "elapsed_time": "19:35:03", "remaining_time": "16:06:58"} +{"current_steps": 1136, "total_steps": 2069, "loss": 0.5284, "lr": 4.612685486372846e-06, "epoch": 0.5491902344694223, "percentage": 54.91, "elapsed_time": "19:36:07", "remaining_time": "16:05:57"} +{"current_steps": 1137, "total_steps": 2069, "loss": 0.5292, "lr": 4.604716130048818e-06, "epoch": 0.5496736765772299, "percentage": 54.95, "elapsed_time": "19:37:09", "remaining_time": "16:04:54"} +{"current_steps": 1138, "total_steps": 2069, "loss": 0.5296, "lr": 4.596747784102221e-06, "epoch": 0.5501571186850375, "percentage": 55.0, "elapsed_time": "19:38:05", "remaining_time": "16:03:48"} +{"current_steps": 1139, "total_steps": 2069, "loss": 0.5342, "lr": 4.588780468900787e-06, "epoch": 0.5506405607928451, "percentage": 55.05, "elapsed_time": "19:39:09", "remaining_time": "16:02:47"} +{"current_steps": 1140, "total_steps": 2069, "loss": 0.5278, "lr": 4.580814204809618e-06, "epoch": 0.5511240029006527, "percentage": 55.1, "elapsed_time": "19:40:12", "remaining_time": "16:01:45"} +{"current_steps": 1141, "total_steps": 2069, "loss": 0.5274, "lr": 4.572849012191126e-06, "epoch": 
0.5516074450084603, "percentage": 55.15, "elapsed_time": "19:41:14", "remaining_time": "16:00:43"} +{"current_steps": 1142, "total_steps": 2069, "loss": 0.5308, "lr": 4.564884911404986e-06, "epoch": 0.5520908871162679, "percentage": 55.2, "elapsed_time": "19:42:13", "remaining_time": "15:59:39"} +{"current_steps": 1143, "total_steps": 2069, "loss": 0.5228, "lr": 4.5569219228080805e-06, "epoch": 0.5525743292240755, "percentage": 55.24, "elapsed_time": "19:43:12", "remaining_time": "15:58:34"} +{"current_steps": 1144, "total_steps": 2069, "loss": 0.5001, "lr": 4.54896006675445e-06, "epoch": 0.553057771331883, "percentage": 55.29, "elapsed_time": "19:44:18", "remaining_time": "15:57:35"} +{"current_steps": 1145, "total_steps": 2069, "loss": 0.4963, "lr": 4.540999363595242e-06, "epoch": 0.5535412134396906, "percentage": 55.34, "elapsed_time": "19:45:24", "remaining_time": "15:56:36"} +{"current_steps": 1146, "total_steps": 2069, "loss": 0.5277, "lr": 4.5330398336786526e-06, "epoch": 0.5540246555474981, "percentage": 55.39, "elapsed_time": "19:46:28", "remaining_time": "15:55:35"} +{"current_steps": 1147, "total_steps": 2069, "loss": 0.5427, "lr": 4.525081497349887e-06, "epoch": 0.5545080976553057, "percentage": 55.44, "elapsed_time": "19:47:31", "remaining_time": "15:54:34"} +{"current_steps": 1148, "total_steps": 2069, "loss": 0.5178, "lr": 4.517124374951086e-06, "epoch": 0.5549915397631133, "percentage": 55.49, "elapsed_time": "19:48:32", "remaining_time": "15:53:31"} +{"current_steps": 1149, "total_steps": 2069, "loss": 0.5225, "lr": 4.509168486821304e-06, "epoch": 0.5554749818709209, "percentage": 55.53, "elapsed_time": "19:49:34", "remaining_time": "15:52:29"} +{"current_steps": 1150, "total_steps": 2069, "loss": 0.523, "lr": 4.501213853296425e-06, "epoch": 0.5559584239787285, "percentage": 55.58, "elapsed_time": "19:50:32", "remaining_time": "15:51:24"} +{"current_steps": 1151, "total_steps": 2069, "loss": 0.5251, "lr": 4.493260494709141e-06, "epoch": 
0.5564418660865361, "percentage": 55.63, "elapsed_time": "19:51:40", "remaining_time": "15:50:26"} +{"current_steps": 1152, "total_steps": 2069, "loss": 0.5419, "lr": 4.48530843138887e-06, "epoch": 0.5569253081943437, "percentage": 55.68, "elapsed_time": "19:52:35", "remaining_time": "15:49:18"} +{"current_steps": 1153, "total_steps": 2069, "loss": 0.5318, "lr": 4.477357683661734e-06, "epoch": 0.5574087503021513, "percentage": 55.73, "elapsed_time": "19:53:35", "remaining_time": "15:48:14"} +{"current_steps": 1154, "total_steps": 2069, "loss": 0.506, "lr": 4.469408271850479e-06, "epoch": 0.5578921924099589, "percentage": 55.78, "elapsed_time": "19:54:41", "remaining_time": "15:47:15"} +{"current_steps": 1155, "total_steps": 2069, "loss": 0.5152, "lr": 4.4614602162744455e-06, "epoch": 0.5583756345177665, "percentage": 55.82, "elapsed_time": "19:55:42", "remaining_time": "15:46:13"} +{"current_steps": 1156, "total_steps": 2069, "loss": 0.527, "lr": 4.453513537249503e-06, "epoch": 0.5588590766255741, "percentage": 55.87, "elapsed_time": "19:56:44", "remaining_time": "15:45:10"} +{"current_steps": 1157, "total_steps": 2069, "loss": 0.5247, "lr": 4.445568255088003e-06, "epoch": 0.5593425187333817, "percentage": 55.92, "elapsed_time": "19:57:46", "remaining_time": "15:44:08"} +{"current_steps": 1158, "total_steps": 2069, "loss": 0.5232, "lr": 4.4376243900987296e-06, "epoch": 0.5598259608411893, "percentage": 55.97, "elapsed_time": "19:58:47", "remaining_time": "15:43:05"} +{"current_steps": 1159, "total_steps": 2069, "loss": 0.5365, "lr": 4.429681962586839e-06, "epoch": 0.5603094029489969, "percentage": 56.02, "elapsed_time": "19:59:45", "remaining_time": "15:42:00"} +{"current_steps": 1160, "total_steps": 2069, "loss": 0.5311, "lr": 4.421740992853818e-06, "epoch": 0.5607928450568045, "percentage": 56.07, "elapsed_time": "20:00:45", "remaining_time": "15:40:56"} +{"current_steps": 1161, "total_steps": 2069, "loss": 0.5192, "lr": 4.413801501197424e-06, "epoch": 
0.5612762871646121, "percentage": 56.11, "elapsed_time": "20:01:43", "remaining_time": "15:39:51"} +{"current_steps": 1162, "total_steps": 2069, "loss": 0.5007, "lr": 4.405863507911638e-06, "epoch": 0.5617597292724196, "percentage": 56.16, "elapsed_time": "20:02:50", "remaining_time": "15:38:52"} +{"current_steps": 1163, "total_steps": 2069, "loss": 0.4867, "lr": 4.3979270332866105e-06, "epoch": 0.5622431713802272, "percentage": 56.21, "elapsed_time": "20:03:52", "remaining_time": "15:37:50"} +{"current_steps": 1164, "total_steps": 2069, "loss": 0.5271, "lr": 4.389992097608613e-06, "epoch": 0.5627266134880348, "percentage": 56.26, "elapsed_time": "20:04:53", "remaining_time": "15:36:47"} +{"current_steps": 1165, "total_steps": 2069, "loss": 0.5292, "lr": 4.3820587211599745e-06, "epoch": 0.5632100555958424, "percentage": 56.31, "elapsed_time": "20:05:55", "remaining_time": "15:35:45"} +{"current_steps": 1166, "total_steps": 2069, "loss": 0.5197, "lr": 4.374126924219052e-06, "epoch": 0.56369349770365, "percentage": 56.36, "elapsed_time": "20:06:56", "remaining_time": "15:34:42"} +{"current_steps": 1167, "total_steps": 2069, "loss": 0.5196, "lr": 4.366196727060152e-06, "epoch": 0.5641769398114576, "percentage": 56.4, "elapsed_time": "20:07:59", "remaining_time": "15:33:41"} +{"current_steps": 1168, "total_steps": 2069, "loss": 0.5206, "lr": 4.3582681499535e-06, "epoch": 0.5646603819192652, "percentage": 56.45, "elapsed_time": "20:09:02", "remaining_time": "15:32:39"} +{"current_steps": 1169, "total_steps": 2069, "loss": 0.5341, "lr": 4.3503412131651765e-06, "epoch": 0.5651438240270727, "percentage": 56.5, "elapsed_time": "20:10:06", "remaining_time": "15:31:38"} +{"current_steps": 1170, "total_steps": 2069, "loss": 0.5225, "lr": 4.342415936957073e-06, "epoch": 0.5656272661348803, "percentage": 56.55, "elapsed_time": "20:11:10", "remaining_time": "15:30:38"} +{"current_steps": 1171, "total_steps": 2069, "loss": 0.5328, "lr": 4.334492341586833e-06, "epoch": 
0.5661107082426879, "percentage": 56.6, "elapsed_time": "20:12:13", "remaining_time": "15:29:37"} +{"current_steps": 1172, "total_steps": 2069, "loss": 0.5024, "lr": 4.326570447307804e-06, "epoch": 0.5665941503504955, "percentage": 56.65, "elapsed_time": "20:13:15", "remaining_time": "15:28:34"} +{"current_steps": 1173, "total_steps": 2069, "loss": 0.5302, "lr": 4.318650274368989e-06, "epoch": 0.5670775924583031, "percentage": 56.69, "elapsed_time": "20:14:19", "remaining_time": "15:27:33"} +{"current_steps": 1174, "total_steps": 2069, "loss": 0.518, "lr": 4.310731843014985e-06, "epoch": 0.5675610345661107, "percentage": 56.74, "elapsed_time": "20:15:22", "remaining_time": "15:26:32"} +{"current_steps": 1175, "total_steps": 2069, "loss": 0.5262, "lr": 4.302815173485944e-06, "epoch": 0.5680444766739183, "percentage": 56.79, "elapsed_time": "20:16:23", "remaining_time": "15:25:29"} +{"current_steps": 1176, "total_steps": 2069, "loss": 0.5249, "lr": 4.294900286017509e-06, "epoch": 0.5685279187817259, "percentage": 56.84, "elapsed_time": "20:17:28", "remaining_time": "15:24:29"} +{"current_steps": 1177, "total_steps": 2069, "loss": 0.5399, "lr": 4.286987200840772e-06, "epoch": 0.5690113608895335, "percentage": 56.89, "elapsed_time": "20:18:29", "remaining_time": "15:23:26"} +{"current_steps": 1178, "total_steps": 2069, "loss": 0.522, "lr": 4.279075938182214e-06, "epoch": 0.5694948029973411, "percentage": 56.94, "elapsed_time": "20:19:32", "remaining_time": "15:22:25"} +{"current_steps": 1179, "total_steps": 2069, "loss": 0.4916, "lr": 4.271166518263662e-06, "epoch": 0.5699782451051486, "percentage": 56.98, "elapsed_time": "20:20:36", "remaining_time": "15:21:24"} +{"current_steps": 1180, "total_steps": 2069, "loss": 0.5297, "lr": 4.263258961302232e-06, "epoch": 0.5704616872129562, "percentage": 57.03, "elapsed_time": "20:21:36", "remaining_time": "15:20:21"} +{"current_steps": 1181, "total_steps": 2069, "loss": 0.524, "lr": 4.255353287510272e-06, "epoch": 
0.5709451293207638, "percentage": 57.08, "elapsed_time": "20:22:41", "remaining_time": "15:19:20"} +{"current_steps": 1182, "total_steps": 2069, "loss": 0.5215, "lr": 4.247449517095329e-06, "epoch": 0.5714285714285714, "percentage": 57.13, "elapsed_time": "20:23:44", "remaining_time": "15:18:19"} +{"current_steps": 1183, "total_steps": 2069, "loss": 0.5099, "lr": 4.239547670260069e-06, "epoch": 0.571912013536379, "percentage": 57.18, "elapsed_time": "20:24:47", "remaining_time": "15:17:18"} +{"current_steps": 1184, "total_steps": 2069, "loss": 0.5191, "lr": 4.231647767202254e-06, "epoch": 0.5723954556441866, "percentage": 57.23, "elapsed_time": "20:25:48", "remaining_time": "15:16:14"} +{"current_steps": 1185, "total_steps": 2069, "loss": 0.528, "lr": 4.223749828114672e-06, "epoch": 0.5728788977519942, "percentage": 57.27, "elapsed_time": "20:26:48", "remaining_time": "15:15:11"} +{"current_steps": 1186, "total_steps": 2069, "loss": 0.4862, "lr": 4.215853873185093e-06, "epoch": 0.5733623398598018, "percentage": 57.32, "elapsed_time": "20:27:49", "remaining_time": "15:14:08"} +{"current_steps": 1187, "total_steps": 2069, "loss": 0.5183, "lr": 4.2079599225962115e-06, "epoch": 0.5738457819676094, "percentage": 57.37, "elapsed_time": "20:28:54", "remaining_time": "15:13:08"} +{"current_steps": 1188, "total_steps": 2069, "loss": 0.5045, "lr": 4.2000679965256045e-06, "epoch": 0.574329224075417, "percentage": 57.42, "elapsed_time": "20:29:57", "remaining_time": "15:12:07"} +{"current_steps": 1189, "total_steps": 2069, "loss": 0.4942, "lr": 4.192178115145668e-06, "epoch": 0.5748126661832246, "percentage": 57.47, "elapsed_time": "20:30:59", "remaining_time": "15:11:04"} +{"current_steps": 1190, "total_steps": 2069, "loss": 0.4962, "lr": 4.184290298623578e-06, "epoch": 0.5752961082910322, "percentage": 57.52, "elapsed_time": "20:32:03", "remaining_time": "15:10:04"} +{"current_steps": 1191, "total_steps": 2069, "loss": 0.5397, "lr": 4.176404567121225e-06, "epoch": 
0.5757795503988398, "percentage": 57.56, "elapsed_time": "20:33:05", "remaining_time": "15:09:01"} +{"current_steps": 1192, "total_steps": 2069, "loss": 0.5249, "lr": 4.16852094079518e-06, "epoch": 0.5762629925066474, "percentage": 57.61, "elapsed_time": "20:34:05", "remaining_time": "15:07:57"} +{"current_steps": 1193, "total_steps": 2069, "loss": 0.5192, "lr": 4.160639439796624e-06, "epoch": 0.576746434614455, "percentage": 57.66, "elapsed_time": "20:35:08", "remaining_time": "15:06:56"} +{"current_steps": 1194, "total_steps": 2069, "loss": 0.5224, "lr": 4.152760084271305e-06, "epoch": 0.5772298767222626, "percentage": 57.71, "elapsed_time": "20:36:07", "remaining_time": "15:05:51"} +{"current_steps": 1195, "total_steps": 2069, "loss": 0.531, "lr": 4.1448828943595e-06, "epoch": 0.57771331883007, "percentage": 57.76, "elapsed_time": "20:37:07", "remaining_time": "15:04:48"} +{"current_steps": 1196, "total_steps": 2069, "loss": 0.5309, "lr": 4.1370078901959306e-06, "epoch": 0.5781967609378776, "percentage": 57.81, "elapsed_time": "20:38:12", "remaining_time": "15:03:48"} +{"current_steps": 1197, "total_steps": 2069, "loss": 0.5314, "lr": 4.129135091909752e-06, "epoch": 0.5786802030456852, "percentage": 57.85, "elapsed_time": "20:39:12", "remaining_time": "15:02:44"} +{"current_steps": 1198, "total_steps": 2069, "loss": 0.4971, "lr": 4.121264519624463e-06, "epoch": 0.5791636451534928, "percentage": 57.9, "elapsed_time": "20:40:17", "remaining_time": "15:01:45"} +{"current_steps": 1199, "total_steps": 2069, "loss": 0.5421, "lr": 4.113396193457887e-06, "epoch": 0.5796470872613004, "percentage": 57.95, "elapsed_time": "20:41:21", "remaining_time": "15:00:44"} +{"current_steps": 1200, "total_steps": 2069, "loss": 0.5286, "lr": 4.105530133522096e-06, "epoch": 0.580130529369108, "percentage": 58.0, "elapsed_time": "20:42:24", "remaining_time": "14:59:42"} +{"current_steps": 1201, "total_steps": 2069, "loss": 0.5283, "lr": 4.0976663599233745e-06, "epoch": 
0.5806139714769156, "percentage": 58.05, "elapsed_time": "20:43:22", "remaining_time": "14:58:37"} +{"current_steps": 1202, "total_steps": 2069, "loss": 0.5031, "lr": 4.08980489276216e-06, "epoch": 0.5810974135847232, "percentage": 58.1, "elapsed_time": "20:44:25", "remaining_time": "14:57:36"} +{"current_steps": 1203, "total_steps": 2069, "loss": 0.5247, "lr": 4.081945752133e-06, "epoch": 0.5815808556925308, "percentage": 58.14, "elapsed_time": "20:45:25", "remaining_time": "14:56:32"} +{"current_steps": 1204, "total_steps": 2069, "loss": 0.5233, "lr": 4.074088958124488e-06, "epoch": 0.5820642978003384, "percentage": 58.19, "elapsed_time": "20:46:26", "remaining_time": "14:55:29"} +{"current_steps": 1205, "total_steps": 2069, "loss": 0.4997, "lr": 4.066234530819222e-06, "epoch": 0.582547739908146, "percentage": 58.24, "elapsed_time": "20:47:30", "remaining_time": "14:54:28"} +{"current_steps": 1206, "total_steps": 2069, "loss": 0.5327, "lr": 4.058382490293755e-06, "epoch": 0.5830311820159536, "percentage": 58.29, "elapsed_time": "20:48:34", "remaining_time": "14:53:27"} +{"current_steps": 1207, "total_steps": 2069, "loss": 0.5172, "lr": 4.050532856618532e-06, "epoch": 0.5835146241237612, "percentage": 58.34, "elapsed_time": "20:49:34", "remaining_time": "14:52:24"} +{"current_steps": 1208, "total_steps": 2069, "loss": 0.5321, "lr": 4.0426856498578515e-06, "epoch": 0.5839980662315688, "percentage": 58.39, "elapsed_time": "20:50:33", "remaining_time": "14:51:20"} +{"current_steps": 1209, "total_steps": 2069, "loss": 0.5286, "lr": 4.034840890069805e-06, "epoch": 0.5844815083393764, "percentage": 58.43, "elapsed_time": "20:51:34", "remaining_time": "14:50:17"} +{"current_steps": 1210, "total_steps": 2069, "loss": 0.5205, "lr": 4.0269985973062325e-06, "epoch": 0.584964950447184, "percentage": 58.48, "elapsed_time": "20:52:36", "remaining_time": "14:49:14"} +{"current_steps": 1211, "total_steps": 2069, "loss": 0.5186, "lr": 4.019158791612662e-06, "epoch": 
0.5854483925549916, "percentage": 58.53, "elapsed_time": "20:53:37", "remaining_time": "14:48:11"} +{"current_steps": 1212, "total_steps": 2069, "loss": 0.5019, "lr": 4.0113214930282765e-06, "epoch": 0.5859318346627991, "percentage": 58.58, "elapsed_time": "20:54:41", "remaining_time": "14:47:11"} +{"current_steps": 1213, "total_steps": 2069, "loss": 0.5292, "lr": 4.003486721585834e-06, "epoch": 0.5864152767706067, "percentage": 58.63, "elapsed_time": "20:55:40", "remaining_time": "14:46:06"} +{"current_steps": 1214, "total_steps": 2069, "loss": 0.524, "lr": 3.995654497311649e-06, "epoch": 0.5868987188784143, "percentage": 58.68, "elapsed_time": "20:56:42", "remaining_time": "14:45:04"} +{"current_steps": 1215, "total_steps": 2069, "loss": 0.5296, "lr": 3.987824840225512e-06, "epoch": 0.5873821609862219, "percentage": 58.72, "elapsed_time": "20:57:43", "remaining_time": "14:44:01"} +{"current_steps": 1216, "total_steps": 2069, "loss": 0.5191, "lr": 3.979997770340664e-06, "epoch": 0.5878656030940295, "percentage": 58.77, "elapsed_time": "20:58:44", "remaining_time": "14:42:58"} +{"current_steps": 1217, "total_steps": 2069, "loss": 0.5259, "lr": 3.972173307663721e-06, "epoch": 0.5883490452018371, "percentage": 58.82, "elapsed_time": "20:59:51", "remaining_time": "14:42:00"} +{"current_steps": 1218, "total_steps": 2069, "loss": 0.5169, "lr": 3.964351472194642e-06, "epoch": 0.5888324873096447, "percentage": 58.87, "elapsed_time": "21:00:53", "remaining_time": "14:40:57"} +{"current_steps": 1219, "total_steps": 2069, "loss": 0.5324, "lr": 3.95653228392667e-06, "epoch": 0.5893159294174523, "percentage": 58.92, "elapsed_time": "21:01:55", "remaining_time": "14:39:55"} +{"current_steps": 1220, "total_steps": 2069, "loss": 0.5253, "lr": 3.9487157628462784e-06, "epoch": 0.5897993715252599, "percentage": 58.97, "elapsed_time": "21:02:55", "remaining_time": "14:38:52"} +{"current_steps": 1221, "total_steps": 2069, "loss": 0.5207, "lr": 3.940901928933127e-06, "epoch": 
0.5902828136330674, "percentage": 59.01, "elapsed_time": "21:03:58", "remaining_time": "14:37:50"} +{"current_steps": 1222, "total_steps": 2069, "loss": 0.5216, "lr": 3.933090802160004e-06, "epoch": 0.590766255740875, "percentage": 59.06, "elapsed_time": "21:05:01", "remaining_time": "14:36:49"} +{"current_steps": 1223, "total_steps": 2069, "loss": 0.5158, "lr": 3.925282402492779e-06, "epoch": 0.5912496978486826, "percentage": 59.11, "elapsed_time": "21:06:03", "remaining_time": "14:35:47"} +{"current_steps": 1224, "total_steps": 2069, "loss": 0.5337, "lr": 3.917476749890351e-06, "epoch": 0.5917331399564902, "percentage": 59.16, "elapsed_time": "21:07:04", "remaining_time": "14:34:43"} +{"current_steps": 1225, "total_steps": 2069, "loss": 0.528, "lr": 3.909673864304597e-06, "epoch": 0.5922165820642978, "percentage": 59.21, "elapsed_time": "21:08:06", "remaining_time": "14:33:42"} +{"current_steps": 1226, "total_steps": 2069, "loss": 0.5366, "lr": 3.901873765680322e-06, "epoch": 0.5927000241721054, "percentage": 59.26, "elapsed_time": "21:09:12", "remaining_time": "14:32:42"} +{"current_steps": 1227, "total_steps": 2069, "loss": 0.5202, "lr": 3.894076473955207e-06, "epoch": 0.593183466279913, "percentage": 59.3, "elapsed_time": "21:10:15", "remaining_time": "14:31:41"} +{"current_steps": 1228, "total_steps": 2069, "loss": 0.5293, "lr": 3.886282009059757e-06, "epoch": 0.5936669083877205, "percentage": 59.35, "elapsed_time": "21:11:20", "remaining_time": "14:30:41"} +{"current_steps": 1229, "total_steps": 2069, "loss": 0.521, "lr": 3.878490390917253e-06, "epoch": 0.5941503504955281, "percentage": 59.4, "elapsed_time": "21:12:18", "remaining_time": "14:29:36"} +{"current_steps": 1230, "total_steps": 2069, "loss": 0.5219, "lr": 3.8707016394436985e-06, "epoch": 0.5946337926033357, "percentage": 59.45, "elapsed_time": "21:13:18", "remaining_time": "14:28:32"} +{"current_steps": 1231, "total_steps": 2069, "loss": 0.5274, "lr": 3.86291577454777e-06, "epoch": 
0.5951172347111433, "percentage": 59.5, "elapsed_time": "21:14:18", "remaining_time": "14:27:29"} +{"current_steps": 1232, "total_steps": 2069, "loss": 0.5287, "lr": 3.855132816130767e-06, "epoch": 0.5956006768189509, "percentage": 59.55, "elapsed_time": "21:15:19", "remaining_time": "14:26:25"} +{"current_steps": 1233, "total_steps": 2069, "loss": 0.5214, "lr": 3.847352784086556e-06, "epoch": 0.5960841189267585, "percentage": 59.59, "elapsed_time": "21:16:21", "remaining_time": "14:25:23"} +{"current_steps": 1234, "total_steps": 2069, "loss": 0.5348, "lr": 3.839575698301529e-06, "epoch": 0.5965675610345661, "percentage": 59.64, "elapsed_time": "21:17:21", "remaining_time": "14:24:20"} +{"current_steps": 1235, "total_steps": 2069, "loss": 0.521, "lr": 3.831801578654541e-06, "epoch": 0.5970510031423737, "percentage": 59.69, "elapsed_time": "21:18:17", "remaining_time": "14:23:13"} +{"current_steps": 1236, "total_steps": 2069, "loss": 0.5012, "lr": 3.8240304450168716e-06, "epoch": 0.5975344452501813, "percentage": 59.74, "elapsed_time": "21:19:24", "remaining_time": "14:22:15"} +{"current_steps": 1237, "total_steps": 2069, "loss": 0.5239, "lr": 3.8162623172521615e-06, "epoch": 0.5980178873579889, "percentage": 59.79, "elapsed_time": "21:20:30", "remaining_time": "14:21:15"} +{"current_steps": 1238, "total_steps": 2069, "loss": 0.518, "lr": 3.808497215216374e-06, "epoch": 0.5985013294657965, "percentage": 59.84, "elapsed_time": "21:21:31", "remaining_time": "14:20:12"} +{"current_steps": 1239, "total_steps": 2069, "loss": 0.5212, "lr": 3.8007351587577342e-06, "epoch": 0.5989847715736041, "percentage": 59.88, "elapsed_time": "21:22:32", "remaining_time": "14:19:10"} +{"current_steps": 1240, "total_steps": 2069, "loss": 0.5256, "lr": 3.7929761677166847e-06, "epoch": 0.5994682136814117, "percentage": 59.93, "elapsed_time": "21:23:32", "remaining_time": "14:18:06"} +{"current_steps": 1241, "total_steps": 2069, "loss": 0.5258, "lr": 3.7852202619258327e-06, "epoch": 
0.5999516557892193, "percentage": 59.98, "elapsed_time": "21:24:30", "remaining_time": "14:17:01"} +{"current_steps": 1242, "total_steps": 2069, "loss": 0.5226, "lr": 3.777467461209895e-06, "epoch": 0.6004350978970269, "percentage": 60.03, "elapsed_time": "21:25:30", "remaining_time": "14:15:58"} +{"current_steps": 1243, "total_steps": 2069, "loss": 0.5265, "lr": 3.76971778538566e-06, "epoch": 0.6009185400048345, "percentage": 60.08, "elapsed_time": "21:26:31", "remaining_time": "14:14:55"} +{"current_steps": 1244, "total_steps": 2069, "loss": 0.521, "lr": 3.76197125426192e-06, "epoch": 0.6014019821126421, "percentage": 60.13, "elapsed_time": "21:27:31", "remaining_time": "14:13:52"} +{"current_steps": 1245, "total_steps": 2069, "loss": 0.5119, "lr": 3.754227887639434e-06, "epoch": 0.6018854242204495, "percentage": 60.17, "elapsed_time": "21:28:34", "remaining_time": "14:12:50"} +{"current_steps": 1246, "total_steps": 2069, "loss": 0.5258, "lr": 3.7464877053108706e-06, "epoch": 0.6023688663282571, "percentage": 60.22, "elapsed_time": "21:29:31", "remaining_time": "14:11:45"} +{"current_steps": 1247, "total_steps": 2069, "loss": 0.529, "lr": 3.7387507270607617e-06, "epoch": 0.6028523084360647, "percentage": 60.27, "elapsed_time": "21:30:34", "remaining_time": "14:10:43"} +{"current_steps": 1248, "total_steps": 2069, "loss": 0.528, "lr": 3.7310169726654444e-06, "epoch": 0.6033357505438723, "percentage": 60.32, "elapsed_time": "21:31:34", "remaining_time": "14:09:40"} +{"current_steps": 1249, "total_steps": 2069, "loss": 0.5182, "lr": 3.7232864618930217e-06, "epoch": 0.6038191926516799, "percentage": 60.37, "elapsed_time": "21:33:38", "remaining_time": "14:09:18"} +{"current_steps": 1250, "total_steps": 2069, "loss": 0.5133, "lr": 3.715559214503298e-06, "epoch": 0.6043026347594875, "percentage": 60.42, "elapsed_time": "21:34:37", "remaining_time": "14:08:14"} +{"current_steps": 1251, "total_steps": 2069, "loss": 0.4877, "lr": 3.707835250247745e-06, "epoch": 
0.6047860768672951, "percentage": 60.46, "elapsed_time": "21:35:40", "remaining_time": "14:07:12"} +{"current_steps": 1252, "total_steps": 2069, "loss": 0.5256, "lr": 3.7001145888694335e-06, "epoch": 0.6052695189751027, "percentage": 60.51, "elapsed_time": "21:36:45", "remaining_time": "14:06:12"} +{"current_steps": 1253, "total_steps": 2069, "loss": 0.5028, "lr": 3.6923972501029996e-06, "epoch": 0.6057529610829103, "percentage": 60.56, "elapsed_time": "21:37:47", "remaining_time": "14:05:10"} +{"current_steps": 1254, "total_steps": 2069, "loss": 0.5249, "lr": 3.684683253674583e-06, "epoch": 0.6062364031907179, "percentage": 60.61, "elapsed_time": "21:38:44", "remaining_time": "14:04:04"} +{"current_steps": 1255, "total_steps": 2069, "loss": 0.5119, "lr": 3.676972619301776e-06, "epoch": 0.6067198452985255, "percentage": 60.66, "elapsed_time": "21:39:43", "remaining_time": "14:03:00"} +{"current_steps": 1256, "total_steps": 2069, "loss": 0.5262, "lr": 3.6692653666935875e-06, "epoch": 0.6072032874063331, "percentage": 60.71, "elapsed_time": "21:40:41", "remaining_time": "14:01:55"} +{"current_steps": 1257, "total_steps": 2069, "loss": 0.5168, "lr": 3.6615615155503703e-06, "epoch": 0.6076867295141407, "percentage": 60.75, "elapsed_time": "21:41:43", "remaining_time": "14:00:53"} +{"current_steps": 1258, "total_steps": 2069, "loss": 0.5193, "lr": 3.6538610855637953e-06, "epoch": 0.6081701716219483, "percentage": 60.8, "elapsed_time": "21:42:43", "remaining_time": "13:59:49"} +{"current_steps": 1259, "total_steps": 2069, "loss": 0.5213, "lr": 3.6461640964167755e-06, "epoch": 0.6086536137297559, "percentage": 60.85, "elapsed_time": "21:43:43", "remaining_time": "13:58:46"} +{"current_steps": 1260, "total_steps": 2069, "loss": 0.4982, "lr": 3.638470567783442e-06, "epoch": 0.6091370558375635, "percentage": 60.9, "elapsed_time": "21:44:50", "remaining_time": "13:57:47"} +{"current_steps": 1261, "total_steps": 2069, "loss": 0.5329, "lr": 3.630780519329069e-06, "epoch": 
0.609620497945371, "percentage": 60.95, "elapsed_time": "21:45:55", "remaining_time": "13:56:47"} +{"current_steps": 1262, "total_steps": 2069, "loss": 0.5278, "lr": 3.623093970710043e-06, "epoch": 0.6101039400531786, "percentage": 61.0, "elapsed_time": "21:46:59", "remaining_time": "13:55:46"} +{"current_steps": 1263, "total_steps": 2069, "loss": 0.5358, "lr": 3.615410941573799e-06, "epoch": 0.6105873821609862, "percentage": 61.04, "elapsed_time": "21:47:57", "remaining_time": "13:54:41"} +{"current_steps": 1264, "total_steps": 2069, "loss": 0.4973, "lr": 3.607731451558783e-06, "epoch": 0.6110708242687938, "percentage": 61.09, "elapsed_time": "21:49:00", "remaining_time": "13:53:39"} +{"current_steps": 1265, "total_steps": 2069, "loss": 0.5223, "lr": 3.6000555202943872e-06, "epoch": 0.6115542663766014, "percentage": 61.14, "elapsed_time": "21:49:59", "remaining_time": "13:52:36"} +{"current_steps": 1266, "total_steps": 2069, "loss": 0.5206, "lr": 3.59238316740091e-06, "epoch": 0.612037708484409, "percentage": 61.19, "elapsed_time": "21:50:59", "remaining_time": "13:51:32"} +{"current_steps": 1267, "total_steps": 2069, "loss": 0.5306, "lr": 3.584714412489506e-06, "epoch": 0.6125211505922166, "percentage": 61.24, "elapsed_time": "21:51:59", "remaining_time": "13:50:28"} +{"current_steps": 1268, "total_steps": 2069, "loss": 0.5029, "lr": 3.5770492751621292e-06, "epoch": 0.6130045927000242, "percentage": 61.29, "elapsed_time": "21:53:01", "remaining_time": "13:49:26"} +{"current_steps": 1269, "total_steps": 2069, "loss": 0.5167, "lr": 3.5693877750114903e-06, "epoch": 0.6134880348078318, "percentage": 61.33, "elapsed_time": "21:54:06", "remaining_time": "13:48:26"} +{"current_steps": 1270, "total_steps": 2069, "loss": 0.5049, "lr": 3.5617299316209984e-06, "epoch": 0.6139714769156394, "percentage": 61.38, "elapsed_time": "21:55:08", "remaining_time": "13:47:24"} +{"current_steps": 1271, "total_steps": 2069, "loss": 0.4939, "lr": 3.5540757645647217e-06, "epoch": 
0.614454919023447, "percentage": 61.43, "elapsed_time": "21:56:18", "remaining_time": "13:46:26"} +{"current_steps": 1272, "total_steps": 2069, "loss": 0.5199, "lr": 3.546425293407324e-06, "epoch": 0.6149383611312546, "percentage": 61.48, "elapsed_time": "21:57:18", "remaining_time": "13:45:23"} +{"current_steps": 1273, "total_steps": 2069, "loss": 0.5132, "lr": 3.5387785377040316e-06, "epoch": 0.6154218032390621, "percentage": 61.53, "elapsed_time": "21:58:22", "remaining_time": "13:44:22"} +{"current_steps": 1274, "total_steps": 2069, "loss": 0.5269, "lr": 3.531135517000561e-06, "epoch": 0.6159052453468697, "percentage": 61.58, "elapsed_time": "21:59:27", "remaining_time": "13:43:21"} +{"current_steps": 1275, "total_steps": 2069, "loss": 0.5122, "lr": 3.523496250833098e-06, "epoch": 0.6163886874546773, "percentage": 61.62, "elapsed_time": "22:00:29", "remaining_time": "13:42:19"} +{"current_steps": 1276, "total_steps": 2069, "loss": 0.5234, "lr": 3.515860758728214e-06, "epoch": 0.6168721295624849, "percentage": 61.67, "elapsed_time": "22:01:30", "remaining_time": "13:41:16"} +{"current_steps": 1277, "total_steps": 2069, "loss": 0.5269, "lr": 3.5082290602028492e-06, "epoch": 0.6173555716702925, "percentage": 61.72, "elapsed_time": "22:02:32", "remaining_time": "13:40:14"} +{"current_steps": 1278, "total_steps": 2069, "loss": 0.5177, "lr": 3.5006011747642366e-06, "epoch": 0.6178390137781, "percentage": 61.77, "elapsed_time": "22:03:32", "remaining_time": "13:39:11"} +{"current_steps": 1279, "total_steps": 2069, "loss": 0.5329, "lr": 3.492977121909865e-06, "epoch": 0.6183224558859076, "percentage": 61.82, "elapsed_time": "22:04:38", "remaining_time": "13:38:11"} +{"current_steps": 1280, "total_steps": 2069, "loss": 0.5275, "lr": 3.4853569211274306e-06, "epoch": 0.6188058979937152, "percentage": 61.87, "elapsed_time": "22:05:40", "remaining_time": "13:37:09"} +{"current_steps": 1281, "total_steps": 2069, "loss": 0.5117, "lr": 3.4777405918947795e-06, "epoch": 
0.6192893401015228, "percentage": 61.91, "elapsed_time": "22:06:41", "remaining_time": "13:36:06"} +{"current_steps": 1282, "total_steps": 2069, "loss": 0.5274, "lr": 3.4701281536798638e-06, "epoch": 0.6197727822093304, "percentage": 61.96, "elapsed_time": "22:07:40", "remaining_time": "13:35:02"} +{"current_steps": 1283, "total_steps": 2069, "loss": 0.5282, "lr": 3.462519625940688e-06, "epoch": 0.620256224317138, "percentage": 62.01, "elapsed_time": "22:08:39", "remaining_time": "13:33:58"} +{"current_steps": 1284, "total_steps": 2069, "loss": 0.5224, "lr": 3.4549150281252635e-06, "epoch": 0.6207396664249456, "percentage": 62.06, "elapsed_time": "22:09:43", "remaining_time": "13:32:57"} +{"current_steps": 1285, "total_steps": 2069, "loss": 0.5221, "lr": 3.4473143796715537e-06, "epoch": 0.6212231085327532, "percentage": 62.11, "elapsed_time": "22:10:44", "remaining_time": "13:31:54"} +{"current_steps": 1286, "total_steps": 2069, "loss": 0.5286, "lr": 3.4397177000074307e-06, "epoch": 0.6217065506405608, "percentage": 62.16, "elapsed_time": "22:11:49", "remaining_time": "13:30:54"} +{"current_steps": 1287, "total_steps": 2069, "loss": 0.519, "lr": 3.4321250085506174e-06, "epoch": 0.6221899927483684, "percentage": 62.2, "elapsed_time": "22:12:44", "remaining_time": "13:29:47"} +{"current_steps": 1288, "total_steps": 2069, "loss": 0.5291, "lr": 3.4245363247086477e-06, "epoch": 0.622673434856176, "percentage": 62.25, "elapsed_time": "22:13:49", "remaining_time": "13:28:47"} +{"current_steps": 1289, "total_steps": 2069, "loss": 0.5084, "lr": 3.4169516678788096e-06, "epoch": 0.6231568769639836, "percentage": 62.3, "elapsed_time": "22:14:50", "remaining_time": "13:27:44"} +{"current_steps": 1290, "total_steps": 2069, "loss": 0.5181, "lr": 3.4093710574480926e-06, "epoch": 0.6236403190717912, "percentage": 62.35, "elapsed_time": "22:15:48", "remaining_time": "13:26:40"} +{"current_steps": 1291, "total_steps": 2069, "loss": 0.5215, "lr": 3.4017945127931517e-06, "epoch": 
0.6241237611795988, "percentage": 62.4, "elapsed_time": "22:16:52", "remaining_time": "13:25:38"} +{"current_steps": 1292, "total_steps": 2069, "loss": 0.5219, "lr": 3.394222053280245e-06, "epoch": 0.6246072032874064, "percentage": 62.45, "elapsed_time": "22:17:51", "remaining_time": "13:24:34"} +{"current_steps": 1293, "total_steps": 2069, "loss": 0.5295, "lr": 3.386653698265189e-06, "epoch": 0.625090645395214, "percentage": 62.49, "elapsed_time": "22:18:52", "remaining_time": "13:23:32"} +{"current_steps": 1294, "total_steps": 2069, "loss": 0.4993, "lr": 3.3790894670933096e-06, "epoch": 0.6255740875030215, "percentage": 62.54, "elapsed_time": "22:19:57", "remaining_time": "13:22:31"} +{"current_steps": 1295, "total_steps": 2069, "loss": 0.5212, "lr": 3.3715293790993906e-06, "epoch": 0.626057529610829, "percentage": 62.59, "elapsed_time": "22:21:01", "remaining_time": "13:21:30"} +{"current_steps": 1296, "total_steps": 2069, "loss": 0.5145, "lr": 3.3639734536076263e-06, "epoch": 0.6265409717186367, "percentage": 62.64, "elapsed_time": "22:22:01", "remaining_time": "13:20:27"} +{"current_steps": 1297, "total_steps": 2069, "loss": 0.5263, "lr": 3.356421709931573e-06, "epoch": 0.6270244138264442, "percentage": 62.69, "elapsed_time": "22:23:01", "remaining_time": "13:19:23"} +{"current_steps": 1298, "total_steps": 2069, "loss": 0.5193, "lr": 3.348874167374093e-06, "epoch": 0.6275078559342518, "percentage": 62.74, "elapsed_time": "22:24:05", "remaining_time": "13:18:22"} +{"current_steps": 1299, "total_steps": 2069, "loss": 0.5342, "lr": 3.341330845227316e-06, "epoch": 0.6279912980420594, "percentage": 62.78, "elapsed_time": "22:25:09", "remaining_time": "13:17:21"} +{"current_steps": 1300, "total_steps": 2069, "loss": 0.5192, "lr": 3.33379176277258e-06, "epoch": 0.628474740149867, "percentage": 62.83, "elapsed_time": "22:26:10", "remaining_time": "13:16:19"} +{"current_steps": 1301, "total_steps": 2069, "loss": 0.5222, "lr": 3.326256939280389e-06, "epoch": 
0.6289581822576746, "percentage": 62.88, "elapsed_time": "22:27:13", "remaining_time": "13:15:16"} +{"current_steps": 1302, "total_steps": 2069, "loss": 0.5008, "lr": 3.3187263940103587e-06, "epoch": 0.6294416243654822, "percentage": 62.93, "elapsed_time": "22:28:15", "remaining_time": "13:14:15"} +{"current_steps": 1303, "total_steps": 2069, "loss": 0.5252, "lr": 3.3112001462111666e-06, "epoch": 0.6299250664732898, "percentage": 62.98, "elapsed_time": "22:29:18", "remaining_time": "13:13:13"} +{"current_steps": 1304, "total_steps": 2069, "loss": 0.5269, "lr": 3.3036782151205134e-06, "epoch": 0.6304085085810974, "percentage": 63.03, "elapsed_time": "22:30:19", "remaining_time": "13:12:10"} +{"current_steps": 1305, "total_steps": 2069, "loss": 0.5348, "lr": 3.296160619965056e-06, "epoch": 0.630891950688905, "percentage": 63.07, "elapsed_time": "22:31:21", "remaining_time": "13:11:08"} +{"current_steps": 1306, "total_steps": 2069, "loss": 0.5315, "lr": 3.2886473799603793e-06, "epoch": 0.6313753927967126, "percentage": 63.12, "elapsed_time": "22:32:17", "remaining_time": "13:10:02"} +{"current_steps": 1307, "total_steps": 2069, "loss": 0.5199, "lr": 3.2811385143109254e-06, "epoch": 0.6318588349045202, "percentage": 63.17, "elapsed_time": "22:33:16", "remaining_time": "13:08:58"} +{"current_steps": 1308, "total_steps": 2069, "loss": 0.5287, "lr": 3.2736340422099633e-06, "epoch": 0.6323422770123278, "percentage": 63.22, "elapsed_time": "22:34:20", "remaining_time": "13:07:57"} +{"current_steps": 1309, "total_steps": 2069, "loss": 0.5034, "lr": 3.2661339828395263e-06, "epoch": 0.6328257191201354, "percentage": 63.27, "elapsed_time": "22:35:28", "remaining_time": "13:06:58"} +{"current_steps": 1310, "total_steps": 2069, "loss": 0.5169, "lr": 3.2586383553703723e-06, "epoch": 0.633309161227943, "percentage": 63.32, "elapsed_time": "22:36:32", "remaining_time": "13:05:57"} +{"current_steps": 1311, "total_steps": 2069, "loss": 0.5084, "lr": 3.2511471789619274e-06, "epoch": 
0.6337926033357505, "percentage": 63.36, "elapsed_time": "22:37:38", "remaining_time": "13:04:57"} +{"current_steps": 1312, "total_steps": 2069, "loss": 0.5126, "lr": 3.2436604727622447e-06, "epoch": 0.6342760454435581, "percentage": 63.41, "elapsed_time": "22:38:39", "remaining_time": "13:03:55"} +{"current_steps": 1313, "total_steps": 2069, "loss": 0.5158, "lr": 3.2361782559079465e-06, "epoch": 0.6347594875513657, "percentage": 63.46, "elapsed_time": "22:39:39", "remaining_time": "13:02:51"} +{"current_steps": 1314, "total_steps": 2069, "loss": 0.5145, "lr": 3.228700547524184e-06, "epoch": 0.6352429296591733, "percentage": 63.51, "elapsed_time": "22:40:44", "remaining_time": "13:01:51"} +{"current_steps": 1315, "total_steps": 2069, "loss": 0.5146, "lr": 3.221227366724581e-06, "epoch": 0.6357263717669809, "percentage": 63.56, "elapsed_time": "22:41:43", "remaining_time": "13:00:47"} +{"current_steps": 1316, "total_steps": 2069, "loss": 0.5207, "lr": 3.2137587326111896e-06, "epoch": 0.6362098138747885, "percentage": 63.61, "elapsed_time": "22:42:43", "remaining_time": "12:59:43"} +{"current_steps": 1317, "total_steps": 2069, "loss": 0.5268, "lr": 3.206294664274443e-06, "epoch": 0.6366932559825961, "percentage": 63.65, "elapsed_time": "22:43:44", "remaining_time": "12:58:41"} +{"current_steps": 1318, "total_steps": 2069, "loss": 0.5277, "lr": 3.198835180793097e-06, "epoch": 0.6371766980904037, "percentage": 63.7, "elapsed_time": "22:44:45", "remaining_time": "12:57:38"} +{"current_steps": 1319, "total_steps": 2069, "loss": 0.5195, "lr": 3.1913803012341987e-06, "epoch": 0.6376601401982113, "percentage": 63.75, "elapsed_time": "22:45:42", "remaining_time": "12:56:33"} +{"current_steps": 1320, "total_steps": 2069, "loss": 0.5157, "lr": 3.183930044653014e-06, "epoch": 0.6381435823060189, "percentage": 63.8, "elapsed_time": "22:46:45", "remaining_time": "12:55:32"} +{"current_steps": 1321, "total_steps": 2069, "loss": 0.5312, "lr": 3.176484430093007e-06, "epoch": 
0.6386270244138265, "percentage": 63.85, "elapsed_time": "22:47:47", "remaining_time": "12:54:29"} +{"current_steps": 1322, "total_steps": 2069, "loss": 0.523, "lr": 3.1690434765857604e-06, "epoch": 0.6391104665216341, "percentage": 63.9, "elapsed_time": "22:48:48", "remaining_time": "12:53:26"} +{"current_steps": 1323, "total_steps": 2069, "loss": 0.5155, "lr": 3.1616072031509594e-06, "epoch": 0.6395939086294417, "percentage": 63.94, "elapsed_time": "22:49:48", "remaining_time": "12:52:23"} +{"current_steps": 1324, "total_steps": 2069, "loss": 0.5214, "lr": 3.154175628796311e-06, "epoch": 0.6400773507372493, "percentage": 63.99, "elapsed_time": "22:50:51", "remaining_time": "12:51:21"} +{"current_steps": 1325, "total_steps": 2069, "loss": 0.5202, "lr": 3.146748772517523e-06, "epoch": 0.6405607928450568, "percentage": 64.04, "elapsed_time": "22:51:50", "remaining_time": "12:50:18"} +{"current_steps": 1326, "total_steps": 2069, "loss": 0.5226, "lr": 3.139326653298236e-06, "epoch": 0.6410442349528644, "percentage": 64.09, "elapsed_time": "22:52:51", "remaining_time": "12:49:15"} +{"current_steps": 1327, "total_steps": 2069, "loss": 0.5192, "lr": 3.1319092901099847e-06, "epoch": 0.6415276770606719, "percentage": 64.14, "elapsed_time": "22:53:55", "remaining_time": "12:48:14"} +{"current_steps": 1328, "total_steps": 2069, "loss": 0.5127, "lr": 3.1244967019121496e-06, "epoch": 0.6420111191684795, "percentage": 64.19, "elapsed_time": "22:54:55", "remaining_time": "12:47:11"} +{"current_steps": 1329, "total_steps": 2069, "loss": 0.5176, "lr": 3.117088907651902e-06, "epoch": 0.6424945612762871, "percentage": 64.23, "elapsed_time": "22:55:57", "remaining_time": "12:46:08"} +{"current_steps": 1330, "total_steps": 2069, "loss": 0.5165, "lr": 3.109685926264161e-06, "epoch": 0.6429780033840947, "percentage": 64.28, "elapsed_time": "22:56:58", "remaining_time": "12:45:05"} +{"current_steps": 1331, "total_steps": 2069, "loss": 0.5339, "lr": 3.102287776671544e-06, "epoch": 
0.6434614454919023, "percentage": 64.33, "elapsed_time": "22:57:59", "remaining_time": "12:44:03"} +{"current_steps": 1332, "total_steps": 2069, "loss": 0.5166, "lr": 3.094894477784318e-06, "epoch": 0.6439448875997099, "percentage": 64.38, "elapsed_time": "22:59:02", "remaining_time": "12:43:01"} +{"current_steps": 1333, "total_steps": 2069, "loss": 0.5274, "lr": 3.0875060485003496e-06, "epoch": 0.6444283297075175, "percentage": 64.43, "elapsed_time": "23:00:05", "remaining_time": "12:42:00"} +{"current_steps": 1334, "total_steps": 2069, "loss": 0.5243, "lr": 3.080122507705062e-06, "epoch": 0.6449117718153251, "percentage": 64.48, "elapsed_time": "23:01:05", "remaining_time": "12:40:56"} +{"current_steps": 1335, "total_steps": 2069, "loss": 0.4982, "lr": 3.0727438742713766e-06, "epoch": 0.6453952139231327, "percentage": 64.52, "elapsed_time": "23:02:07", "remaining_time": "12:39:54"} +{"current_steps": 1336, "total_steps": 2069, "loss": 0.5222, "lr": 3.0653701670596805e-06, "epoch": 0.6458786560309403, "percentage": 64.57, "elapsed_time": "23:03:09", "remaining_time": "12:38:52"} +{"current_steps": 1337, "total_steps": 2069, "loss": 0.5203, "lr": 3.0580014049177566e-06, "epoch": 0.6463620981387479, "percentage": 64.62, "elapsed_time": "23:04:09", "remaining_time": "12:37:49"} +{"current_steps": 1338, "total_steps": 2069, "loss": 0.5131, "lr": 3.0506376066807632e-06, "epoch": 0.6468455402465555, "percentage": 64.67, "elapsed_time": "23:05:08", "remaining_time": "12:36:45"} +{"current_steps": 1339, "total_steps": 2069, "loss": 0.5244, "lr": 3.0432787911711553e-06, "epoch": 0.6473289823543631, "percentage": 64.72, "elapsed_time": "23:06:08", "remaining_time": "12:35:42"} +{"current_steps": 1340, "total_steps": 2069, "loss": 0.5114, "lr": 3.0359249771986605e-06, "epoch": 0.6478124244621707, "percentage": 64.77, "elapsed_time": "23:07:10", "remaining_time": "12:34:40"} +{"current_steps": 1341, "total_steps": 2069, "loss": 0.5345, "lr": 3.028576183560221e-06, "epoch": 
0.6482958665699783, "percentage": 64.81, "elapsed_time": "23:08:14", "remaining_time": "12:33:38"} +{"current_steps": 1342, "total_steps": 2069, "loss": 0.5103, "lr": 3.021232429039944e-06, "epoch": 0.6487793086777859, "percentage": 64.86, "elapsed_time": "23:09:15", "remaining_time": "12:32:36"} +{"current_steps": 1343, "total_steps": 2069, "loss": 0.5307, "lr": 3.01389373240906e-06, "epoch": 0.6492627507855935, "percentage": 64.91, "elapsed_time": "23:10:17", "remaining_time": "12:31:33"} +{"current_steps": 1344, "total_steps": 2069, "loss": 0.5146, "lr": 3.006560112425867e-06, "epoch": 0.649746192893401, "percentage": 64.96, "elapsed_time": "23:11:17", "remaining_time": "12:30:30"} +{"current_steps": 1345, "total_steps": 2069, "loss": 0.5113, "lr": 2.999231587835691e-06, "epoch": 0.6502296350012086, "percentage": 65.01, "elapsed_time": "23:12:21", "remaining_time": "12:29:29"} +{"current_steps": 1346, "total_steps": 2069, "loss": 0.5195, "lr": 2.9919081773708293e-06, "epoch": 0.6507130771090162, "percentage": 65.06, "elapsed_time": "23:13:20", "remaining_time": "12:28:25"} +{"current_steps": 1347, "total_steps": 2069, "loss": 0.5201, "lr": 2.9845898997505102e-06, "epoch": 0.6511965192168238, "percentage": 65.1, "elapsed_time": "23:14:20", "remaining_time": "12:27:22"} +{"current_steps": 1348, "total_steps": 2069, "loss": 0.5194, "lr": 2.9772767736808406e-06, "epoch": 0.6516799613246314, "percentage": 65.15, "elapsed_time": "23:15:19", "remaining_time": "12:26:18"} +{"current_steps": 1349, "total_steps": 2069, "loss": 0.5252, "lr": 2.9699688178547615e-06, "epoch": 0.652163403432439, "percentage": 65.2, "elapsed_time": "23:16:20", "remaining_time": "12:25:16"} +{"current_steps": 1350, "total_steps": 2069, "loss": 0.5122, "lr": 2.962666050951997e-06, "epoch": 0.6526468455402465, "percentage": 65.25, "elapsed_time": "23:17:24", "remaining_time": "12:24:14"} +{"current_steps": 1351, "total_steps": 2069, "loss": 0.5092, "lr": 2.9553684916390053e-06, "epoch": 
0.6531302876480541, "percentage": 65.3, "elapsed_time": "23:18:28", "remaining_time": "12:23:13"} +{"current_steps": 1352, "total_steps": 2069, "loss": 0.5256, "lr": 2.948076158568939e-06, "epoch": 0.6536137297558617, "percentage": 65.35, "elapsed_time": "23:19:30", "remaining_time": "12:22:11"} +{"current_steps": 1353, "total_steps": 2069, "loss": 0.5001, "lr": 2.940789070381587e-06, "epoch": 0.6540971718636693, "percentage": 65.39, "elapsed_time": "23:20:39", "remaining_time": "12:21:12"} +{"current_steps": 1354, "total_steps": 2069, "loss": 0.5174, "lr": 2.933507245703335e-06, "epoch": 0.6545806139714769, "percentage": 65.44, "elapsed_time": "23:21:37", "remaining_time": "12:20:09"} +{"current_steps": 1355, "total_steps": 2069, "loss": 0.5142, "lr": 2.9262307031471132e-06, "epoch": 0.6550640560792845, "percentage": 65.49, "elapsed_time": "23:22:37", "remaining_time": "12:19:05"} +{"current_steps": 1356, "total_steps": 2069, "loss": 0.5212, "lr": 2.918959461312353e-06, "epoch": 0.6555474981870921, "percentage": 65.54, "elapsed_time": "23:23:37", "remaining_time": "12:18:02"} +{"current_steps": 1357, "total_steps": 2069, "loss": 0.5315, "lr": 2.911693538784931e-06, "epoch": 0.6560309402948997, "percentage": 65.59, "elapsed_time": "23:24:39", "remaining_time": "12:17:00"} +{"current_steps": 1358, "total_steps": 2069, "loss": 0.5197, "lr": 2.904432954137136e-06, "epoch": 0.6565143824027073, "percentage": 65.64, "elapsed_time": "23:25:41", "remaining_time": "12:15:58"} +{"current_steps": 1359, "total_steps": 2069, "loss": 0.4977, "lr": 2.897177725927599e-06, "epoch": 0.6569978245105149, "percentage": 65.68, "elapsed_time": "23:26:43", "remaining_time": "12:14:55"} +{"current_steps": 1360, "total_steps": 2069, "loss": 0.5319, "lr": 2.889927872701278e-06, "epoch": 0.6574812666183224, "percentage": 65.73, "elapsed_time": "23:27:42", "remaining_time": "12:13:52"} +{"current_steps": 1361, "total_steps": 2069, "loss": 0.5166, "lr": 2.8826834129893755e-06, "epoch": 
0.65796470872613, "percentage": 65.78, "elapsed_time": "23:28:41", "remaining_time": "12:12:48"} +{"current_steps": 1362, "total_steps": 2069, "loss": 0.4786, "lr": 2.8754443653093186e-06, "epoch": 0.6584481508339376, "percentage": 65.83, "elapsed_time": "23:29:50", "remaining_time": "12:11:49"} +{"current_steps": 1363, "total_steps": 2069, "loss": 0.5216, "lr": 2.8682107481646915e-06, "epoch": 0.6589315929417452, "percentage": 65.88, "elapsed_time": "23:30:53", "remaining_time": "12:10:48"} +{"current_steps": 1364, "total_steps": 2069, "loss": 0.4988, "lr": 2.8609825800452063e-06, "epoch": 0.6594150350495528, "percentage": 65.93, "elapsed_time": "23:31:59", "remaining_time": "12:09:48"} +{"current_steps": 1365, "total_steps": 2069, "loss": 0.5181, "lr": 2.853759879426644e-06, "epoch": 0.6598984771573604, "percentage": 65.97, "elapsed_time": "23:33:02", "remaining_time": "12:08:46"} +{"current_steps": 1366, "total_steps": 2069, "loss": 0.5163, "lr": 2.8465426647708067e-06, "epoch": 0.660381919265168, "percentage": 66.02, "elapsed_time": "23:34:00", "remaining_time": "12:07:42"} +{"current_steps": 1367, "total_steps": 2069, "loss": 0.5214, "lr": 2.8393309545254776e-06, "epoch": 0.6608653613729756, "percentage": 66.07, "elapsed_time": "23:35:03", "remaining_time": "12:06:40"} +{"current_steps": 1368, "total_steps": 2069, "loss": 0.5179, "lr": 2.8321247671243695e-06, "epoch": 0.6613488034807832, "percentage": 66.12, "elapsed_time": "23:36:04", "remaining_time": "12:05:38"} +{"current_steps": 1369, "total_steps": 2069, "loss": 0.5081, "lr": 2.82492412098708e-06, "epoch": 0.6618322455885908, "percentage": 66.17, "elapsed_time": "23:37:06", "remaining_time": "12:04:36"} +{"current_steps": 1370, "total_steps": 2069, "loss": 0.5194, "lr": 2.8177290345190387e-06, "epoch": 0.6623156876963984, "percentage": 66.22, "elapsed_time": "23:38:06", "remaining_time": "12:03:32"} +{"current_steps": 1371, "total_steps": 2069, "loss": 0.5234, "lr": 2.8105395261114666e-06, "epoch": 
0.662799129804206, "percentage": 66.26, "elapsed_time": "23:39:11", "remaining_time": "12:02:31"} +{"current_steps": 1372, "total_steps": 2069, "loss": 0.5188, "lr": 2.803355614141327e-06, "epoch": 0.6632825719120136, "percentage": 66.31, "elapsed_time": "23:40:10", "remaining_time": "12:01:28"} +{"current_steps": 1373, "total_steps": 2069, "loss": 0.5125, "lr": 2.7961773169712803e-06, "epoch": 0.6637660140198212, "percentage": 66.36, "elapsed_time": "23:41:06", "remaining_time": "12:00:23"} +{"current_steps": 1374, "total_steps": 2069, "loss": 0.5233, "lr": 2.7890046529496284e-06, "epoch": 0.6642494561276288, "percentage": 66.41, "elapsed_time": "23:42:09", "remaining_time": "11:59:21"} +{"current_steps": 1375, "total_steps": 2069, "loss": 0.5188, "lr": 2.7818376404102832e-06, "epoch": 0.6647328982354364, "percentage": 66.46, "elapsed_time": "23:43:11", "remaining_time": "11:58:19"} +{"current_steps": 1376, "total_steps": 2069, "loss": 0.5248, "lr": 2.774676297672701e-06, "epoch": 0.665216340343244, "percentage": 66.51, "elapsed_time": "23:44:16", "remaining_time": "11:57:18"} +{"current_steps": 1377, "total_steps": 2069, "loss": 0.5265, "lr": 2.7675206430418542e-06, "epoch": 0.6656997824510514, "percentage": 66.55, "elapsed_time": "23:45:18", "remaining_time": "11:56:16"} +{"current_steps": 1378, "total_steps": 2069, "loss": 0.5211, "lr": 2.7603706948081745e-06, "epoch": 0.666183224558859, "percentage": 66.6, "elapsed_time": "23:46:24", "remaining_time": "11:55:16"} +{"current_steps": 1379, "total_steps": 2069, "loss": 0.517, "lr": 2.753226471247501e-06, "epoch": 0.6666666666666666, "percentage": 66.65, "elapsed_time": "23:47:26", "remaining_time": "11:54:14"} +{"current_steps": 1380, "total_steps": 2069, "loss": 0.5107, "lr": 2.7460879906210485e-06, "epoch": 0.6671501087744742, "percentage": 66.7, "elapsed_time": "23:48:31", "remaining_time": "11:53:13"} +{"current_steps": 1381, "total_steps": 2069, "loss": 0.5191, "lr": 2.7389552711753477e-06, "epoch": 
0.6676335508822818, "percentage": 66.75, "elapsed_time": "23:49:35", "remaining_time": "11:52:12"} +{"current_steps": 1382, "total_steps": 2069, "loss": 0.5128, "lr": 2.731828331142207e-06, "epoch": 0.6681169929900894, "percentage": 66.8, "elapsed_time": "23:50:37", "remaining_time": "11:51:10"} +{"current_steps": 1383, "total_steps": 2069, "loss": 0.5257, "lr": 2.7247071887386544e-06, "epoch": 0.668600435097897, "percentage": 66.84, "elapsed_time": "23:51:38", "remaining_time": "11:50:07"} +{"current_steps": 1384, "total_steps": 2069, "loss": 0.5184, "lr": 2.7175918621669074e-06, "epoch": 0.6690838772057046, "percentage": 66.89, "elapsed_time": "23:52:37", "remaining_time": "11:49:04"} +{"current_steps": 1385, "total_steps": 2069, "loss": 0.5298, "lr": 2.7104823696143136e-06, "epoch": 0.6695673193135122, "percentage": 66.94, "elapsed_time": "23:53:40", "remaining_time": "11:48:02"} +{"current_steps": 1386, "total_steps": 2069, "loss": 0.5111, "lr": 2.70337872925331e-06, "epoch": 0.6700507614213198, "percentage": 66.99, "elapsed_time": "23:54:41", "remaining_time": "11:46:59"} +{"current_steps": 1387, "total_steps": 2069, "loss": 0.5002, "lr": 2.6962809592413726e-06, "epoch": 0.6705342035291274, "percentage": 67.04, "elapsed_time": "23:55:46", "remaining_time": "11:45:59"} +{"current_steps": 1388, "total_steps": 2069, "loss": 0.5256, "lr": 2.6891890777209696e-06, "epoch": 0.671017645636935, "percentage": 67.09, "elapsed_time": "23:56:45", "remaining_time": "11:44:55"} +{"current_steps": 1389, "total_steps": 2069, "loss": 0.5193, "lr": 2.68210310281953e-06, "epoch": 0.6715010877447426, "percentage": 67.13, "elapsed_time": "23:57:48", "remaining_time": "11:43:53"} +{"current_steps": 1390, "total_steps": 2069, "loss": 0.5163, "lr": 2.67502305264937e-06, "epoch": 0.6719845298525502, "percentage": 67.18, "elapsed_time": "23:58:52", "remaining_time": "11:42:52"} +{"current_steps": 1391, "total_steps": 2069, "loss": 0.5174, "lr": 2.667948945307674e-06, "epoch": 
0.6724679719603578, "percentage": 67.23, "elapsed_time": "23:59:54", "remaining_time": "11:41:50"} +{"current_steps": 1392, "total_steps": 2069, "loss": 0.4936, "lr": 2.6608807988764252e-06, "epoch": 0.6729514140681654, "percentage": 67.28, "elapsed_time": "1 day, 0:00:55", "remaining_time": "11:40:47"} +{"current_steps": 1393, "total_steps": 2069, "loss": 0.5138, "lr": 2.653818631422378e-06, "epoch": 0.6734348561759729, "percentage": 67.33, "elapsed_time": "1 day, 0:01:57", "remaining_time": "11:39:45"} +{"current_steps": 1394, "total_steps": 2069, "loss": 0.5145, "lr": 2.6467624609970005e-06, "epoch": 0.6739182982837805, "percentage": 67.38, "elapsed_time": "1 day, 0:03:03", "remaining_time": "11:38:45"} +{"current_steps": 1395, "total_steps": 2069, "loss": 0.5013, "lr": 2.6397123056364364e-06, "epoch": 0.6744017403915881, "percentage": 67.42, "elapsed_time": "1 day, 0:04:07", "remaining_time": "11:37:43"} +{"current_steps": 1396, "total_steps": 2069, "loss": 0.5184, "lr": 2.6326681833614464e-06, "epoch": 0.6748851824993957, "percentage": 67.47, "elapsed_time": "1 day, 0:05:12", "remaining_time": "11:36:43"} +{"current_steps": 1397, "total_steps": 2069, "loss": 0.5149, "lr": 2.6256301121773775e-06, "epoch": 0.6753686246072033, "percentage": 67.52, "elapsed_time": "1 day, 0:06:16", "remaining_time": "11:35:42"} +{"current_steps": 1398, "total_steps": 2069, "loss": 0.5115, "lr": 2.618598110074105e-06, "epoch": 0.6758520667150109, "percentage": 67.57, "elapsed_time": "1 day, 0:07:19", "remaining_time": "11:34:40"} +{"current_steps": 1399, "total_steps": 2069, "loss": 0.5243, "lr": 2.6115721950259977e-06, "epoch": 0.6763355088228185, "percentage": 67.62, "elapsed_time": "1 day, 0:08:17", "remaining_time": "11:33:36"} +{"current_steps": 1400, "total_steps": 2069, "loss": 0.5314, "lr": 2.6045523849918553e-06, "epoch": 0.676818950930626, "percentage": 67.67, "elapsed_time": "1 day, 0:09:14", "remaining_time": "11:32:31"} +{"current_steps": 1401, "total_steps": 2069, 
"loss": 0.5179, "lr": 2.5975386979148792e-06, "epoch": 0.6773023930384336, "percentage": 67.71, "elapsed_time": "1 day, 0:10:16", "remaining_time": "11:31:29"} +{"current_steps": 1402, "total_steps": 2069, "loss": 0.5165, "lr": 2.590531151722622e-06, "epoch": 0.6777858351462412, "percentage": 67.76, "elapsed_time": "1 day, 0:11:19", "remaining_time": "11:30:27"} +{"current_steps": 1403, "total_steps": 2069, "loss": 0.5212, "lr": 2.5835297643269326e-06, "epoch": 0.6782692772540488, "percentage": 67.81, "elapsed_time": "1 day, 0:12:24", "remaining_time": "11:29:27"} +{"current_steps": 1404, "total_steps": 2069, "loss": 0.5197, "lr": 2.576534553623925e-06, "epoch": 0.6787527193618564, "percentage": 67.86, "elapsed_time": "1 day, 0:13:23", "remaining_time": "11:28:23"} +{"current_steps": 1405, "total_steps": 2069, "loss": 0.4939, "lr": 2.5695455374939147e-06, "epoch": 0.679236161469664, "percentage": 67.91, "elapsed_time": "1 day, 0:14:27", "remaining_time": "11:27:22"} +{"current_steps": 1406, "total_steps": 2069, "loss": 0.5242, "lr": 2.5625627338014004e-06, "epoch": 0.6797196035774716, "percentage": 67.96, "elapsed_time": "1 day, 0:15:29", "remaining_time": "11:26:20"} +{"current_steps": 1407, "total_steps": 2069, "loss": 0.513, "lr": 2.5555861603949832e-06, "epoch": 0.6802030456852792, "percentage": 68.0, "elapsed_time": "1 day, 0:16:34", "remaining_time": "11:25:19"} +{"current_steps": 1408, "total_steps": 2069, "loss": 0.5047, "lr": 2.548615835107352e-06, "epoch": 0.6806864877930868, "percentage": 68.05, "elapsed_time": "1 day, 0:17:35", "remaining_time": "11:24:16"} +{"current_steps": 1409, "total_steps": 2069, "loss": 0.5286, "lr": 2.5416517757552157e-06, "epoch": 0.6811699299008944, "percentage": 68.1, "elapsed_time": "1 day, 0:18:36", "remaining_time": "11:23:14"} +{"current_steps": 1410, "total_steps": 2069, "loss": 0.5169, "lr": 2.534694000139273e-06, "epoch": 0.6816533720087019, "percentage": 68.15, "elapsed_time": "1 day, 0:19:38", "remaining_time": 
"11:22:12"} +{"current_steps": 1411, "total_steps": 2069, "loss": 0.515, "lr": 2.5277425260441616e-06, "epoch": 0.6821368141165095, "percentage": 68.2, "elapsed_time": "1 day, 0:20:42", "remaining_time": "11:21:11"} +{"current_steps": 1412, "total_steps": 2069, "loss": 0.5225, "lr": 2.520797371238406e-06, "epoch": 0.6826202562243171, "percentage": 68.25, "elapsed_time": "1 day, 0:21:43", "remaining_time": "11:20:07"} +{"current_steps": 1413, "total_steps": 2069, "loss": 0.5191, "lr": 2.513858553474382e-06, "epoch": 0.6831036983321247, "percentage": 68.29, "elapsed_time": "1 day, 0:22:43", "remaining_time": "11:19:05"} +{"current_steps": 1414, "total_steps": 2069, "loss": 0.5306, "lr": 2.506926090488269e-06, "epoch": 0.6835871404399323, "percentage": 68.34, "elapsed_time": "1 day, 0:23:43", "remaining_time": "11:18:02"} +{"current_steps": 1415, "total_steps": 2069, "loss": 0.5248, "lr": 2.5000000000000015e-06, "epoch": 0.6840705825477399, "percentage": 68.39, "elapsed_time": "1 day, 0:24:42", "remaining_time": "11:16:58"} +{"current_steps": 1416, "total_steps": 2069, "loss": 0.5218, "lr": 2.4930802997132213e-06, "epoch": 0.6845540246555475, "percentage": 68.44, "elapsed_time": "1 day, 0:25:47", "remaining_time": "11:15:57"} +{"current_steps": 1417, "total_steps": 2069, "loss": 0.5189, "lr": 2.486167007315243e-06, "epoch": 0.6850374667633551, "percentage": 68.49, "elapsed_time": "1 day, 0:26:51", "remaining_time": "11:14:56"} +{"current_steps": 1418, "total_steps": 2069, "loss": 0.5114, "lr": 2.479260140476999e-06, "epoch": 0.6855209088711627, "percentage": 68.54, "elapsed_time": "1 day, 0:27:52", "remaining_time": "11:13:53"} +{"current_steps": 1419, "total_steps": 2069, "loss": 0.5066, "lr": 2.4723597168529984e-06, "epoch": 0.6860043509789703, "percentage": 68.58, "elapsed_time": "1 day, 0:28:49", "remaining_time": "11:12:49"} +{"current_steps": 1420, "total_steps": 2069, "loss": 0.4888, "lr": 2.465465754081277e-06, "epoch": 0.6864877930867779, "percentage": 68.63, 
"elapsed_time": "1 day, 0:29:55", "remaining_time": "11:11:49"} +{"current_steps": 1421, "total_steps": 2069, "loss": 0.5155, "lr": 2.458578269783364e-06, "epoch": 0.6869712351945855, "percentage": 68.68, "elapsed_time": "1 day, 0:30:56", "remaining_time": "11:10:46"} +{"current_steps": 1422, "total_steps": 2069, "loss": 0.5143, "lr": 2.4516972815642166e-06, "epoch": 0.6874546773023931, "percentage": 68.73, "elapsed_time": "1 day, 0:31:58", "remaining_time": "11:09:44"} +{"current_steps": 1423, "total_steps": 2069, "loss": 0.5196, "lr": 2.444822807012204e-06, "epoch": 0.6879381194102007, "percentage": 68.78, "elapsed_time": "1 day, 0:33:01", "remaining_time": "11:08:42"} +{"current_steps": 1424, "total_steps": 2069, "loss": 0.5136, "lr": 2.4379548636990343e-06, "epoch": 0.6884215615180083, "percentage": 68.83, "elapsed_time": "1 day, 0:34:03", "remaining_time": "11:07:40"} +{"current_steps": 1425, "total_steps": 2069, "loss": 0.5305, "lr": 2.4310934691797207e-06, "epoch": 0.6889050036258159, "percentage": 68.87, "elapsed_time": "1 day, 0:34:58", "remaining_time": "11:06:35"} +{"current_steps": 1426, "total_steps": 2069, "loss": 0.5048, "lr": 2.4242386409925435e-06, "epoch": 0.6893884457336233, "percentage": 68.92, "elapsed_time": "1 day, 0:36:01", "remaining_time": "11:05:33"} +{"current_steps": 1427, "total_steps": 2069, "loss": 0.5216, "lr": 2.4173903966589957e-06, "epoch": 0.6898718878414309, "percentage": 68.97, "elapsed_time": "1 day, 0:37:03", "remaining_time": "11:04:31"} +{"current_steps": 1428, "total_steps": 2069, "loss": 0.5206, "lr": 2.410548753683743e-06, "epoch": 0.6903553299492385, "percentage": 69.02, "elapsed_time": "1 day, 0:38:04", "remaining_time": "11:03:28"} +{"current_steps": 1429, "total_steps": 2069, "loss": 0.5205, "lr": 2.4037137295545737e-06, "epoch": 0.6908387720570461, "percentage": 69.07, "elapsed_time": "1 day, 0:39:06", "remaining_time": "11:02:26"} +{"current_steps": 1430, "total_steps": 2069, "loss": 0.4804, "lr": 
2.396885341742361e-06, "epoch": 0.6913222141648537, "percentage": 69.12, "elapsed_time": "1 day, 0:40:11", "remaining_time": "11:01:25"} +{"current_steps": 1431, "total_steps": 2069, "loss": 0.5194, "lr": 2.390063607701016e-06, "epoch": 0.6918056562726613, "percentage": 69.16, "elapsed_time": "1 day, 0:41:11", "remaining_time": "11:00:22"} +{"current_steps": 1432, "total_steps": 2069, "loss": 0.53, "lr": 2.3832485448674407e-06, "epoch": 0.6922890983804689, "percentage": 69.21, "elapsed_time": "1 day, 0:42:11", "remaining_time": "10:59:19"} +{"current_steps": 1433, "total_steps": 2069, "loss": 0.5144, "lr": 2.3764401706614832e-06, "epoch": 0.6927725404882765, "percentage": 69.26, "elapsed_time": "1 day, 0:43:14", "remaining_time": "10:58:17"} +{"current_steps": 1434, "total_steps": 2069, "loss": 0.5148, "lr": 2.369638502485897e-06, "epoch": 0.6932559825960841, "percentage": 69.31, "elapsed_time": "1 day, 0:44:14", "remaining_time": "10:57:15"} +{"current_steps": 1435, "total_steps": 2069, "loss": 0.5191, "lr": 2.3628435577262947e-06, "epoch": 0.6937394247038917, "percentage": 69.36, "elapsed_time": "1 day, 0:45:14", "remaining_time": "10:56:11"} +{"current_steps": 1436, "total_steps": 2069, "loss": 0.5021, "lr": 2.3560553537511043e-06, "epoch": 0.6942228668116993, "percentage": 69.41, "elapsed_time": "1 day, 0:46:18", "remaining_time": "10:55:10"} +{"current_steps": 1437, "total_steps": 2069, "loss": 0.5061, "lr": 2.3492739079115214e-06, "epoch": 0.6947063089195069, "percentage": 69.45, "elapsed_time": "1 day, 0:47:20", "remaining_time": "10:54:08"} +{"current_steps": 1438, "total_steps": 2069, "loss": 0.5133, "lr": 2.3424992375414655e-06, "epoch": 0.6951897510273145, "percentage": 69.5, "elapsed_time": "1 day, 0:48:22", "remaining_time": "10:53:06"} +{"current_steps": 1439, "total_steps": 2069, "loss": 0.5254, "lr": 2.3357313599575422e-06, "epoch": 0.6956731931351221, "percentage": 69.55, "elapsed_time": "1 day, 0:49:25", "remaining_time": "10:52:04"} 
+{"current_steps": 1440, "total_steps": 2069, "loss": 0.5143, "lr": 2.3289702924589914e-06, "epoch": 0.6961566352429297, "percentage": 69.6, "elapsed_time": "1 day, 0:50:26", "remaining_time": "10:51:01"} +{"current_steps": 1441, "total_steps": 2069, "loss": 0.5194, "lr": 2.3222160523276486e-06, "epoch": 0.6966400773507373, "percentage": 69.65, "elapsed_time": "1 day, 0:51:31", "remaining_time": "10:50:00"} +{"current_steps": 1442, "total_steps": 2069, "loss": 0.5315, "lr": 2.3154686568278933e-06, "epoch": 0.6971235194585449, "percentage": 69.7, "elapsed_time": "1 day, 0:52:27", "remaining_time": "10:48:56"} +{"current_steps": 1443, "total_steps": 2069, "loss": 0.5109, "lr": 2.3087281232066134e-06, "epoch": 0.6976069615663524, "percentage": 69.74, "elapsed_time": "1 day, 0:53:29", "remaining_time": "10:47:54"} +{"current_steps": 1444, "total_steps": 2069, "loss": 0.5256, "lr": 2.3019944686931554e-06, "epoch": 0.69809040367416, "percentage": 69.79, "elapsed_time": "1 day, 0:54:31", "remaining_time": "10:46:52"} +{"current_steps": 1445, "total_steps": 2069, "loss": 0.5287, "lr": 2.2952677104992855e-06, "epoch": 0.6985738457819676, "percentage": 69.84, "elapsed_time": "1 day, 0:55:35", "remaining_time": "10:45:50"} +{"current_steps": 1446, "total_steps": 2069, "loss": 0.5192, "lr": 2.2885478658191364e-06, "epoch": 0.6990572878897752, "percentage": 69.89, "elapsed_time": "1 day, 0:56:39", "remaining_time": "10:44:49"} +{"current_steps": 1447, "total_steps": 2069, "loss": 0.521, "lr": 2.281834951829174e-06, "epoch": 0.6995407299975828, "percentage": 69.94, "elapsed_time": "1 day, 0:57:43", "remaining_time": "10:43:48"} +{"current_steps": 1448, "total_steps": 2069, "loss": 0.4869, "lr": 2.2751289856881487e-06, "epoch": 0.7000241721053904, "percentage": 69.99, "elapsed_time": "1 day, 0:58:48", "remaining_time": "10:42:47"} +{"current_steps": 1449, "total_steps": 2069, "loss": 0.5216, "lr": 2.268429984537048e-06, "epoch": 0.700507614213198, "percentage": 70.03, 
"elapsed_time": "1 day, 0:59:50", "remaining_time": "10:41:45"} +{"current_steps": 1450, "total_steps": 2069, "loss": 0.5165, "lr": 2.2617379654990623e-06, "epoch": 0.7009910563210056, "percentage": 70.08, "elapsed_time": "1 day, 1:00:51", "remaining_time": "10:40:42"} +{"current_steps": 1451, "total_steps": 2069, "loss": 0.5183, "lr": 2.255052945679525e-06, "epoch": 0.7014744984288132, "percentage": 70.13, "elapsed_time": "1 day, 1:01:53", "remaining_time": "10:39:40"} +{"current_steps": 1452, "total_steps": 2069, "loss": 0.5231, "lr": 2.248374942165894e-06, "epoch": 0.7019579405366208, "percentage": 70.18, "elapsed_time": "1 day, 1:02:57", "remaining_time": "10:38:39"} +{"current_steps": 1453, "total_steps": 2069, "loss": 0.5168, "lr": 2.241703972027679e-06, "epoch": 0.7024413826444283, "percentage": 70.23, "elapsed_time": "1 day, 1:04:01", "remaining_time": "10:37:37"} +{"current_steps": 1454, "total_steps": 2069, "loss": 0.5158, "lr": 2.23504005231642e-06, "epoch": 0.7029248247522359, "percentage": 70.28, "elapsed_time": "1 day, 1:05:04", "remaining_time": "10:36:35"} +{"current_steps": 1455, "total_steps": 2069, "loss": 0.4941, "lr": 2.2283832000656304e-06, "epoch": 0.7034082668600435, "percentage": 70.32, "elapsed_time": "1 day, 1:06:06", "remaining_time": "10:35:34"} +{"current_steps": 1456, "total_steps": 2069, "loss": 0.5209, "lr": 2.221733432290762e-06, "epoch": 0.7038917089678511, "percentage": 70.37, "elapsed_time": "1 day, 1:07:10", "remaining_time": "10:34:32"} +{"current_steps": 1457, "total_steps": 2069, "loss": 0.5173, "lr": 2.2150907659891566e-06, "epoch": 0.7043751510756587, "percentage": 70.42, "elapsed_time": "1 day, 1:09:18", "remaining_time": "10:33:58"} +{"current_steps": 1458, "total_steps": 2069, "loss": 0.5186, "lr": 2.2084552181400087e-06, "epoch": 0.7048585931834663, "percentage": 70.47, "elapsed_time": "1 day, 1:10:22", "remaining_time": "10:32:56"} +{"current_steps": 1459, "total_steps": 2069, "loss": 0.5125, "lr": 
2.201826805704308e-06, "epoch": 0.7053420352912738, "percentage": 70.52, "elapsed_time": "1 day, 1:11:23", "remaining_time": "10:31:54"} +{"current_steps": 1460, "total_steps": 2069, "loss": 0.5243, "lr": 2.195205545624813e-06, "epoch": 0.7058254773990814, "percentage": 70.57, "elapsed_time": "1 day, 1:12:27", "remaining_time": "10:30:52"} +{"current_steps": 1461, "total_steps": 2069, "loss": 0.5135, "lr": 2.188591454826e-06, "epoch": 0.706308919506889, "percentage": 70.61, "elapsed_time": "1 day, 1:13:30", "remaining_time": "10:29:51"} +{"current_steps": 1462, "total_steps": 2069, "loss": 0.5116, "lr": 2.181984550214015e-06, "epoch": 0.7067923616146966, "percentage": 70.66, "elapsed_time": "1 day, 1:14:27", "remaining_time": "10:28:46"} +{"current_steps": 1463, "total_steps": 2069, "loss": 0.5165, "lr": 2.175384848676639e-06, "epoch": 0.7072758037225042, "percentage": 70.71, "elapsed_time": "1 day, 1:15:30", "remaining_time": "10:27:44"} +{"current_steps": 1464, "total_steps": 2069, "loss": 0.5138, "lr": 2.168792367083243e-06, "epoch": 0.7077592458303118, "percentage": 70.76, "elapsed_time": "1 day, 1:16:29", "remaining_time": "10:26:41"} +{"current_steps": 1465, "total_steps": 2069, "loss": 0.5091, "lr": 2.162207122284742e-06, "epoch": 0.7082426879381194, "percentage": 70.81, "elapsed_time": "1 day, 1:17:29", "remaining_time": "10:25:38"} +{"current_steps": 1466, "total_steps": 2069, "loss": 0.5158, "lr": 2.155629131113549e-06, "epoch": 0.708726130045927, "percentage": 70.86, "elapsed_time": "1 day, 1:18:32", "remaining_time": "10:24:36"} +{"current_steps": 1467, "total_steps": 2069, "loss": 0.4847, "lr": 2.1490584103835433e-06, "epoch": 0.7092095721537346, "percentage": 70.9, "elapsed_time": "1 day, 1:19:32", "remaining_time": "10:23:33"} +{"current_steps": 1468, "total_steps": 2069, "loss": 0.5241, "lr": 2.142494976890011e-06, "epoch": 0.7096930142615422, "percentage": 70.95, "elapsed_time": "1 day, 1:20:35", "remaining_time": "10:22:31"} +{"current_steps": 
1469, "total_steps": 2069, "loss": 0.5206, "lr": 2.135938847409625e-06, "epoch": 0.7101764563693498, "percentage": 71.0, "elapsed_time": "1 day, 1:21:36", "remaining_time": "10:21:29"} +{"current_steps": 1470, "total_steps": 2069, "loss": 0.4931, "lr": 2.1293900387003742e-06, "epoch": 0.7106598984771574, "percentage": 71.05, "elapsed_time": "1 day, 1:22:41", "remaining_time": "10:20:28"} +{"current_steps": 1471, "total_steps": 2069, "loss": 0.5204, "lr": 2.1228485675015455e-06, "epoch": 0.711143340584965, "percentage": 71.1, "elapsed_time": "1 day, 1:23:40", "remaining_time": "10:19:24"} +{"current_steps": 1472, "total_steps": 2069, "loss": 0.5219, "lr": 2.1163144505336634e-06, "epoch": 0.7116267826927726, "percentage": 71.15, "elapsed_time": "1 day, 1:24:42", "remaining_time": "10:18:22"} +{"current_steps": 1473, "total_steps": 2069, "loss": 0.519, "lr": 2.109787704498459e-06, "epoch": 0.7121102248005802, "percentage": 71.19, "elapsed_time": "1 day, 1:25:47", "remaining_time": "10:17:21"} +{"current_steps": 1474, "total_steps": 2069, "loss": 0.4979, "lr": 2.1032683460788223e-06, "epoch": 0.7125936669083878, "percentage": 71.24, "elapsed_time": "1 day, 1:26:50", "remaining_time": "10:16:19"} +{"current_steps": 1475, "total_steps": 2069, "loss": 0.5256, "lr": 2.0967563919387563e-06, "epoch": 0.7130771090161954, "percentage": 71.29, "elapsed_time": "1 day, 1:27:53", "remaining_time": "10:15:17"} +{"current_steps": 1476, "total_steps": 2069, "loss": 0.5195, "lr": 2.0902518587233418e-06, "epoch": 0.7135605511240029, "percentage": 71.34, "elapsed_time": "1 day, 1:28:53", "remaining_time": "10:14:15"} +{"current_steps": 1477, "total_steps": 2069, "loss": 0.5238, "lr": 2.08375476305869e-06, "epoch": 0.7140439932318104, "percentage": 71.39, "elapsed_time": "1 day, 1:29:55", "remaining_time": "10:13:12"} +{"current_steps": 1478, "total_steps": 2069, "loss": 0.4914, "lr": 2.077265121551903e-06, "epoch": 0.714527435339618, "percentage": 71.44, "elapsed_time": "1 day, 
1:30:58", "remaining_time": "10:12:10"} +{"current_steps": 1479, "total_steps": 2069, "loss": 0.5224, "lr": 2.0707829507910237e-06, "epoch": 0.7150108774474256, "percentage": 71.48, "elapsed_time": "1 day, 1:32:00", "remaining_time": "10:11:08"} +{"current_steps": 1480, "total_steps": 2069, "loss": 0.5214, "lr": 2.0643082673450053e-06, "epoch": 0.7154943195552332, "percentage": 71.53, "elapsed_time": "1 day, 1:33:04", "remaining_time": "10:10:07"} +{"current_steps": 1481, "total_steps": 2069, "loss": 0.5098, "lr": 2.05784108776366e-06, "epoch": 0.7159777616630408, "percentage": 71.58, "elapsed_time": "1 day, 1:34:05", "remaining_time": "10:09:04"} +{"current_steps": 1482, "total_steps": 2069, "loss": 0.5213, "lr": 2.051381428577622e-06, "epoch": 0.7164612037708484, "percentage": 71.63, "elapsed_time": "1 day, 1:35:05", "remaining_time": "10:08:01"} +{"current_steps": 1483, "total_steps": 2069, "loss": 0.5169, "lr": 2.044929306298298e-06, "epoch": 0.716944645878656, "percentage": 71.68, "elapsed_time": "1 day, 1:36:07", "remaining_time": "10:06:59"} +{"current_steps": 1484, "total_steps": 2069, "loss": 0.5214, "lr": 2.0384847374178346e-06, "epoch": 0.7174280879864636, "percentage": 71.73, "elapsed_time": "1 day, 1:37:06", "remaining_time": "10:05:56"} +{"current_steps": 1485, "total_steps": 2069, "loss": 0.5002, "lr": 2.0320477384090665e-06, "epoch": 0.7179115300942712, "percentage": 71.77, "elapsed_time": "1 day, 1:38:12", "remaining_time": "10:04:55"} +{"current_steps": 1486, "total_steps": 2069, "loss": 0.5057, "lr": 2.0256183257254837e-06, "epoch": 0.7183949722020788, "percentage": 71.82, "elapsed_time": "1 day, 1:39:11", "remaining_time": "10:03:51"} +{"current_steps": 1487, "total_steps": 2069, "loss": 0.4815, "lr": 2.0191965158011854e-06, "epoch": 0.7188784143098864, "percentage": 71.87, "elapsed_time": "1 day, 1:40:22", "remaining_time": "10:02:53"} +{"current_steps": 1488, "total_steps": 2069, "loss": 0.5283, "lr": 2.012782325050831e-06, "epoch": 
0.719361856417694, "percentage": 71.92, "elapsed_time": "1 day, 1:41:24", "remaining_time": "10:01:51"} +{"current_steps": 1489, "total_steps": 2069, "loss": 0.522, "lr": 2.006375769869611e-06, "epoch": 0.7198452985255016, "percentage": 71.97, "elapsed_time": "1 day, 1:42:23", "remaining_time": "10:00:47"} +{"current_steps": 1490, "total_steps": 2069, "loss": 0.5132, "lr": 1.9999768666331974e-06, "epoch": 0.7203287406333092, "percentage": 72.02, "elapsed_time": "1 day, 1:43:24", "remaining_time": "9:59:45"} +{"current_steps": 1491, "total_steps": 2069, "loss": 0.4938, "lr": 1.9935856316977044e-06, "epoch": 0.7208121827411168, "percentage": 72.06, "elapsed_time": "1 day, 1:44:28", "remaining_time": "9:58:43"} +{"current_steps": 1492, "total_steps": 2069, "loss": 0.5251, "lr": 1.987202081399639e-06, "epoch": 0.7212956248489243, "percentage": 72.11, "elapsed_time": "1 day, 1:45:30", "remaining_time": "9:57:41"} +{"current_steps": 1493, "total_steps": 2069, "loss": 0.506, "lr": 1.9808262320558724e-06, "epoch": 0.7217790669567319, "percentage": 72.16, "elapsed_time": "1 day, 1:46:31", "remaining_time": "9:56:39"} +{"current_steps": 1494, "total_steps": 2069, "loss": 0.5143, "lr": 1.9744580999635902e-06, "epoch": 0.7222625090645395, "percentage": 72.21, "elapsed_time": "1 day, 1:47:35", "remaining_time": "9:55:37"} +{"current_steps": 1495, "total_steps": 2069, "loss": 0.5245, "lr": 1.968097701400252e-06, "epoch": 0.7227459511723471, "percentage": 72.26, "elapsed_time": "1 day, 1:48:37", "remaining_time": "9:54:35"} +{"current_steps": 1496, "total_steps": 2069, "loss": 0.5178, "lr": 1.9617450526235464e-06, "epoch": 0.7232293932801547, "percentage": 72.31, "elapsed_time": "1 day, 1:49:38", "remaining_time": "9:53:32"} +{"current_steps": 1497, "total_steps": 2069, "loss": 0.5131, "lr": 1.9554001698713572e-06, "epoch": 0.7237128353879623, "percentage": 72.35, "elapsed_time": "1 day, 1:50:42", "remaining_time": "9:52:31"} +{"current_steps": 1498, "total_steps": 2069, "loss": 
0.5136, "lr": 1.949063069361717e-06, "epoch": 0.7241962774957699, "percentage": 72.4, "elapsed_time": "1 day, 1:51:41", "remaining_time": "9:51:28"} +{"current_steps": 1499, "total_steps": 2069, "loss": 0.5146, "lr": 1.9427337672927632e-06, "epoch": 0.7246797196035775, "percentage": 72.45, "elapsed_time": "1 day, 1:52:38", "remaining_time": "9:50:23"} +{"current_steps": 1500, "total_steps": 2069, "loss": 0.4913, "lr": 1.936412279842705e-06, "epoch": 0.7251631617113851, "percentage": 72.5, "elapsed_time": "1 day, 1:53:42", "remaining_time": "9:49:22"} +{"current_steps": 1501, "total_steps": 2069, "loss": 0.5175, "lr": 1.9300986231697705e-06, "epoch": 0.7256466038191927, "percentage": 72.55, "elapsed_time": "1 day, 1:54:42", "remaining_time": "9:48:19"} +{"current_steps": 1502, "total_steps": 2069, "loss": 0.516, "lr": 1.9237928134121757e-06, "epoch": 0.7261300459270003, "percentage": 72.6, "elapsed_time": "1 day, 1:55:47", "remaining_time": "9:47:18"} +{"current_steps": 1503, "total_steps": 2069, "loss": 0.5155, "lr": 1.9174948666880805e-06, "epoch": 0.7266134880348079, "percentage": 72.64, "elapsed_time": "1 day, 1:56:48", "remaining_time": "9:46:15"} +{"current_steps": 1504, "total_steps": 2069, "loss": 0.5136, "lr": 1.9112047990955446e-06, "epoch": 0.7270969301426154, "percentage": 72.69, "elapsed_time": "1 day, 1:57:48", "remaining_time": "9:45:12"} +{"current_steps": 1505, "total_steps": 2069, "loss": 0.5172, "lr": 1.9049226267124844e-06, "epoch": 0.727580372250423, "percentage": 72.74, "elapsed_time": "1 day, 1:58:51", "remaining_time": "9:44:11"} +{"current_steps": 1506, "total_steps": 2069, "loss": 0.5179, "lr": 1.8986483655966408e-06, "epoch": 0.7280638143582306, "percentage": 72.79, "elapsed_time": "1 day, 1:59:53", "remaining_time": "9:43:08"} +{"current_steps": 1507, "total_steps": 2069, "loss": 0.5076, "lr": 1.8923820317855307e-06, "epoch": 0.7285472564660382, "percentage": 72.84, "elapsed_time": "1 day, 2:00:52", "remaining_time": "9:42:05"} 
+{"current_steps": 1508, "total_steps": 2069, "loss": 0.5172, "lr": 1.8861236412964106e-06, "epoch": 0.7290306985738458, "percentage": 72.89, "elapsed_time": "1 day, 2:01:52", "remaining_time": "9:41:02"} +{"current_steps": 1509, "total_steps": 2069, "loss": 0.5259, "lr": 1.879873210126229e-06, "epoch": 0.7295141406816533, "percentage": 72.93, "elapsed_time": "1 day, 2:02:49", "remaining_time": "9:39:58"} +{"current_steps": 1510, "total_steps": 2069, "loss": 0.5177, "lr": 1.873630754251588e-06, "epoch": 0.7299975827894609, "percentage": 72.98, "elapsed_time": "1 day, 2:03:52", "remaining_time": "9:38:56"} +{"current_steps": 1511, "total_steps": 2069, "loss": 0.5201, "lr": 1.8673962896287152e-06, "epoch": 0.7304810248972685, "percentage": 73.03, "elapsed_time": "1 day, 2:04:55", "remaining_time": "9:37:54"} +{"current_steps": 1512, "total_steps": 2069, "loss": 0.5186, "lr": 1.8611698321933991e-06, "epoch": 0.7309644670050761, "percentage": 73.08, "elapsed_time": "1 day, 2:05:55", "remaining_time": "9:36:52"} +{"current_steps": 1513, "total_steps": 2069, "loss": 0.5111, "lr": 1.8549513978609707e-06, "epoch": 0.7314479091128837, "percentage": 73.13, "elapsed_time": "1 day, 2:06:58", "remaining_time": "9:35:49"} +{"current_steps": 1514, "total_steps": 2069, "loss": 0.5103, "lr": 1.8487410025262436e-06, "epoch": 0.7319313512206913, "percentage": 73.18, "elapsed_time": "1 day, 2:07:58", "remaining_time": "9:34:47"} +{"current_steps": 1515, "total_steps": 2069, "loss": 0.5167, "lr": 1.8425386620634961e-06, "epoch": 0.7324147933284989, "percentage": 73.22, "elapsed_time": "1 day, 2:08:57", "remaining_time": "9:33:43"} +{"current_steps": 1516, "total_steps": 2069, "loss": 0.5125, "lr": 1.8363443923264046e-06, "epoch": 0.7328982354363065, "percentage": 73.27, "elapsed_time": "1 day, 2:09:58", "remaining_time": "9:32:41"} +{"current_steps": 1517, "total_steps": 2069, "loss": 0.5217, "lr": 1.8301582091480264e-06, "epoch": 0.7333816775441141, "percentage": 73.32, 
"elapsed_time": "1 day, 2:10:58", "remaining_time": "9:31:38"} +{"current_steps": 1518, "total_steps": 2069, "loss": 0.5164, "lr": 1.8239801283407393e-06, "epoch": 0.7338651196519217, "percentage": 73.37, "elapsed_time": "1 day, 2:11:56", "remaining_time": "9:30:34"} +{"current_steps": 1519, "total_steps": 2069, "loss": 0.5205, "lr": 1.8178101656962188e-06, "epoch": 0.7343485617597293, "percentage": 73.42, "elapsed_time": "1 day, 2:12:57", "remaining_time": "9:29:32"} +{"current_steps": 1520, "total_steps": 2069, "loss": 0.4835, "lr": 1.8116483369853853e-06, "epoch": 0.7348320038675369, "percentage": 73.47, "elapsed_time": "1 day, 2:14:04", "remaining_time": "9:28:31"} +{"current_steps": 1521, "total_steps": 2069, "loss": 0.5143, "lr": 1.8054946579583732e-06, "epoch": 0.7353154459753445, "percentage": 73.51, "elapsed_time": "1 day, 2:15:07", "remaining_time": "9:27:29"} +{"current_steps": 1522, "total_steps": 2069, "loss": 0.5129, "lr": 1.7993491443444771e-06, "epoch": 0.7357988880831521, "percentage": 73.56, "elapsed_time": "1 day, 2:16:10", "remaining_time": "9:26:28"} +{"current_steps": 1523, "total_steps": 2069, "loss": 0.5131, "lr": 1.7932118118521274e-06, "epoch": 0.7362823301909597, "percentage": 73.61, "elapsed_time": "1 day, 2:17:13", "remaining_time": "9:25:26"} +{"current_steps": 1524, "total_steps": 2069, "loss": 0.5268, "lr": 1.787082676168842e-06, "epoch": 0.7367657722987673, "percentage": 73.66, "elapsed_time": "1 day, 2:18:13", "remaining_time": "9:24:23"} +{"current_steps": 1525, "total_steps": 2069, "loss": 0.5126, "lr": 1.7809617529611828e-06, "epoch": 0.7372492144065748, "percentage": 73.71, "elapsed_time": "1 day, 2:19:14", "remaining_time": "9:23:20"} +{"current_steps": 1526, "total_steps": 2069, "loss": 0.4945, "lr": 1.7748490578747257e-06, "epoch": 0.7377326565143824, "percentage": 73.76, "elapsed_time": "1 day, 2:20:17", "remaining_time": "9:22:19"} +{"current_steps": 1527, "total_steps": 2069, "loss": 0.5189, "lr": 1.7687446065340074e-06, 
"epoch": 0.73821609862219, "percentage": 73.8, "elapsed_time": "1 day, 2:21:19", "remaining_time": "9:21:16"} +{"current_steps": 1528, "total_steps": 2069, "loss": 0.5117, "lr": 1.7626484145425038e-06, "epoch": 0.7386995407299976, "percentage": 73.85, "elapsed_time": "1 day, 2:22:22", "remaining_time": "9:20:15"} +{"current_steps": 1529, "total_steps": 2069, "loss": 0.4917, "lr": 1.7565604974825678e-06, "epoch": 0.7391829828378051, "percentage": 73.9, "elapsed_time": "1 day, 2:23:25", "remaining_time": "9:19:13"} +{"current_steps": 1530, "total_steps": 2069, "loss": 0.5187, "lr": 1.7504808709154104e-06, "epoch": 0.7396664249456127, "percentage": 73.95, "elapsed_time": "1 day, 2:24:25", "remaining_time": "9:18:10"} +{"current_steps": 1531, "total_steps": 2069, "loss": 0.529, "lr": 1.744409550381041e-06, "epoch": 0.7401498670534203, "percentage": 74.0, "elapsed_time": "1 day, 2:25:26", "remaining_time": "9:17:07"} +{"current_steps": 1532, "total_steps": 2069, "loss": 0.4906, "lr": 1.7383465513982517e-06, "epoch": 0.7406333091612279, "percentage": 74.05, "elapsed_time": "1 day, 2:26:30", "remaining_time": "9:16:06"} +{"current_steps": 1533, "total_steps": 2069, "loss": 0.5209, "lr": 1.7322918894645525e-06, "epoch": 0.7411167512690355, "percentage": 74.09, "elapsed_time": "1 day, 2:27:32", "remaining_time": "9:15:04"} +{"current_steps": 1534, "total_steps": 2069, "loss": 0.529, "lr": 1.7262455800561456e-06, "epoch": 0.7416001933768431, "percentage": 74.14, "elapsed_time": "1 day, 2:28:34", "remaining_time": "9:14:01"} +{"current_steps": 1535, "total_steps": 2069, "loss": 0.5218, "lr": 1.7202076386278876e-06, "epoch": 0.7420836354846507, "percentage": 74.19, "elapsed_time": "1 day, 2:29:34", "remaining_time": "9:12:59"} +{"current_steps": 1536, "total_steps": 2069, "loss": 0.5038, "lr": 1.7141780806132429e-06, "epoch": 0.7425670775924583, "percentage": 74.24, "elapsed_time": "1 day, 2:30:36", "remaining_time": "9:11:56"} +{"current_steps": 1537, "total_steps": 2069, 
"loss": 0.5094, "lr": 1.70815692142425e-06, "epoch": 0.7430505197002659, "percentage": 74.29, "elapsed_time": "1 day, 2:31:37", "remaining_time": "9:10:54"} +{"current_steps": 1538, "total_steps": 2069, "loss": 0.4909, "lr": 1.702144176451473e-06, "epoch": 0.7435339618080735, "percentage": 74.34, "elapsed_time": "1 day, 2:32:37", "remaining_time": "9:09:51"} +{"current_steps": 1539, "total_steps": 2069, "loss": 0.5231, "lr": 1.696139861063974e-06, "epoch": 0.7440174039158811, "percentage": 74.38, "elapsed_time": "1 day, 2:33:38", "remaining_time": "9:08:49"} +{"current_steps": 1540, "total_steps": 2069, "loss": 0.5116, "lr": 1.690143990609268e-06, "epoch": 0.7445008460236887, "percentage": 74.43, "elapsed_time": "1 day, 2:34:40", "remaining_time": "9:07:46"} +{"current_steps": 1541, "total_steps": 2069, "loss": 0.5159, "lr": 1.6841565804132843e-06, "epoch": 0.7449842881314963, "percentage": 74.48, "elapsed_time": "1 day, 2:35:41", "remaining_time": "9:06:44"} +{"current_steps": 1542, "total_steps": 2069, "loss": 0.5146, "lr": 1.6781776457803227e-06, "epoch": 0.7454677302393038, "percentage": 74.53, "elapsed_time": "1 day, 2:36:45", "remaining_time": "9:05:42"} +{"current_steps": 1543, "total_steps": 2069, "loss": 0.4841, "lr": 1.6722072019930242e-06, "epoch": 0.7459511723471114, "percentage": 74.58, "elapsed_time": "1 day, 2:37:48", "remaining_time": "9:04:41"} +{"current_steps": 1544, "total_steps": 2069, "loss": 0.5181, "lr": 1.6662452643123234e-06, "epoch": 0.746434614454919, "percentage": 74.63, "elapsed_time": "1 day, 2:38:51", "remaining_time": "9:03:39"} +{"current_steps": 1545, "total_steps": 2069, "loss": 0.5056, "lr": 1.660291847977415e-06, "epoch": 0.7469180565627266, "percentage": 74.67, "elapsed_time": "1 day, 2:39:54", "remaining_time": "9:02:37"} +{"current_steps": 1546, "total_steps": 2069, "loss": 0.5102, "lr": 1.6543469682057105e-06, "epoch": 0.7474014986705342, "percentage": 74.72, "elapsed_time": "1 day, 2:40:52", "remaining_time": "9:01:33"} 
+{"current_steps": 1547, "total_steps": 2069, "loss": 0.517, "lr": 1.6484106401927991e-06, "epoch": 0.7478849407783418, "percentage": 74.77, "elapsed_time": "1 day, 2:41:54", "remaining_time": "9:00:31"} +{"current_steps": 1548, "total_steps": 2069, "loss": 0.5162, "lr": 1.6424828791124159e-06, "epoch": 0.7483683828861494, "percentage": 74.82, "elapsed_time": "1 day, 2:43:01", "remaining_time": "8:59:31"} +{"current_steps": 1549, "total_steps": 2069, "loss": 0.4694, "lr": 1.6365637001163958e-06, "epoch": 0.748851824993957, "percentage": 74.87, "elapsed_time": "1 day, 2:44:04", "remaining_time": "8:58:29"} +{"current_steps": 1550, "total_steps": 2069, "loss": 0.5172, "lr": 1.6306531183346387e-06, "epoch": 0.7493352671017646, "percentage": 74.92, "elapsed_time": "1 day, 2:45:03", "remaining_time": "8:57:25"} +{"current_steps": 1551, "total_steps": 2069, "loss": 0.5227, "lr": 1.624751148875065e-06, "epoch": 0.7498187092095722, "percentage": 74.96, "elapsed_time": "1 day, 2:46:02", "remaining_time": "8:56:23"} +{"current_steps": 1552, "total_steps": 2069, "loss": 0.5227, "lr": 1.6188578068235855e-06, "epoch": 0.7503021513173798, "percentage": 75.01, "elapsed_time": "1 day, 2:47:04", "remaining_time": "8:55:20"} +{"current_steps": 1553, "total_steps": 2069, "loss": 0.5197, "lr": 1.6129731072440586e-06, "epoch": 0.7507855934251874, "percentage": 75.06, "elapsed_time": "1 day, 2:48:05", "remaining_time": "8:54:18"} +{"current_steps": 1554, "total_steps": 2069, "loss": 0.5234, "lr": 1.6070970651782514e-06, "epoch": 0.751269035532995, "percentage": 75.11, "elapsed_time": "1 day, 2:49:06", "remaining_time": "8:53:15"} +{"current_steps": 1555, "total_steps": 2069, "loss": 0.5224, "lr": 1.6012296956457972e-06, "epoch": 0.7517524776408026, "percentage": 75.16, "elapsed_time": "1 day, 2:50:08", "remaining_time": "8:52:13"} +{"current_steps": 1556, "total_steps": 2069, "loss": 0.5222, "lr": 1.5953710136441685e-06, "epoch": 0.7522359197486101, "percentage": 75.21, "elapsed_time": 
"1 day, 2:51:10", "remaining_time": "8:51:11"} +{"current_steps": 1557, "total_steps": 2069, "loss": 0.4697, "lr": 1.5895210341486279e-06, "epoch": 0.7527193618564177, "percentage": 75.25, "elapsed_time": "1 day, 2:52:14", "remaining_time": "8:50:09"} +{"current_steps": 1558, "total_steps": 2069, "loss": 0.5256, "lr": 1.583679772112196e-06, "epoch": 0.7532028039642252, "percentage": 75.3, "elapsed_time": "1 day, 2:53:11", "remaining_time": "8:49:06"} +{"current_steps": 1559, "total_steps": 2069, "loss": 0.5185, "lr": 1.5778472424656083e-06, "epoch": 0.7536862460720328, "percentage": 75.35, "elapsed_time": "1 day, 2:54:12", "remaining_time": "8:48:03"} +{"current_steps": 1560, "total_steps": 2069, "loss": 0.5203, "lr": 1.5720234601172767e-06, "epoch": 0.7541696881798404, "percentage": 75.4, "elapsed_time": "1 day, 2:55:12", "remaining_time": "8:47:00"} +{"current_steps": 1561, "total_steps": 2069, "loss": 0.5189, "lr": 1.566208439953265e-06, "epoch": 0.754653130287648, "percentage": 75.45, "elapsed_time": "1 day, 2:56:10", "remaining_time": "8:45:57"} +{"current_steps": 1562, "total_steps": 2069, "loss": 0.5111, "lr": 1.5604021968372286e-06, "epoch": 0.7551365723954556, "percentage": 75.5, "elapsed_time": "1 day, 2:57:15", "remaining_time": "8:44:56"} +{"current_steps": 1563, "total_steps": 2069, "loss": 0.5147, "lr": 1.5546047456103964e-06, "epoch": 0.7556200145032632, "percentage": 75.54, "elapsed_time": "1 day, 2:58:17", "remaining_time": "8:43:54"} +{"current_steps": 1564, "total_steps": 2069, "loss": 0.5149, "lr": 1.548816101091517e-06, "epoch": 0.7561034566110708, "percentage": 75.59, "elapsed_time": "1 day, 2:59:20", "remaining_time": "8:42:52"} +{"current_steps": 1565, "total_steps": 2069, "loss": 0.5117, "lr": 1.5430362780768343e-06, "epoch": 0.7565868987188784, "percentage": 75.64, "elapsed_time": "1 day, 3:00:23", "remaining_time": "8:41:50"} +{"current_steps": 1566, "total_steps": 2069, "loss": 0.5074, "lr": 1.537265291340042e-06, "epoch": 
0.757070340826686, "percentage": 75.69, "elapsed_time": "1 day, 3:01:24", "remaining_time": "8:40:47"} +{"current_steps": 1567, "total_steps": 2069, "loss": 0.5223, "lr": 1.531503155632249e-06, "epoch": 0.7575537829344936, "percentage": 75.74, "elapsed_time": "1 day, 3:02:22", "remaining_time": "8:39:44"} +{"current_steps": 1568, "total_steps": 2069, "loss": 0.5158, "lr": 1.5257498856819353e-06, "epoch": 0.7580372250423012, "percentage": 75.79, "elapsed_time": "1 day, 3:03:26", "remaining_time": "8:38:42"} +{"current_steps": 1569, "total_steps": 2069, "loss": 0.5049, "lr": 1.5200054961949233e-06, "epoch": 0.7585206671501088, "percentage": 75.83, "elapsed_time": "1 day, 3:04:25", "remaining_time": "8:37:39"} +{"current_steps": 1570, "total_steps": 2069, "loss": 0.5305, "lr": 1.5142700018543382e-06, "epoch": 0.7590041092579164, "percentage": 75.88, "elapsed_time": "1 day, 3:05:26", "remaining_time": "8:36:37"} +{"current_steps": 1571, "total_steps": 2069, "loss": 0.5212, "lr": 1.508543417320562e-06, "epoch": 0.759487551365724, "percentage": 75.93, "elapsed_time": "1 day, 3:06:27", "remaining_time": "8:35:34"} +{"current_steps": 1572, "total_steps": 2069, "loss": 0.4883, "lr": 1.5028257572312105e-06, "epoch": 0.7599709934735316, "percentage": 75.98, "elapsed_time": "1 day, 3:07:33", "remaining_time": "8:34:34"} +{"current_steps": 1573, "total_steps": 2069, "loss": 0.5225, "lr": 1.4971170362010774e-06, "epoch": 0.7604544355813392, "percentage": 76.03, "elapsed_time": "1 day, 3:08:33", "remaining_time": "8:33:31"} +{"current_steps": 1574, "total_steps": 2069, "loss": 0.5195, "lr": 1.4914172688221213e-06, "epoch": 0.7609378776891468, "percentage": 76.08, "elapsed_time": "1 day, 3:09:39", "remaining_time": "8:32:30"} +{"current_steps": 1575, "total_steps": 2069, "loss": 0.5294, "lr": 1.485726469663401e-06, "epoch": 0.7614213197969543, "percentage": 76.12, "elapsed_time": "1 day, 3:10:42", "remaining_time": "8:31:28"} +{"current_steps": 1576, "total_steps": 2069, "loss": 
0.5143, "lr": 1.4800446532710627e-06, "epoch": 0.7619047619047619, "percentage": 76.17, "elapsed_time": "1 day, 3:11:45", "remaining_time": "8:30:26"} +{"current_steps": 1577, "total_steps": 2069, "loss": 0.5242, "lr": 1.4743718341682806e-06, "epoch": 0.7623882040125695, "percentage": 76.22, "elapsed_time": "1 day, 3:12:50", "remaining_time": "8:29:25"} +{"current_steps": 1578, "total_steps": 2069, "loss": 0.4927, "lr": 1.468708026855245e-06, "epoch": 0.7628716461203771, "percentage": 76.27, "elapsed_time": "1 day, 3:13:53", "remaining_time": "8:28:23"} +{"current_steps": 1579, "total_steps": 2069, "loss": 0.5248, "lr": 1.463053245809099e-06, "epoch": 0.7633550882281847, "percentage": 76.32, "elapsed_time": "1 day, 3:14:57", "remaining_time": "8:27:22"} +{"current_steps": 1580, "total_steps": 2069, "loss": 0.5187, "lr": 1.457407505483921e-06, "epoch": 0.7638385303359922, "percentage": 76.37, "elapsed_time": "1 day, 3:15:57", "remaining_time": "8:26:19"} +{"current_steps": 1581, "total_steps": 2069, "loss": 0.523, "lr": 1.4517708203106763e-06, "epoch": 0.7643219724437998, "percentage": 76.41, "elapsed_time": "1 day, 3:17:02", "remaining_time": "8:25:17"} +{"current_steps": 1582, "total_steps": 2069, "loss": 0.5233, "lr": 1.446143204697187e-06, "epoch": 0.7648054145516074, "percentage": 76.46, "elapsed_time": "1 day, 3:18:02", "remaining_time": "8:24:14"} +{"current_steps": 1583, "total_steps": 2069, "loss": 0.5172, "lr": 1.4405246730280946e-06, "epoch": 0.765288856659415, "percentage": 76.51, "elapsed_time": "1 day, 3:19:03", "remaining_time": "8:23:12"} +{"current_steps": 1584, "total_steps": 2069, "loss": 0.5183, "lr": 1.4349152396648153e-06, "epoch": 0.7657722987672226, "percentage": 76.56, "elapsed_time": "1 day, 3:20:04", "remaining_time": "8:22:10"} +{"current_steps": 1585, "total_steps": 2069, "loss": 0.5161, "lr": 1.4293149189455146e-06, "epoch": 0.7662557408750302, "percentage": 76.61, "elapsed_time": "1 day, 3:21:04", "remaining_time": "8:21:07"} 
+{"current_steps": 1586, "total_steps": 2069, "loss": 0.5107, "lr": 1.4237237251850634e-06, "epoch": 0.7667391829828378, "percentage": 76.66, "elapsed_time": "1 day, 3:22:09", "remaining_time": "8:20:06"} +{"current_steps": 1587, "total_steps": 2069, "loss": 0.5146, "lr": 1.4181416726750052e-06, "epoch": 0.7672226250906454, "percentage": 76.7, "elapsed_time": "1 day, 3:23:12", "remaining_time": "8:19:04"} +{"current_steps": 1588, "total_steps": 2069, "loss": 0.4812, "lr": 1.4125687756835132e-06, "epoch": 0.767706067198453, "percentage": 76.75, "elapsed_time": "1 day, 3:24:18", "remaining_time": "8:18:03"} +{"current_steps": 1589, "total_steps": 2069, "loss": 0.5129, "lr": 1.4070050484553644e-06, "epoch": 0.7681895093062606, "percentage": 76.8, "elapsed_time": "1 day, 3:25:17", "remaining_time": "8:17:00"} +{"current_steps": 1590, "total_steps": 2069, "loss": 0.5236, "lr": 1.4014505052118893e-06, "epoch": 0.7686729514140682, "percentage": 76.85, "elapsed_time": "1 day, 3:26:17", "remaining_time": "8:15:57"} +{"current_steps": 1591, "total_steps": 2069, "loss": 0.5345, "lr": 1.3959051601509537e-06, "epoch": 0.7691563935218757, "percentage": 76.9, "elapsed_time": "1 day, 3:27:17", "remaining_time": "8:14:54"} +{"current_steps": 1592, "total_steps": 2069, "loss": 0.5115, "lr": 1.3903690274469029e-06, "epoch": 0.7696398356296833, "percentage": 76.95, "elapsed_time": "1 day, 3:28:20", "remaining_time": "8:13:52"} +{"current_steps": 1593, "total_steps": 2069, "loss": 0.5168, "lr": 1.3848421212505404e-06, "epoch": 0.7701232777374909, "percentage": 76.99, "elapsed_time": "1 day, 3:29:26", "remaining_time": "8:12:51"} +{"current_steps": 1594, "total_steps": 2069, "loss": 0.5125, "lr": 1.37932445568908e-06, "epoch": 0.7706067198452985, "percentage": 77.04, "elapsed_time": "1 day, 3:30:30", "remaining_time": "8:11:50"} +{"current_steps": 1595, "total_steps": 2069, "loss": 0.5267, "lr": 1.3738160448661253e-06, "epoch": 0.7710901619531061, "percentage": 77.09, "elapsed_time": "1 
day, 3:31:32", "remaining_time": "8:10:48"} +{"current_steps": 1596, "total_steps": 2069, "loss": 0.5178, "lr": 1.3683169028616155e-06, "epoch": 0.7715736040609137, "percentage": 77.14, "elapsed_time": "1 day, 3:32:37", "remaining_time": "8:09:46"} +{"current_steps": 1597, "total_steps": 2069, "loss": 0.5211, "lr": 1.3628270437317993e-06, "epoch": 0.7720570461687213, "percentage": 77.19, "elapsed_time": "1 day, 3:33:36", "remaining_time": "8:08:43"} +{"current_steps": 1598, "total_steps": 2069, "loss": 0.5055, "lr": 1.3573464815092003e-06, "epoch": 0.7725404882765289, "percentage": 77.24, "elapsed_time": "1 day, 3:34:36", "remaining_time": "8:07:40"} +{"current_steps": 1599, "total_steps": 2069, "loss": 0.5279, "lr": 1.3518752302025773e-06, "epoch": 0.7730239303843365, "percentage": 77.28, "elapsed_time": "1 day, 3:35:32", "remaining_time": "8:06:37"} +{"current_steps": 1600, "total_steps": 2069, "loss": 0.5239, "lr": 1.3464133037968914e-06, "epoch": 0.7735073724921441, "percentage": 77.33, "elapsed_time": "1 day, 3:36:38", "remaining_time": "8:05:36"} +{"current_steps": 1601, "total_steps": 2069, "loss": 0.4987, "lr": 1.3409607162532628e-06, "epoch": 0.7739908145999517, "percentage": 77.38, "elapsed_time": "1 day, 3:37:44", "remaining_time": "8:04:35"} +{"current_steps": 1602, "total_steps": 2069, "loss": 0.5273, "lr": 1.3355174815089477e-06, "epoch": 0.7744742567077593, "percentage": 77.43, "elapsed_time": "1 day, 3:38:48", "remaining_time": "8:03:33"} +{"current_steps": 1603, "total_steps": 2069, "loss": 0.5162, "lr": 1.3300836134772916e-06, "epoch": 0.7749576988155669, "percentage": 77.48, "elapsed_time": "1 day, 3:39:50", "remaining_time": "8:02:31"} +{"current_steps": 1604, "total_steps": 2069, "loss": 0.5167, "lr": 1.3246591260477015e-06, "epoch": 0.7754411409233745, "percentage": 77.53, "elapsed_time": "1 day, 3:40:49", "remaining_time": "8:01:28"} +{"current_steps": 1605, "total_steps": 2069, "loss": 0.5251, "lr": 1.3192440330856005e-06, "epoch": 
0.7759245830311821, "percentage": 77.57, "elapsed_time": "1 day, 3:41:49", "remaining_time": "8:00:25"} +{"current_steps": 1606, "total_steps": 2069, "loss": 0.5252, "lr": 1.3138383484324063e-06, "epoch": 0.7764080251389897, "percentage": 77.62, "elapsed_time": "1 day, 3:42:49", "remaining_time": "7:59:23"} +{"current_steps": 1607, "total_steps": 2069, "loss": 0.5101, "lr": 1.308442085905482e-06, "epoch": 0.7768914672467973, "percentage": 77.67, "elapsed_time": "1 day, 3:43:52", "remaining_time": "7:58:21"} +{"current_steps": 1608, "total_steps": 2069, "loss": 0.5224, "lr": 1.30305525929811e-06, "epoch": 0.7773749093546047, "percentage": 77.72, "elapsed_time": "1 day, 3:44:55", "remaining_time": "7:57:19"} +{"current_steps": 1609, "total_steps": 2069, "loss": 0.5191, "lr": 1.297677882379455e-06, "epoch": 0.7778583514624123, "percentage": 77.77, "elapsed_time": "1 day, 3:45:52", "remaining_time": "7:56:15"} +{"current_steps": 1610, "total_steps": 2069, "loss": 0.5096, "lr": 1.2923099688945234e-06, "epoch": 0.7783417935702199, "percentage": 77.82, "elapsed_time": "1 day, 3:46:55", "remaining_time": "7:55:13"} +{"current_steps": 1611, "total_steps": 2069, "loss": 0.4812, "lr": 1.2869515325641357e-06, "epoch": 0.7788252356780275, "percentage": 77.86, "elapsed_time": "1 day, 3:48:00", "remaining_time": "7:54:12"} +{"current_steps": 1612, "total_steps": 2069, "loss": 0.5211, "lr": 1.281602587084887e-06, "epoch": 0.7793086777858351, "percentage": 77.91, "elapsed_time": "1 day, 3:49:01", "remaining_time": "7:53:10"} +{"current_steps": 1613, "total_steps": 2069, "loss": 0.5294, "lr": 1.2762631461291148e-06, "epoch": 0.7797921198936427, "percentage": 77.96, "elapsed_time": "1 day, 3:50:00", "remaining_time": "7:52:06"} +{"current_steps": 1614, "total_steps": 2069, "loss": 0.5096, "lr": 1.2709332233448573e-06, "epoch": 0.7802755620014503, "percentage": 78.01, "elapsed_time": "1 day, 3:51:01", "remaining_time": "7:51:04"} +{"current_steps": 1615, "total_steps": 2069, "loss": 
0.5135, "lr": 1.2656128323558286e-06, "epoch": 0.7807590041092579, "percentage": 78.06, "elapsed_time": "1 day, 3:52:03", "remaining_time": "7:50:02"} +{"current_steps": 1616, "total_steps": 2069, "loss": 0.5162, "lr": 1.2603019867613764e-06, "epoch": 0.7812424462170655, "percentage": 78.11, "elapsed_time": "1 day, 3:53:01", "remaining_time": "7:48:59"} +{"current_steps": 1617, "total_steps": 2069, "loss": 0.5064, "lr": 1.2550007001364518e-06, "epoch": 0.7817258883248731, "percentage": 78.15, "elapsed_time": "1 day, 3:54:04", "remaining_time": "7:47:57"} +{"current_steps": 1618, "total_steps": 2069, "loss": 0.5057, "lr": 1.2497089860315675e-06, "epoch": 0.7822093304326807, "percentage": 78.2, "elapsed_time": "1 day, 3:55:09", "remaining_time": "7:46:55"} +{"current_steps": 1619, "total_steps": 2069, "loss": 0.5125, "lr": 1.244426857972773e-06, "epoch": 0.7826927725404883, "percentage": 78.25, "elapsed_time": "1 day, 3:56:13", "remaining_time": "7:45:54"} +{"current_steps": 1620, "total_steps": 2069, "loss": 0.5146, "lr": 1.239154329461615e-06, "epoch": 0.7831762146482959, "percentage": 78.3, "elapsed_time": "1 day, 3:57:14", "remaining_time": "7:44:51"} +{"current_steps": 1621, "total_steps": 2069, "loss": 0.5138, "lr": 1.233891413975098e-06, "epoch": 0.7836596567561035, "percentage": 78.35, "elapsed_time": "1 day, 3:58:15", "remaining_time": "7:43:49"} +{"current_steps": 1622, "total_steps": 2069, "loss": 0.5111, "lr": 1.228638124965661e-06, "epoch": 0.7841430988639111, "percentage": 78.4, "elapsed_time": "1 day, 3:59:18", "remaining_time": "7:42:47"} +{"current_steps": 1623, "total_steps": 2069, "loss": 0.5134, "lr": 1.223394475861131e-06, "epoch": 0.7846265409717187, "percentage": 78.44, "elapsed_time": "1 day, 4:00:17", "remaining_time": "7:41:44"} +{"current_steps": 1624, "total_steps": 2069, "loss": 0.5092, "lr": 1.2181604800646996e-06, "epoch": 0.7851099830795262, "percentage": 78.49, "elapsed_time": "1 day, 4:01:19", "remaining_time": "7:40:42"} 
+{"current_steps": 1625, "total_steps": 2069, "loss": 0.498, "lr": 1.212936150954882e-06, "epoch": 0.7855934251873338, "percentage": 78.54, "elapsed_time": "1 day, 4:02:22", "remaining_time": "7:39:40"} +{"current_steps": 1626, "total_steps": 2069, "loss": 0.5063, "lr": 1.207721501885486e-06, "epoch": 0.7860768672951414, "percentage": 78.59, "elapsed_time": "1 day, 4:03:23", "remaining_time": "7:38:38"} +{"current_steps": 1627, "total_steps": 2069, "loss": 0.5212, "lr": 1.2025165461855714e-06, "epoch": 0.786560309402949, "percentage": 78.64, "elapsed_time": "1 day, 4:04:26", "remaining_time": "7:37:36"} +{"current_steps": 1628, "total_steps": 2069, "loss": 0.5155, "lr": 1.1973212971594262e-06, "epoch": 0.7870437515107566, "percentage": 78.69, "elapsed_time": "1 day, 4:05:27", "remaining_time": "7:36:33"} +{"current_steps": 1629, "total_steps": 2069, "loss": 0.5183, "lr": 1.1921357680865258e-06, "epoch": 0.7875271936185642, "percentage": 78.73, "elapsed_time": "1 day, 4:06:31", "remaining_time": "7:35:32"} +{"current_steps": 1630, "total_steps": 2069, "loss": 0.4949, "lr": 1.1869599722215013e-06, "epoch": 0.7880106357263718, "percentage": 78.78, "elapsed_time": "1 day, 4:07:35", "remaining_time": "7:34:30"} +{"current_steps": 1631, "total_steps": 2069, "loss": 0.5206, "lr": 1.181793922794102e-06, "epoch": 0.7884940778341794, "percentage": 78.83, "elapsed_time": "1 day, 4:08:36", "remaining_time": "7:33:28"} +{"current_steps": 1632, "total_steps": 2069, "loss": 0.503, "lr": 1.1766376330091684e-06, "epoch": 0.788977519941987, "percentage": 78.88, "elapsed_time": "1 day, 4:09:40", "remaining_time": "7:32:26"} +{"current_steps": 1633, "total_steps": 2069, "loss": 0.5255, "lr": 1.1714911160465924e-06, "epoch": 0.7894609620497945, "percentage": 78.93, "elapsed_time": "1 day, 4:10:41", "remaining_time": "7:31:24"} +{"current_steps": 1634, "total_steps": 2069, "loss": 0.5169, "lr": 1.1663543850612847e-06, "epoch": 0.7899444041576021, "percentage": 78.98, "elapsed_time": "1 
day, 4:11:41", "remaining_time": "7:30:21"} +{"current_steps": 1635, "total_steps": 2069, "loss": 0.4938, "lr": 1.1612274531831463e-06, "epoch": 0.7904278462654097, "percentage": 79.02, "elapsed_time": "1 day, 4:12:43", "remaining_time": "7:29:19"} +{"current_steps": 1636, "total_steps": 2069, "loss": 0.5222, "lr": 1.1561103335170242e-06, "epoch": 0.7909112883732173, "percentage": 79.07, "elapsed_time": "1 day, 4:13:41", "remaining_time": "7:28:16"} +{"current_steps": 1637, "total_steps": 2069, "loss": 0.5192, "lr": 1.1510030391426941e-06, "epoch": 0.7913947304810249, "percentage": 79.12, "elapsed_time": "1 day, 4:14:45", "remaining_time": "7:27:14"} +{"current_steps": 1638, "total_steps": 2069, "loss": 0.5232, "lr": 1.1459055831148074e-06, "epoch": 0.7918781725888325, "percentage": 79.17, "elapsed_time": "1 day, 4:15:45", "remaining_time": "7:26:11"} +{"current_steps": 1639, "total_steps": 2069, "loss": 0.5212, "lr": 1.140817978462876e-06, "epoch": 0.7923616146966401, "percentage": 79.22, "elapsed_time": "1 day, 4:16:43", "remaining_time": "7:25:08"} +{"current_steps": 1640, "total_steps": 2069, "loss": 0.4873, "lr": 1.1357402381912224e-06, "epoch": 0.7928450568044476, "percentage": 79.27, "elapsed_time": "1 day, 4:17:48", "remaining_time": "7:24:07"} +{"current_steps": 1641, "total_steps": 2069, "loss": 0.5211, "lr": 1.1306723752789672e-06, "epoch": 0.7933284989122552, "percentage": 79.31, "elapsed_time": "1 day, 4:18:50", "remaining_time": "7:23:05"} +{"current_steps": 1642, "total_steps": 2069, "loss": 0.5179, "lr": 1.1256144026799703e-06, "epoch": 0.7938119410200628, "percentage": 79.36, "elapsed_time": "1 day, 4:19:52", "remaining_time": "7:22:02"} +{"current_steps": 1643, "total_steps": 2069, "loss": 0.4911, "lr": 1.1205663333228217e-06, "epoch": 0.7942953831278704, "percentage": 79.41, "elapsed_time": "1 day, 4:20:58", "remaining_time": "7:21:01"} +{"current_steps": 1644, "total_steps": 2069, "loss": 0.5146, "lr": 1.1155281801107897e-06, "epoch": 
0.794778825235678, "percentage": 79.46, "elapsed_time": "1 day, 4:21:59", "remaining_time": "7:19:59"} +{"current_steps": 1645, "total_steps": 2069, "loss": 0.5063, "lr": 1.1104999559218022e-06, "epoch": 0.7952622673434856, "percentage": 79.51, "elapsed_time": "1 day, 4:23:05", "remaining_time": "7:18:58"} +{"current_steps": 1646, "total_steps": 2069, "loss": 0.5122, "lr": 1.1054816736084057e-06, "epoch": 0.7957457094512932, "percentage": 79.56, "elapsed_time": "1 day, 4:24:09", "remaining_time": "7:17:56"} +{"current_steps": 1647, "total_steps": 2069, "loss": 0.5089, "lr": 1.1004733459977325e-06, "epoch": 0.7962291515591008, "percentage": 79.6, "elapsed_time": "1 day, 4:25:09", "remaining_time": "7:16:54"} +{"current_steps": 1648, "total_steps": 2069, "loss": 0.5177, "lr": 1.0954749858914727e-06, "epoch": 0.7967125936669084, "percentage": 79.65, "elapsed_time": "1 day, 4:26:10", "remaining_time": "7:15:51"} +{"current_steps": 1649, "total_steps": 2069, "loss": 0.5211, "lr": 1.0904866060658376e-06, "epoch": 0.797196035774716, "percentage": 79.7, "elapsed_time": "1 day, 4:27:12", "remaining_time": "7:14:49"} +{"current_steps": 1650, "total_steps": 2069, "loss": 0.5174, "lr": 1.0855082192715294e-06, "epoch": 0.7976794778825236, "percentage": 79.75, "elapsed_time": "1 day, 4:28:16", "remaining_time": "7:13:47"} +{"current_steps": 1651, "total_steps": 2069, "loss": 0.5049, "lr": 1.0805398382337035e-06, "epoch": 0.7981629199903312, "percentage": 79.8, "elapsed_time": "1 day, 4:29:19", "remaining_time": "7:12:45"} +{"current_steps": 1652, "total_steps": 2069, "loss": 0.5226, "lr": 1.0755814756519445e-06, "epoch": 0.7986463620981388, "percentage": 79.85, "elapsed_time": "1 day, 4:30:25", "remaining_time": "7:11:44"} +{"current_steps": 1653, "total_steps": 2069, "loss": 0.5095, "lr": 1.0706331442002226e-06, "epoch": 0.7991298042059464, "percentage": 79.89, "elapsed_time": "1 day, 4:31:27", "remaining_time": "7:10:42"} +{"current_steps": 1654, "total_steps": 2069, "loss": 
0.5168, "lr": 1.0656948565268782e-06, "epoch": 0.799613246313754, "percentage": 79.94, "elapsed_time": "1 day, 4:32:28", "remaining_time": "7:09:40"} +{"current_steps": 1655, "total_steps": 2069, "loss": 0.5128, "lr": 1.0607666252545673e-06, "epoch": 0.8000966884215616, "percentage": 79.99, "elapsed_time": "1 day, 4:33:29", "remaining_time": "7:08:38"} +{"current_steps": 1656, "total_steps": 2069, "loss": 0.514, "lr": 1.0558484629802502e-06, "epoch": 0.8005801305293692, "percentage": 80.04, "elapsed_time": "1 day, 4:34:32", "remaining_time": "7:07:36"} +{"current_steps": 1657, "total_steps": 2069, "loss": 0.512, "lr": 1.0509403822751425e-06, "epoch": 0.8010635726371766, "percentage": 80.09, "elapsed_time": "1 day, 4:35:29", "remaining_time": "7:06:32"} +{"current_steps": 1658, "total_steps": 2069, "loss": 0.4941, "lr": 1.0460423956846955e-06, "epoch": 0.8015470147449842, "percentage": 80.14, "elapsed_time": "1 day, 4:36:32", "remaining_time": "7:05:30"} +{"current_steps": 1659, "total_steps": 2069, "loss": 0.5088, "lr": 1.041154515728559e-06, "epoch": 0.8020304568527918, "percentage": 80.18, "elapsed_time": "1 day, 4:37:31", "remaining_time": "7:04:27"} +{"current_steps": 1660, "total_steps": 2069, "loss": 0.5119, "lr": 1.0362767549005454e-06, "epoch": 0.8025138989605994, "percentage": 80.23, "elapsed_time": "1 day, 4:38:31", "remaining_time": "7:03:25"} +{"current_steps": 1661, "total_steps": 2069, "loss": 0.5212, "lr": 1.0314091256686065e-06, "epoch": 0.802997341068407, "percentage": 80.28, "elapsed_time": "1 day, 4:39:33", "remaining_time": "7:02:23"} +{"current_steps": 1662, "total_steps": 2069, "loss": 0.5052, "lr": 1.0265516404747943e-06, "epoch": 0.8034807831762146, "percentage": 80.33, "elapsed_time": "1 day, 4:40:36", "remaining_time": "7:01:21"} +{"current_steps": 1663, "total_steps": 2069, "loss": 0.5109, "lr": 1.0217043117352337e-06, "epoch": 0.8039642252840222, "percentage": 80.38, "elapsed_time": "1 day, 4:41:41", "remaining_time": "7:00:19"} 
+{"current_steps": 1664, "total_steps": 2069, "loss": 0.5118, "lr": 1.0168671518400853e-06, "epoch": 0.8044476673918298, "percentage": 80.43, "elapsed_time": "1 day, 4:42:42", "remaining_time": "6:59:17"} +{"current_steps": 1665, "total_steps": 2069, "loss": 0.4879, "lr": 1.0120401731535213e-06, "epoch": 0.8049311094996374, "percentage": 80.47, "elapsed_time": "1 day, 4:44:45", "remaining_time": "6:58:30"} +{"current_steps": 1666, "total_steps": 2069, "loss": 0.5104, "lr": 1.0072233880136872e-06, "epoch": 0.805414551607445, "percentage": 80.52, "elapsed_time": "1 day, 4:45:43", "remaining_time": "6:57:26"} +{"current_steps": 1667, "total_steps": 2069, "loss": 0.5235, "lr": 1.0024168087326764e-06, "epoch": 0.8058979937152526, "percentage": 80.57, "elapsed_time": "1 day, 4:46:45", "remaining_time": "6:56:24"} +{"current_steps": 1668, "total_steps": 2069, "loss": 0.5149, "lr": 9.976204475964907e-07, "epoch": 0.8063814358230602, "percentage": 80.62, "elapsed_time": "1 day, 4:47:47", "remaining_time": "6:55:22"} +{"current_steps": 1669, "total_steps": 2069, "loss": 0.5191, "lr": 9.92834316865015e-07, "epoch": 0.8068648779308678, "percentage": 80.67, "elapsed_time": "1 day, 4:48:46", "remaining_time": "6:54:19"} +{"current_steps": 1670, "total_steps": 2069, "loss": 0.5188, "lr": 9.88058428771987e-07, "epoch": 0.8073483200386754, "percentage": 80.72, "elapsed_time": "1 day, 4:49:49", "remaining_time": "6:53:17"} +{"current_steps": 1671, "total_steps": 2069, "loss": 0.518, "lr": 9.832927955249605e-07, "epoch": 0.807831762146483, "percentage": 80.76, "elapsed_time": "1 day, 4:50:53", "remaining_time": "6:52:15"} +{"current_steps": 1672, "total_steps": 2069, "loss": 0.5279, "lr": 9.785374293052802e-07, "epoch": 0.8083152042542906, "percentage": 80.81, "elapsed_time": "1 day, 4:51:55", "remaining_time": "6:51:13"} +{"current_steps": 1673, "total_steps": 2069, "loss": 0.5267, "lr": 9.737923422680424e-07, "epoch": 0.8087986463620981, "percentage": 80.86, "elapsed_time": "1 day, 
4:52:55", "remaining_time": "6:50:11"} +{"current_steps": 1674, "total_steps": 2069, "loss": 0.5098, "lr": 9.690575465420733e-07, "epoch": 0.8092820884699057, "percentage": 80.91, "elapsed_time": "1 day, 4:54:00", "remaining_time": "6:49:09"} +{"current_steps": 1675, "total_steps": 2069, "loss": 0.5171, "lr": 9.643330542298929e-07, "epoch": 0.8097655305777133, "percentage": 80.96, "elapsed_time": "1 day, 4:55:02", "remaining_time": "6:48:07"} +{"current_steps": 1676, "total_steps": 2069, "loss": 0.5164, "lr": 9.596188774076849e-07, "epoch": 0.8102489726855209, "percentage": 81.01, "elapsed_time": "1 day, 4:56:01", "remaining_time": "6:47:04"} +{"current_steps": 1677, "total_steps": 2069, "loss": 0.5167, "lr": 9.549150281252633e-07, "epoch": 0.8107324147933285, "percentage": 81.05, "elapsed_time": "1 day, 4:57:03", "remaining_time": "6:46:02"} +{"current_steps": 1678, "total_steps": 2069, "loss": 0.5198, "lr": 9.50221518406047e-07, "epoch": 0.8112158569011361, "percentage": 81.1, "elapsed_time": "1 day, 4:58:07", "remaining_time": "6:45:00"} +{"current_steps": 1679, "total_steps": 2069, "loss": 0.5194, "lr": 9.455383602470247e-07, "epoch": 0.8116992990089437, "percentage": 81.15, "elapsed_time": "1 day, 4:59:08", "remaining_time": "6:43:58"} +{"current_steps": 1680, "total_steps": 2069, "loss": 0.5154, "lr": 9.408655656187282e-07, "epoch": 0.8121827411167513, "percentage": 81.2, "elapsed_time": "1 day, 5:00:09", "remaining_time": "6:42:55"} +{"current_steps": 1681, "total_steps": 2069, "loss": 0.5111, "lr": 9.362031464651955e-07, "epoch": 0.8126661832245589, "percentage": 81.25, "elapsed_time": "1 day, 5:01:16", "remaining_time": "6:41:54"} +{"current_steps": 1682, "total_steps": 2069, "loss": 0.5175, "lr": 9.31551114703943e-07, "epoch": 0.8131496253323665, "percentage": 81.3, "elapsed_time": "1 day, 5:02:16", "remaining_time": "6:40:52"} +{"current_steps": 1683, "total_steps": 2069, "loss": 0.5219, "lr": 9.269094822259439e-07, "epoch": 0.813633067440174, 
"percentage": 81.34, "elapsed_time": "1 day, 5:03:16", "remaining_time": "6:39:49"} +{"current_steps": 1684, "total_steps": 2069, "loss": 0.5257, "lr": 9.22278260895581e-07, "epoch": 0.8141165095479816, "percentage": 81.39, "elapsed_time": "1 day, 5:04:17", "remaining_time": "6:38:47"} +{"current_steps": 1685, "total_steps": 2069, "loss": 0.5065, "lr": 9.176574625506324e-07, "epoch": 0.8145999516557892, "percentage": 81.44, "elapsed_time": "1 day, 5:05:20", "remaining_time": "6:37:45"} +{"current_steps": 1686, "total_steps": 2069, "loss": 0.5198, "lr": 9.130470990022283e-07, "epoch": 0.8150833937635968, "percentage": 81.49, "elapsed_time": "1 day, 5:06:23", "remaining_time": "6:36:43"} +{"current_steps": 1687, "total_steps": 2069, "loss": 0.5054, "lr": 9.084471820348306e-07, "epoch": 0.8155668358714044, "percentage": 81.54, "elapsed_time": "1 day, 5:07:21", "remaining_time": "6:35:40"} +{"current_steps": 1688, "total_steps": 2069, "loss": 0.481, "lr": 9.038577234061979e-07, "epoch": 0.816050277979212, "percentage": 81.59, "elapsed_time": "1 day, 5:08:22", "remaining_time": "6:34:37"} +{"current_steps": 1689, "total_steps": 2069, "loss": 0.512, "lr": 8.992787348473575e-07, "epoch": 0.8165337200870196, "percentage": 81.63, "elapsed_time": "1 day, 5:09:25", "remaining_time": "6:33:35"} +{"current_steps": 1690, "total_steps": 2069, "loss": 0.4919, "lr": 8.947102280625708e-07, "epoch": 0.8170171621948271, "percentage": 81.68, "elapsed_time": "1 day, 5:10:31", "remaining_time": "6:32:34"} +{"current_steps": 1691, "total_steps": 2069, "loss": 0.5063, "lr": 8.901522147293107e-07, "epoch": 0.8175006043026347, "percentage": 81.73, "elapsed_time": "1 day, 5:11:31", "remaining_time": "6:31:31"} +{"current_steps": 1692, "total_steps": 2069, "loss": 0.4969, "lr": 8.856047064982276e-07, "epoch": 0.8179840464104423, "percentage": 81.78, "elapsed_time": "1 day, 5:12:39", "remaining_time": "6:30:30"} +{"current_steps": 1693, "total_steps": 2069, "loss": 0.5123, "lr": 
8.810677149931168e-07, "epoch": 0.8184674885182499, "percentage": 81.83, "elapsed_time": "1 day, 5:13:43", "remaining_time": "6:29:29"} +{"current_steps": 1694, "total_steps": 2069, "loss": 0.5222, "lr": 8.765412518108957e-07, "epoch": 0.8189509306260575, "percentage": 81.88, "elapsed_time": "1 day, 5:14:43", "remaining_time": "6:28:26"} +{"current_steps": 1695, "total_steps": 2069, "loss": 0.5245, "lr": 8.720253285215685e-07, "epoch": 0.8194343727338651, "percentage": 81.92, "elapsed_time": "1 day, 5:15:43", "remaining_time": "6:27:24"} +{"current_steps": 1696, "total_steps": 2069, "loss": 0.4987, "lr": 8.675199566682002e-07, "epoch": 0.8199178148416727, "percentage": 81.97, "elapsed_time": "1 day, 5:16:46", "remaining_time": "6:26:21"} +{"current_steps": 1697, "total_steps": 2069, "loss": 0.4956, "lr": 8.630251477668828e-07, "epoch": 0.8204012569494803, "percentage": 82.02, "elapsed_time": "1 day, 5:17:54", "remaining_time": "6:25:21"} +{"current_steps": 1698, "total_steps": 2069, "loss": 0.5096, "lr": 8.585409133067119e-07, "epoch": 0.8208846990572879, "percentage": 82.07, "elapsed_time": "1 day, 5:18:55", "remaining_time": "6:24:18"} +{"current_steps": 1699, "total_steps": 2069, "loss": 0.5136, "lr": 8.540672647497483e-07, "epoch": 0.8213681411650955, "percentage": 82.12, "elapsed_time": "1 day, 5:19:57", "remaining_time": "6:23:16"} +{"current_steps": 1700, "total_steps": 2069, "loss": 0.5213, "lr": 8.49604213531004e-07, "epoch": 0.8218515832729031, "percentage": 82.17, "elapsed_time": "1 day, 5:20:58", "remaining_time": "6:22:14"} +{"current_steps": 1701, "total_steps": 2069, "loss": 0.5051, "lr": 8.451517710583934e-07, "epoch": 0.8223350253807107, "percentage": 82.21, "elapsed_time": "1 day, 5:21:59", "remaining_time": "6:21:11"} +{"current_steps": 1702, "total_steps": 2069, "loss": 0.5257, "lr": 8.407099487127207e-07, "epoch": 0.8228184674885183, "percentage": 82.26, "elapsed_time": "1 day, 5:22:59", "remaining_time": "6:20:09"} +{"current_steps": 1703, 
"total_steps": 2069, "loss": 0.5249, "lr": 8.362787578476395e-07, "epoch": 0.8233019095963259, "percentage": 82.31, "elapsed_time": "1 day, 5:24:03", "remaining_time": "6:19:07"} +{"current_steps": 1704, "total_steps": 2069, "loss": 0.5058, "lr": 8.318582097896316e-07, "epoch": 0.8237853517041335, "percentage": 82.36, "elapsed_time": "1 day, 5:25:05", "remaining_time": "6:18:05"} +{"current_steps": 1705, "total_steps": 2069, "loss": 0.5111, "lr": 8.274483158379759e-07, "epoch": 0.8242687938119411, "percentage": 82.41, "elapsed_time": "1 day, 5:26:07", "remaining_time": "6:17:02"} +{"current_steps": 1706, "total_steps": 2069, "loss": 0.4938, "lr": 8.230490872647146e-07, "epoch": 0.8247522359197486, "percentage": 82.46, "elapsed_time": "1 day, 5:27:05", "remaining_time": "6:15:59"} +{"current_steps": 1707, "total_steps": 2069, "loss": 0.5183, "lr": 8.18660535314631e-07, "epoch": 0.8252356780275562, "percentage": 82.5, "elapsed_time": "1 day, 5:28:06", "remaining_time": "6:14:57"} +{"current_steps": 1708, "total_steps": 2069, "loss": 0.5131, "lr": 8.142826712052177e-07, "epoch": 0.8257191201353637, "percentage": 82.55, "elapsed_time": "1 day, 5:29:06", "remaining_time": "6:13:54"} +{"current_steps": 1709, "total_steps": 2069, "loss": 0.5104, "lr": 8.099155061266495e-07, "epoch": 0.8262025622431713, "percentage": 82.6, "elapsed_time": "1 day, 5:30:07", "remaining_time": "6:12:52"} +{"current_steps": 1710, "total_steps": 2069, "loss": 0.504, "lr": 8.055590512417499e-07, "epoch": 0.8266860043509789, "percentage": 82.65, "elapsed_time": "1 day, 5:31:07", "remaining_time": "6:11:49"} +{"current_steps": 1711, "total_steps": 2069, "loss": 0.5183, "lr": 8.012133176859705e-07, "epoch": 0.8271694464587865, "percentage": 82.7, "elapsed_time": "1 day, 5:32:06", "remaining_time": "6:10:47"} +{"current_steps": 1712, "total_steps": 2069, "loss": 0.5134, "lr": 7.968783165673554e-07, "epoch": 0.8276528885665941, "percentage": 82.75, "elapsed_time": "1 day, 5:33:07", "remaining_time": 
"6:09:44"} +{"current_steps": 1713, "total_steps": 2069, "loss": 0.5074, "lr": 7.925540589665187e-07, "epoch": 0.8281363306744017, "percentage": 82.79, "elapsed_time": "1 day, 5:34:08", "remaining_time": "6:08:42"} +{"current_steps": 1714, "total_steps": 2069, "loss": 0.4907, "lr": 7.882405559366091e-07, "epoch": 0.8286197727822093, "percentage": 82.84, "elapsed_time": "1 day, 5:35:08", "remaining_time": "6:07:39"} +{"current_steps": 1715, "total_steps": 2069, "loss": 0.5107, "lr": 7.839378185032897e-07, "epoch": 0.8291032148900169, "percentage": 82.89, "elapsed_time": "1 day, 5:36:07", "remaining_time": "6:06:37"} +{"current_steps": 1716, "total_steps": 2069, "loss": 0.5185, "lr": 7.796458576647015e-07, "epoch": 0.8295866569978245, "percentage": 82.94, "elapsed_time": "1 day, 5:37:10", "remaining_time": "6:05:35"} +{"current_steps": 1717, "total_steps": 2069, "loss": 0.5182, "lr": 7.753646843914465e-07, "epoch": 0.8300700991056321, "percentage": 82.99, "elapsed_time": "1 day, 5:38:10", "remaining_time": "6:04:32"} +{"current_steps": 1718, "total_steps": 2069, "loss": 0.5029, "lr": 7.710943096265461e-07, "epoch": 0.8305535412134397, "percentage": 83.04, "elapsed_time": "1 day, 5:39:11", "remaining_time": "6:03:30"} +{"current_steps": 1719, "total_steps": 2069, "loss": 0.5021, "lr": 7.668347442854218e-07, "epoch": 0.8310369833212473, "percentage": 83.08, "elapsed_time": "1 day, 5:40:09", "remaining_time": "6:02:27"} +{"current_steps": 1720, "total_steps": 2069, "loss": 0.5206, "lr": 7.625859992558665e-07, "epoch": 0.8315204254290549, "percentage": 83.13, "elapsed_time": "1 day, 5:41:07", "remaining_time": "6:01:24"} +{"current_steps": 1721, "total_steps": 2069, "loss": 0.5134, "lr": 7.583480853980158e-07, "epoch": 0.8320038675368625, "percentage": 83.18, "elapsed_time": "1 day, 5:42:08", "remaining_time": "6:00:21"} +{"current_steps": 1722, "total_steps": 2069, "loss": 0.5184, "lr": 7.541210135443188e-07, "epoch": 0.8324873096446701, "percentage": 83.23, 
"elapsed_time": "1 day, 5:43:10", "remaining_time": "5:59:19"} +{"current_steps": 1723, "total_steps": 2069, "loss": 0.5222, "lr": 7.499047944995108e-07, "epoch": 0.8329707517524776, "percentage": 83.28, "elapsed_time": "1 day, 5:44:13", "remaining_time": "5:58:17"} +{"current_steps": 1724, "total_steps": 2069, "loss": 0.5149, "lr": 7.45699439040588e-07, "epoch": 0.8334541938602852, "percentage": 83.33, "elapsed_time": "1 day, 5:45:15", "remaining_time": "5:57:15"} +{"current_steps": 1725, "total_steps": 2069, "loss": 0.5086, "lr": 7.415049579167783e-07, "epoch": 0.8339376359680928, "percentage": 83.37, "elapsed_time": "1 day, 5:46:17", "remaining_time": "5:56:13"} +{"current_steps": 1726, "total_steps": 2069, "loss": 0.5171, "lr": 7.37321361849514e-07, "epoch": 0.8344210780759004, "percentage": 83.42, "elapsed_time": "1 day, 5:47:17", "remaining_time": "5:55:10"} +{"current_steps": 1727, "total_steps": 2069, "loss": 0.4931, "lr": 7.331486615324024e-07, "epoch": 0.834904520183708, "percentage": 83.47, "elapsed_time": "1 day, 5:48:20", "remaining_time": "5:54:08"} +{"current_steps": 1728, "total_steps": 2069, "loss": 0.4895, "lr": 7.289868676312023e-07, "epoch": 0.8353879622915156, "percentage": 83.52, "elapsed_time": "1 day, 5:49:22", "remaining_time": "5:53:06"} +{"current_steps": 1729, "total_steps": 2069, "loss": 0.5141, "lr": 7.248359907837959e-07, "epoch": 0.8358714043993232, "percentage": 83.57, "elapsed_time": "1 day, 5:50:24", "remaining_time": "5:52:04"} +{"current_steps": 1730, "total_steps": 2069, "loss": 0.5053, "lr": 7.206960416001563e-07, "epoch": 0.8363548465071308, "percentage": 83.62, "elapsed_time": "1 day, 5:51:24", "remaining_time": "5:51:02"} +{"current_steps": 1731, "total_steps": 2069, "loss": 0.515, "lr": 7.165670306623296e-07, "epoch": 0.8368382886149384, "percentage": 83.66, "elapsed_time": "1 day, 5:52:28", "remaining_time": "5:50:00"} +{"current_steps": 1732, "total_steps": 2069, "loss": 0.5084, "lr": 7.124489685243985e-07, "epoch": 
0.837321730722746, "percentage": 83.71, "elapsed_time": "1 day, 5:53:30", "remaining_time": "5:48:58"} +{"current_steps": 1733, "total_steps": 2069, "loss": 0.5149, "lr": 7.08341865712463e-07, "epoch": 0.8378051728305536, "percentage": 83.76, "elapsed_time": "1 day, 5:54:31", "remaining_time": "5:47:55"} +{"current_steps": 1734, "total_steps": 2069, "loss": 0.5272, "lr": 7.042457327246088e-07, "epoch": 0.8382886149383612, "percentage": 83.81, "elapsed_time": "1 day, 5:55:34", "remaining_time": "5:46:53"} +{"current_steps": 1735, "total_steps": 2069, "loss": 0.5173, "lr": 7.001605800308825e-07, "epoch": 0.8387720570461688, "percentage": 83.86, "elapsed_time": "1 day, 5:56:36", "remaining_time": "5:45:51"} +{"current_steps": 1736, "total_steps": 2069, "loss": 0.5182, "lr": 6.960864180732618e-07, "epoch": 0.8392554991539763, "percentage": 83.91, "elapsed_time": "1 day, 5:57:38", "remaining_time": "5:44:49"} +{"current_steps": 1737, "total_steps": 2069, "loss": 0.5145, "lr": 6.920232572656349e-07, "epoch": 0.8397389412617839, "percentage": 83.95, "elapsed_time": "1 day, 5:58:40", "remaining_time": "5:43:47"} +{"current_steps": 1738, "total_steps": 2069, "loss": 0.5079, "lr": 6.879711079937667e-07, "epoch": 0.8402223833695915, "percentage": 84.0, "elapsed_time": "1 day, 5:59:44", "remaining_time": "5:42:45"} +{"current_steps": 1739, "total_steps": 2069, "loss": 0.5061, "lr": 6.839299806152799e-07, "epoch": 0.840705825477399, "percentage": 84.05, "elapsed_time": "1 day, 6:00:46", "remaining_time": "5:41:43"} +{"current_steps": 1740, "total_steps": 2069, "loss": 0.5174, "lr": 6.79899885459619e-07, "epoch": 0.8411892675852066, "percentage": 84.1, "elapsed_time": "1 day, 6:01:49", "remaining_time": "5:40:41"} +{"current_steps": 1741, "total_steps": 2069, "loss": 0.4981, "lr": 6.758808328280325e-07, "epoch": 0.8416727096930142, "percentage": 84.15, "elapsed_time": "1 day, 6:02:52", "remaining_time": "5:39:39"} +{"current_steps": 1742, "total_steps": 2069, "loss": 0.5216, 
"lr": 6.718728329935448e-07, "epoch": 0.8421561518008218, "percentage": 84.2, "elapsed_time": "1 day, 6:03:51", "remaining_time": "5:38:36"} +{"current_steps": 1743, "total_steps": 2069, "loss": 0.5154, "lr": 6.678758962009241e-07, "epoch": 0.8426395939086294, "percentage": 84.24, "elapsed_time": "1 day, 6:04:54", "remaining_time": "5:37:34"} +{"current_steps": 1744, "total_steps": 2069, "loss": 0.5181, "lr": 6.638900326666653e-07, "epoch": 0.843123036016437, "percentage": 84.29, "elapsed_time": "1 day, 6:05:54", "remaining_time": "5:36:32"} +{"current_steps": 1745, "total_steps": 2069, "loss": 0.4772, "lr": 6.599152525789531e-07, "epoch": 0.8436064781242446, "percentage": 84.34, "elapsed_time": "1 day, 6:06:57", "remaining_time": "5:35:30"} +{"current_steps": 1746, "total_steps": 2069, "loss": 0.5153, "lr": 6.559515660976506e-07, "epoch": 0.8440899202320522, "percentage": 84.39, "elapsed_time": "1 day, 6:07:59", "remaining_time": "5:34:28"} +{"current_steps": 1747, "total_steps": 2069, "loss": 0.5218, "lr": 6.519989833542567e-07, "epoch": 0.8445733623398598, "percentage": 84.44, "elapsed_time": "1 day, 6:09:00", "remaining_time": "5:33:25"} +{"current_steps": 1748, "total_steps": 2069, "loss": 0.5267, "lr": 6.480575144518931e-07, "epoch": 0.8450568044476674, "percentage": 84.49, "elapsed_time": "1 day, 6:09:58", "remaining_time": "5:32:22"} +{"current_steps": 1749, "total_steps": 2069, "loss": 0.517, "lr": 6.441271694652701e-07, "epoch": 0.845540246555475, "percentage": 84.53, "elapsed_time": "1 day, 6:10:56", "remaining_time": "5:31:20"} +{"current_steps": 1750, "total_steps": 2069, "loss": 0.523, "lr": 6.402079584406673e-07, "epoch": 0.8460236886632826, "percentage": 84.58, "elapsed_time": "1 day, 6:11:57", "remaining_time": "5:30:17"} +{"current_steps": 1751, "total_steps": 2069, "loss": 0.4943, "lr": 6.36299891395904e-07, "epoch": 0.8465071307710902, "percentage": 84.63, "elapsed_time": "1 day, 6:13:04", "remaining_time": "5:29:16"} +{"current_steps": 1752, 
"total_steps": 2069, "loss": 0.519, "lr": 6.32402978320315e-07, "epoch": 0.8469905728788978, "percentage": 84.68, "elapsed_time": "1 day, 6:14:06", "remaining_time": "5:28:14"} +{"current_steps": 1753, "total_steps": 2069, "loss": 0.5087, "lr": 6.285172291747232e-07, "epoch": 0.8474740149867054, "percentage": 84.73, "elapsed_time": "1 day, 6:15:09", "remaining_time": "5:27:12"} +{"current_steps": 1754, "total_steps": 2069, "loss": 0.5135, "lr": 6.246426538914174e-07, "epoch": 0.847957457094513, "percentage": 84.78, "elapsed_time": "1 day, 6:16:12", "remaining_time": "5:26:10"} +{"current_steps": 1755, "total_steps": 2069, "loss": 0.5181, "lr": 6.207792623741249e-07, "epoch": 0.8484408992023206, "percentage": 84.82, "elapsed_time": "1 day, 6:17:15", "remaining_time": "5:25:08"} +{"current_steps": 1756, "total_steps": 2069, "loss": 0.4718, "lr": 6.169270644979836e-07, "epoch": 0.8489243413101281, "percentage": 84.87, "elapsed_time": "1 day, 6:18:24", "remaining_time": "5:24:07"} +{"current_steps": 1757, "total_steps": 2069, "loss": 0.5093, "lr": 6.130860701095226e-07, "epoch": 0.8494077834179357, "percentage": 84.92, "elapsed_time": "1 day, 6:19:29", "remaining_time": "5:23:05"} +{"current_steps": 1758, "total_steps": 2069, "loss": 0.5245, "lr": 6.092562890266341e-07, "epoch": 0.8498912255257433, "percentage": 84.97, "elapsed_time": "1 day, 6:20:25", "remaining_time": "5:22:02"} +{"current_steps": 1759, "total_steps": 2069, "loss": 0.5067, "lr": 6.054377310385479e-07, "epoch": 0.8503746676335509, "percentage": 85.02, "elapsed_time": "1 day, 6:21:26", "remaining_time": "5:21:00"} +{"current_steps": 1760, "total_steps": 2069, "loss": 0.5169, "lr": 6.016304059058031e-07, "epoch": 0.8508581097413584, "percentage": 85.07, "elapsed_time": "1 day, 6:22:25", "remaining_time": "5:19:57"} +{"current_steps": 1761, "total_steps": 2069, "loss": 0.5243, "lr": 5.97834323360233e-07, "epoch": 0.851341551849166, "percentage": 85.11, "elapsed_time": "1 day, 6:23:26", "remaining_time": 
"5:18:55"} +{"current_steps": 1762, "total_steps": 2069, "loss": 0.5194, "lr": 5.940494931049262e-07, "epoch": 0.8518249939569736, "percentage": 85.16, "elapsed_time": "1 day, 6:24:27", "remaining_time": "5:17:52"} +{"current_steps": 1763, "total_steps": 2069, "loss": 0.4975, "lr": 5.902759248142187e-07, "epoch": 0.8523084360647812, "percentage": 85.21, "elapsed_time": "1 day, 6:25:28", "remaining_time": "5:16:50"} +{"current_steps": 1764, "total_steps": 2069, "loss": 0.5154, "lr": 5.86513628133652e-07, "epoch": 0.8527918781725888, "percentage": 85.26, "elapsed_time": "1 day, 6:26:31", "remaining_time": "5:15:48"} +{"current_steps": 1765, "total_steps": 2069, "loss": 0.5154, "lr": 5.827626126799613e-07, "epoch": 0.8532753202803964, "percentage": 85.31, "elapsed_time": "1 day, 6:27:32", "remaining_time": "5:14:46"} +{"current_steps": 1766, "total_steps": 2069, "loss": 0.5163, "lr": 5.790228880410426e-07, "epoch": 0.853758762388204, "percentage": 85.36, "elapsed_time": "1 day, 6:28:29", "remaining_time": "5:13:43"} +{"current_steps": 1767, "total_steps": 2069, "loss": 0.517, "lr": 5.75294463775935e-07, "epoch": 0.8542422044960116, "percentage": 85.4, "elapsed_time": "1 day, 6:29:30", "remaining_time": "5:12:41"} +{"current_steps": 1768, "total_steps": 2069, "loss": 0.4929, "lr": 5.715773494147919e-07, "epoch": 0.8547256466038192, "percentage": 85.45, "elapsed_time": "1 day, 6:30:34", "remaining_time": "5:11:39"} +{"current_steps": 1769, "total_steps": 2069, "loss": 0.5088, "lr": 5.678715544588547e-07, "epoch": 0.8552090887116268, "percentage": 85.5, "elapsed_time": "1 day, 6:31:37", "remaining_time": "5:10:37"} +{"current_steps": 1770, "total_steps": 2069, "loss": 0.5258, "lr": 5.641770883804365e-07, "epoch": 0.8556925308194344, "percentage": 85.55, "elapsed_time": "1 day, 6:32:38", "remaining_time": "5:09:34"} +{"current_steps": 1771, "total_steps": 2069, "loss": 0.5095, "lr": 5.604939606228887e-07, "epoch": 0.856175972927242, "percentage": 85.6, "elapsed_time": "1 
day, 6:33:41", "remaining_time": "5:08:32"} +{"current_steps": 1772, "total_steps": 2069, "loss": 0.5128, "lr": 5.568221806005847e-07, "epoch": 0.8566594150350495, "percentage": 85.65, "elapsed_time": "1 day, 6:34:47", "remaining_time": "5:07:31"} +{"current_steps": 1773, "total_steps": 2069, "loss": 0.5114, "lr": 5.531617576988879e-07, "epoch": 0.8571428571428571, "percentage": 85.69, "elapsed_time": "1 day, 6:35:47", "remaining_time": "5:06:28"} +{"current_steps": 1774, "total_steps": 2069, "loss": 0.5188, "lr": 5.495127012741352e-07, "epoch": 0.8576262992506647, "percentage": 85.74, "elapsed_time": "1 day, 6:36:53", "remaining_time": "5:05:27"} +{"current_steps": 1775, "total_steps": 2069, "loss": 0.4882, "lr": 5.45875020653609e-07, "epoch": 0.8581097413584723, "percentage": 85.79, "elapsed_time": "1 day, 6:37:57", "remaining_time": "5:04:25"} +{"current_steps": 1776, "total_steps": 2069, "loss": 0.5088, "lr": 5.422487251355146e-07, "epoch": 0.8585931834662799, "percentage": 85.84, "elapsed_time": "1 day, 6:38:55", "remaining_time": "5:03:22"} +{"current_steps": 1777, "total_steps": 2069, "loss": 0.5136, "lr": 5.386338239889549e-07, "epoch": 0.8590766255740875, "percentage": 85.89, "elapsed_time": "1 day, 6:39:53", "remaining_time": "5:02:20"} +{"current_steps": 1778, "total_steps": 2069, "loss": 0.4692, "lr": 5.350303264539091e-07, "epoch": 0.8595600676818951, "percentage": 85.94, "elapsed_time": "1 day, 6:41:01", "remaining_time": "5:01:18"} +{"current_steps": 1779, "total_steps": 2069, "loss": 0.516, "lr": 5.314382417412062e-07, "epoch": 0.8600435097897027, "percentage": 85.98, "elapsed_time": "1 day, 6:42:05", "remaining_time": "5:00:17"} +{"current_steps": 1780, "total_steps": 2069, "loss": 0.5146, "lr": 5.278575790325052e-07, "epoch": 0.8605269518975103, "percentage": 86.03, "elapsed_time": "1 day, 6:43:11", "remaining_time": "4:59:15"} +{"current_steps": 1781, "total_steps": 2069, "loss": 0.5125, "lr": 5.242883474802696e-07, "epoch": 0.8610103940053179, 
"percentage": 86.08, "elapsed_time": "1 day, 6:44:09", "remaining_time": "4:58:12"} +{"current_steps": 1782, "total_steps": 2069, "loss": 0.5177, "lr": 5.207305562077403e-07, "epoch": 0.8614938361131255, "percentage": 86.13, "elapsed_time": "1 day, 6:45:07", "remaining_time": "4:57:10"} +{"current_steps": 1783, "total_steps": 2069, "loss": 0.5304, "lr": 5.1718421430892e-07, "epoch": 0.8619772782209331, "percentage": 86.18, "elapsed_time": "1 day, 6:46:09", "remaining_time": "4:56:07"} +{"current_steps": 1784, "total_steps": 2069, "loss": 0.486, "lr": 5.136493308485446e-07, "epoch": 0.8624607203287407, "percentage": 86.23, "elapsed_time": "1 day, 6:47:15", "remaining_time": "4:55:06"} +{"current_steps": 1785, "total_steps": 2069, "loss": 0.4959, "lr": 5.101259148620618e-07, "epoch": 0.8629441624365483, "percentage": 86.27, "elapsed_time": "1 day, 6:48:16", "remaining_time": "4:54:04"} +{"current_steps": 1786, "total_steps": 2069, "loss": 0.4993, "lr": 5.066139753556049e-07, "epoch": 0.8634276045443559, "percentage": 86.32, "elapsed_time": "1 day, 6:49:22", "remaining_time": "4:53:02"} +{"current_steps": 1787, "total_steps": 2069, "loss": 0.5153, "lr": 5.031135213059756e-07, "epoch": 0.8639110466521635, "percentage": 86.37, "elapsed_time": "1 day, 6:50:25", "remaining_time": "4:52:00"} +{"current_steps": 1788, "total_steps": 2069, "loss": 0.4871, "lr": 4.99624561660616e-07, "epoch": 0.864394488759971, "percentage": 86.42, "elapsed_time": "1 day, 6:51:28", "remaining_time": "4:50:58"} +{"current_steps": 1789, "total_steps": 2069, "loss": 0.512, "lr": 4.961471053375899e-07, "epoch": 0.8648779308677785, "percentage": 86.47, "elapsed_time": "1 day, 6:52:30", "remaining_time": "4:49:56"} +{"current_steps": 1790, "total_steps": 2069, "loss": 0.5121, "lr": 4.926811612255539e-07, "epoch": 0.8653613729755861, "percentage": 86.52, "elapsed_time": "1 day, 6:53:31", "remaining_time": "4:48:54"} +{"current_steps": 1791, "total_steps": 2069, "loss": 0.5011, "lr": 
4.892267381837396e-07, "epoch": 0.8658448150833937, "percentage": 86.56, "elapsed_time": "1 day, 6:54:33", "remaining_time": "4:47:51"} +{"current_steps": 1792, "total_steps": 2069, "loss": 0.5103, "lr": 4.857838450419339e-07, "epoch": 0.8663282571912013, "percentage": 86.61, "elapsed_time": "1 day, 6:55:35", "remaining_time": "4:46:49"} +{"current_steps": 1793, "total_steps": 2069, "loss": 0.5138, "lr": 4.823524906004468e-07, "epoch": 0.8668116992990089, "percentage": 86.66, "elapsed_time": "1 day, 6:56:34", "remaining_time": "4:45:47"} +{"current_steps": 1794, "total_steps": 2069, "loss": 0.5151, "lr": 4.789326836300983e-07, "epoch": 0.8672951414068165, "percentage": 86.71, "elapsed_time": "1 day, 6:57:38", "remaining_time": "4:44:45"} +{"current_steps": 1795, "total_steps": 2069, "loss": 0.5098, "lr": 4.7552443287218866e-07, "epoch": 0.8677785835146241, "percentage": 86.76, "elapsed_time": "1 day, 6:58:42", "remaining_time": "4:43:43"} +{"current_steps": 1796, "total_steps": 2069, "loss": 0.508, "lr": 4.7212774703848273e-07, "epoch": 0.8682620256224317, "percentage": 86.81, "elapsed_time": "1 day, 6:59:45", "remaining_time": "4:42:41"} +{"current_steps": 1797, "total_steps": 2069, "loss": 0.5122, "lr": 4.687426348111834e-07, "epoch": 0.8687454677302393, "percentage": 86.85, "elapsed_time": "1 day, 7:00:44", "remaining_time": "4:41:38"} +{"current_steps": 1798, "total_steps": 2069, "loss": 0.52, "lr": 4.65369104842911e-07, "epoch": 0.8692289098380469, "percentage": 86.9, "elapsed_time": "1 day, 7:01:45", "remaining_time": "4:40:36"} +{"current_steps": 1799, "total_steps": 2069, "loss": 0.5072, "lr": 4.620071657566777e-07, "epoch": 0.8697123519458545, "percentage": 86.95, "elapsed_time": "1 day, 7:02:48", "remaining_time": "4:39:34"} +{"current_steps": 1800, "total_steps": 2069, "loss": 0.5096, "lr": 4.586568261458729e-07, "epoch": 0.8701957940536621, "percentage": 87.0, "elapsed_time": "1 day, 7:03:52", "remaining_time": "4:38:32"} +{"current_steps": 1801, 
"total_steps": 2069, "loss": 0.4861, "lr": 4.553180945742336e-07, "epoch": 0.8706792361614697, "percentage": 87.05, "elapsed_time": "1 day, 7:04:55", "remaining_time": "4:37:30"} +{"current_steps": 1802, "total_steps": 2069, "loss": 0.5136, "lr": 4.5199097957582816e-07, "epoch": 0.8711626782692773, "percentage": 87.1, "elapsed_time": "1 day, 7:05:57", "remaining_time": "4:36:28"} +{"current_steps": 1803, "total_steps": 2069, "loss": 0.5012, "lr": 4.486754896550288e-07, "epoch": 0.8716461203770849, "percentage": 87.14, "elapsed_time": "1 day, 7:06:57", "remaining_time": "4:35:26"} +{"current_steps": 1804, "total_steps": 2069, "loss": 0.5185, "lr": 4.45371633286496e-07, "epoch": 0.8721295624848925, "percentage": 87.19, "elapsed_time": "1 day, 7:07:53", "remaining_time": "4:34:23"} +{"current_steps": 1805, "total_steps": 2069, "loss": 0.5135, "lr": 4.4207941891515335e-07, "epoch": 0.8726130045927, "percentage": 87.24, "elapsed_time": "1 day, 7:08:53", "remaining_time": "4:33:20"} +{"current_steps": 1806, "total_steps": 2069, "loss": 0.512, "lr": 4.3879885495616505e-07, "epoch": 0.8730964467005076, "percentage": 87.29, "elapsed_time": "1 day, 7:09:50", "remaining_time": "4:32:17"} +{"current_steps": 1807, "total_steps": 2069, "loss": 0.5131, "lr": 4.3552994979491836e-07, "epoch": 0.8735798888083152, "percentage": 87.34, "elapsed_time": "1 day, 7:10:51", "remaining_time": "4:31:15"} +{"current_steps": 1808, "total_steps": 2069, "loss": 0.5156, "lr": 4.322727117869951e-07, "epoch": 0.8740633309161228, "percentage": 87.39, "elapsed_time": "1 day, 7:11:49", "remaining_time": "4:30:12"} +{"current_steps": 1809, "total_steps": 2069, "loss": 0.5225, "lr": 4.290271492581627e-07, "epoch": 0.8745467730239304, "percentage": 87.43, "elapsed_time": "1 day, 7:12:50", "remaining_time": "4:29:10"} +{"current_steps": 1810, "total_steps": 2069, "loss": 0.5276, "lr": 4.257932705043372e-07, "epoch": 0.875030215131738, "percentage": 87.48, "elapsed_time": "1 day, 7:13:54", 
"remaining_time": "4:28:08"} +{"current_steps": 1811, "total_steps": 2069, "loss": 0.5224, "lr": 4.2257108379157586e-07, "epoch": 0.8755136572395456, "percentage": 87.53, "elapsed_time": "1 day, 7:14:55", "remaining_time": "4:27:06"} +{"current_steps": 1812, "total_steps": 2069, "loss": 0.5161, "lr": 4.1936059735604497e-07, "epoch": 0.8759970993473531, "percentage": 87.58, "elapsed_time": "1 day, 7:15:54", "remaining_time": "4:26:03"} +{"current_steps": 1813, "total_steps": 2069, "loss": 0.5277, "lr": 4.161618194040079e-07, "epoch": 0.8764805414551607, "percentage": 87.63, "elapsed_time": "1 day, 7:17:00", "remaining_time": "4:25:02"} +{"current_steps": 1814, "total_steps": 2069, "loss": 0.5053, "lr": 4.129747581117993e-07, "epoch": 0.8769639835629683, "percentage": 87.68, "elapsed_time": "1 day, 7:18:04", "remaining_time": "4:24:00"} +{"current_steps": 1815, "total_steps": 2069, "loss": 0.516, "lr": 4.0979942162580387e-07, "epoch": 0.8774474256707759, "percentage": 87.72, "elapsed_time": "1 day, 7:19:05", "remaining_time": "4:22:58"} +{"current_steps": 1816, "total_steps": 2069, "loss": 0.5278, "lr": 4.06635818062438e-07, "epoch": 0.8779308677785835, "percentage": 87.77, "elapsed_time": "1 day, 7:20:12", "remaining_time": "4:21:56"} +{"current_steps": 1817, "total_steps": 2069, "loss": 0.5294, "lr": 4.0348395550812713e-07, "epoch": 0.8784143098863911, "percentage": 87.82, "elapsed_time": "1 day, 7:21:14", "remaining_time": "4:20:54"} +{"current_steps": 1818, "total_steps": 2069, "loss": 0.5158, "lr": 4.003438420192873e-07, "epoch": 0.8788977519941987, "percentage": 87.87, "elapsed_time": "1 day, 7:22:16", "remaining_time": "4:19:52"} +{"current_steps": 1819, "total_steps": 2069, "loss": 0.5114, "lr": 3.9721548562229985e-07, "epoch": 0.8793811941020063, "percentage": 87.92, "elapsed_time": "1 day, 7:23:16", "remaining_time": "4:18:50"} +{"current_steps": 1820, "total_steps": 2069, "loss": 0.5116, "lr": 3.9409889431349656e-07, "epoch": 0.8798646362098139, 
"percentage": 87.97, "elapsed_time": "1 day, 7:24:19", "remaining_time": "4:17:48"} +{"current_steps": 1821, "total_steps": 2069, "loss": 0.5099, "lr": 3.9099407605913576e-07, "epoch": 0.8803480783176215, "percentage": 88.01, "elapsed_time": "1 day, 7:25:24", "remaining_time": "4:16:46"} +{"current_steps": 1822, "total_steps": 2069, "loss": 0.5175, "lr": 3.879010387953841e-07, "epoch": 0.880831520425429, "percentage": 88.06, "elapsed_time": "1 day, 7:26:26", "remaining_time": "4:15:44"} +{"current_steps": 1823, "total_steps": 2069, "loss": 0.498, "lr": 3.84819790428293e-07, "epoch": 0.8813149625332366, "percentage": 88.11, "elapsed_time": "1 day, 7:27:32", "remaining_time": "4:14:42"} +{"current_steps": 1824, "total_steps": 2069, "loss": 0.518, "lr": 3.8175033883378233e-07, "epoch": 0.8817984046410442, "percentage": 88.16, "elapsed_time": "1 day, 7:28:34", "remaining_time": "4:13:40"} +{"current_steps": 1825, "total_steps": 2069, "loss": 0.5216, "lr": 3.7869269185761613e-07, "epoch": 0.8822818467488518, "percentage": 88.21, "elapsed_time": "1 day, 7:29:33", "remaining_time": "4:12:37"} +{"current_steps": 1826, "total_steps": 2069, "loss": 0.5066, "lr": 3.7564685731538985e-07, "epoch": 0.8827652888566594, "percentage": 88.26, "elapsed_time": "1 day, 7:30:32", "remaining_time": "4:11:35"} +{"current_steps": 1827, "total_steps": 2069, "loss": 0.517, "lr": 3.7261284299249967e-07, "epoch": 0.883248730964467, "percentage": 88.3, "elapsed_time": "1 day, 7:31:28", "remaining_time": "4:10:32"} +{"current_steps": 1828, "total_steps": 2069, "loss": 0.4959, "lr": 3.695906566441304e-07, "epoch": 0.8837321730722746, "percentage": 88.35, "elapsed_time": "1 day, 7:32:29", "remaining_time": "4:09:30"} +{"current_steps": 1829, "total_steps": 2069, "loss": 0.4871, "lr": 3.665803059952344e-07, "epoch": 0.8842156151800822, "percentage": 88.4, "elapsed_time": "1 day, 7:33:34", "remaining_time": "4:08:28"} +{"current_steps": 1830, "total_steps": 2069, "loss": 0.5143, "lr": 
3.63581798740511e-07, "epoch": 0.8846990572878898, "percentage": 88.45, "elapsed_time": "1 day, 7:34:33", "remaining_time": "4:07:25"} +{"current_steps": 1831, "total_steps": 2069, "loss": 0.4936, "lr": 3.605951425443871e-07, "epoch": 0.8851824993956974, "percentage": 88.5, "elapsed_time": "1 day, 7:35:34", "remaining_time": "4:06:23"} +{"current_steps": 1832, "total_steps": 2069, "loss": 0.4684, "lr": 3.576203450409943e-07, "epoch": 0.885665941503505, "percentage": 88.55, "elapsed_time": "1 day, 7:36:40", "remaining_time": "4:05:22"} +{"current_steps": 1833, "total_steps": 2069, "loss": 0.5104, "lr": 3.5465741383415684e-07, "epoch": 0.8861493836113126, "percentage": 88.59, "elapsed_time": "1 day, 7:37:43", "remaining_time": "4:04:20"} +{"current_steps": 1834, "total_steps": 2069, "loss": 0.519, "lr": 3.5170635649736497e-07, "epoch": 0.8866328257191202, "percentage": 88.64, "elapsed_time": "1 day, 7:38:43", "remaining_time": "4:03:17"} +{"current_steps": 1835, "total_steps": 2069, "loss": 0.508, "lr": 3.487671805737597e-07, "epoch": 0.8871162678269278, "percentage": 88.69, "elapsed_time": "1 day, 7:39:44", "remaining_time": "4:02:15"} +{"current_steps": 1836, "total_steps": 2069, "loss": 0.5135, "lr": 3.4583989357611037e-07, "epoch": 0.8875997099347354, "percentage": 88.74, "elapsed_time": "1 day, 7:40:44", "remaining_time": "4:01:12"} +{"current_steps": 1837, "total_steps": 2069, "loss": 0.5075, "lr": 3.4292450298679945e-07, "epoch": 0.888083152042543, "percentage": 88.79, "elapsed_time": "1 day, 7:41:46", "remaining_time": "4:00:10"} +{"current_steps": 1838, "total_steps": 2069, "loss": 0.5166, "lr": 3.400210162577999e-07, "epoch": 0.8885665941503504, "percentage": 88.84, "elapsed_time": "1 day, 7:42:47", "remaining_time": "3:59:08"} +{"current_steps": 1839, "total_steps": 2069, "loss": 0.523, "lr": 3.371294408106585e-07, "epoch": 0.889050036258158, "percentage": 88.88, "elapsed_time": "1 day, 7:43:47", "remaining_time": "3:58:06"} +{"current_steps": 1840, 
"total_steps": 2069, "loss": 0.5138, "lr": 3.3424978403647443e-07, "epoch": 0.8895334783659656, "percentage": 88.93, "elapsed_time": "1 day, 7:44:54", "remaining_time": "3:57:04"} +{"current_steps": 1841, "total_steps": 2069, "loss": 0.5274, "lr": 3.313820532958817e-07, "epoch": 0.8900169204737732, "percentage": 88.98, "elapsed_time": "1 day, 7:45:54", "remaining_time": "3:56:02"} +{"current_steps": 1842, "total_steps": 2069, "loss": 0.4991, "lr": 3.285262559190322e-07, "epoch": 0.8905003625815808, "percentage": 89.03, "elapsed_time": "1 day, 7:46:56", "remaining_time": "3:55:00"} +{"current_steps": 1843, "total_steps": 2069, "loss": 0.5009, "lr": 3.256823992055741e-07, "epoch": 0.8909838046893884, "percentage": 89.08, "elapsed_time": "1 day, 7:47:56", "remaining_time": "3:53:57"} +{"current_steps": 1844, "total_steps": 2069, "loss": 0.5238, "lr": 3.228504904246349e-07, "epoch": 0.891467246797196, "percentage": 89.13, "elapsed_time": "1 day, 7:48:51", "remaining_time": "3:52:54"} +{"current_steps": 1845, "total_steps": 2069, "loss": 0.5202, "lr": 3.20030536814801e-07, "epoch": 0.8919506889050036, "percentage": 89.17, "elapsed_time": "1 day, 7:49:52", "remaining_time": "3:51:52"} +{"current_steps": 1846, "total_steps": 2069, "loss": 0.5104, "lr": 3.1722254558410047e-07, "epoch": 0.8924341310128112, "percentage": 89.22, "elapsed_time": "1 day, 7:50:54", "remaining_time": "3:50:50"} +{"current_steps": 1847, "total_steps": 2069, "loss": 0.5152, "lr": 3.144265239099864e-07, "epoch": 0.8929175731206188, "percentage": 89.27, "elapsed_time": "1 day, 7:51:53", "remaining_time": "3:49:48"} +{"current_steps": 1848, "total_steps": 2069, "loss": 0.5071, "lr": 3.1164247893931575e-07, "epoch": 0.8934010152284264, "percentage": 89.32, "elapsed_time": "1 day, 7:52:56", "remaining_time": "3:48:46"} +{"current_steps": 1849, "total_steps": 2069, "loss": 0.5181, "lr": 3.088704177883306e-07, "epoch": 0.893884457336234, "percentage": 89.37, "elapsed_time": "1 day, 7:53:55", 
"remaining_time": "3:47:43"} +{"current_steps": 1850, "total_steps": 2069, "loss": 0.5235, "lr": 3.06110347542643e-07, "epoch": 0.8943678994440416, "percentage": 89.42, "elapsed_time": "1 day, 7:54:58", "remaining_time": "3:46:41"} +{"current_steps": 1851, "total_steps": 2069, "loss": 0.5019, "lr": 3.033622752572157e-07, "epoch": 0.8948513415518492, "percentage": 89.46, "elapsed_time": "1 day, 7:55:59", "remaining_time": "3:45:39"} +{"current_steps": 1852, "total_steps": 2069, "loss": 0.5263, "lr": 3.0062620795634214e-07, "epoch": 0.8953347836596568, "percentage": 89.51, "elapsed_time": "1 day, 7:56:59", "remaining_time": "3:44:36"} +{"current_steps": 1853, "total_steps": 2069, "loss": 0.5222, "lr": 2.9790215263363174e-07, "epoch": 0.8958182257674644, "percentage": 89.56, "elapsed_time": "1 day, 7:57:59", "remaining_time": "3:43:34"} +{"current_steps": 1854, "total_steps": 2069, "loss": 0.5233, "lr": 2.951901162519877e-07, "epoch": 0.896301667875272, "percentage": 89.61, "elapsed_time": "1 day, 7:59:04", "remaining_time": "3:42:32"} +{"current_steps": 1855, "total_steps": 2069, "loss": 0.5212, "lr": 2.9249010574359636e-07, "epoch": 0.8967851099830795, "percentage": 89.66, "elapsed_time": "1 day, 8:00:05", "remaining_time": "3:41:30"} +{"current_steps": 1856, "total_steps": 2069, "loss": 0.5168, "lr": 2.898021280098995e-07, "epoch": 0.8972685520908871, "percentage": 89.71, "elapsed_time": "1 day, 8:01:06", "remaining_time": "3:40:28"} +{"current_steps": 1857, "total_steps": 2069, "loss": 0.5084, "lr": 2.8712618992158656e-07, "epoch": 0.8977519941986947, "percentage": 89.75, "elapsed_time": "1 day, 8:02:08", "remaining_time": "3:39:26"} +{"current_steps": 1858, "total_steps": 2069, "loss": 0.5088, "lr": 2.8446229831856964e-07, "epoch": 0.8982354363065023, "percentage": 89.8, "elapsed_time": "1 day, 8:03:09", "remaining_time": "3:38:23"} +{"current_steps": 1859, "total_steps": 2069, "loss": 0.521, "lr": 2.8181046000997136e-07, "epoch": 0.8987188784143099, 
"percentage": 89.85, "elapsed_time": "1 day, 8:04:06", "remaining_time": "3:37:21"} +{"current_steps": 1860, "total_steps": 2069, "loss": 0.5072, "lr": 2.791706817741041e-07, "epoch": 0.8992023205221175, "percentage": 89.9, "elapsed_time": "1 day, 8:05:08", "remaining_time": "3:36:19"} +{"current_steps": 1861, "total_steps": 2069, "loss": 0.5148, "lr": 2.765429703584538e-07, "epoch": 0.8996857626299251, "percentage": 89.95, "elapsed_time": "1 day, 8:06:07", "remaining_time": "3:35:16"} +{"current_steps": 1862, "total_steps": 2069, "loss": 0.5262, "lr": 2.739273324796621e-07, "epoch": 0.9001692047377327, "percentage": 90.0, "elapsed_time": "1 day, 8:07:11", "remaining_time": "3:34:14"} +{"current_steps": 1863, "total_steps": 2069, "loss": 0.5147, "lr": 2.7132377482351037e-07, "epoch": 0.9006526468455403, "percentage": 90.04, "elapsed_time": "1 day, 8:08:09", "remaining_time": "3:33:12"} +{"current_steps": 1864, "total_steps": 2069, "loss": 0.5172, "lr": 2.687323040449025e-07, "epoch": 0.9011360889533478, "percentage": 90.09, "elapsed_time": "1 day, 8:09:13", "remaining_time": "3:32:10"} +{"current_steps": 1865, "total_steps": 2069, "loss": 0.5191, "lr": 2.6615292676784533e-07, "epoch": 0.9016195310611554, "percentage": 90.14, "elapsed_time": "1 day, 8:10:16", "remaining_time": "3:31:08"} +{"current_steps": 1866, "total_steps": 2069, "loss": 0.5116, "lr": 2.635856495854372e-07, "epoch": 0.902102973168963, "percentage": 90.19, "elapsed_time": "1 day, 8:11:16", "remaining_time": "3:30:06"} +{"current_steps": 1867, "total_steps": 2069, "loss": 0.5243, "lr": 2.6103047905984224e-07, "epoch": 0.9025864152767706, "percentage": 90.24, "elapsed_time": "1 day, 8:12:14", "remaining_time": "3:29:03"} +{"current_steps": 1868, "total_steps": 2069, "loss": 0.516, "lr": 2.584874217222855e-07, "epoch": 0.9030698573845782, "percentage": 90.29, "elapsed_time": "1 day, 8:13:18", "remaining_time": "3:28:01"} +{"current_steps": 1869, "total_steps": 2069, "loss": 0.5299, "lr": 
2.5595648407302496e-07, "epoch": 0.9035532994923858, "percentage": 90.33, "elapsed_time": "1 day, 8:14:16", "remaining_time": "3:26:59"} +{"current_steps": 1870, "total_steps": 2069, "loss": 0.5192, "lr": 2.53437672581342e-07, "epoch": 0.9040367416001934, "percentage": 90.38, "elapsed_time": "1 day, 8:15:19", "remaining_time": "3:25:57"} +{"current_steps": 1871, "total_steps": 2069, "loss": 0.5135, "lr": 2.5093099368551974e-07, "epoch": 0.9045201837080009, "percentage": 90.43, "elapsed_time": "1 day, 8:16:15", "remaining_time": "3:24:54"} +{"current_steps": 1872, "total_steps": 2069, "loss": 0.5248, "lr": 2.484364537928341e-07, "epoch": 0.9050036258158085, "percentage": 90.48, "elapsed_time": "1 day, 8:17:14", "remaining_time": "3:23:51"} +{"current_steps": 1873, "total_steps": 2069, "loss": 0.5198, "lr": 2.45954059279529e-07, "epoch": 0.9054870679236161, "percentage": 90.53, "elapsed_time": "1 day, 8:19:15", "remaining_time": "3:22:55"} +{"current_steps": 1874, "total_steps": 2069, "loss": 0.5163, "lr": 2.4348381649080486e-07, "epoch": 0.9059705100314237, "percentage": 90.58, "elapsed_time": "1 day, 8:20:15", "remaining_time": "3:21:53"} +{"current_steps": 1875, "total_steps": 2069, "loss": 0.511, "lr": 2.41025731740801e-07, "epoch": 0.9064539521392313, "percentage": 90.62, "elapsed_time": "1 day, 8:21:18", "remaining_time": "3:20:51"} +{"current_steps": 1876, "total_steps": 2069, "loss": 0.5114, "lr": 2.3857981131258037e-07, "epoch": 0.9069373942470389, "percentage": 90.67, "elapsed_time": "1 day, 8:22:16", "remaining_time": "3:19:49"} +{"current_steps": 1877, "total_steps": 2069, "loss": 0.4992, "lr": 2.3614606145811347e-07, "epoch": 0.9074208363548465, "percentage": 90.72, "elapsed_time": "1 day, 8:23:19", "remaining_time": "3:18:47"} +{"current_steps": 1878, "total_steps": 2069, "loss": 0.4887, "lr": 2.3372448839825978e-07, "epoch": 0.9079042784626541, "percentage": 90.77, "elapsed_time": "1 day, 8:24:24", "remaining_time": "3:17:45"} +{"current_steps": 1879, 
"total_steps": 2069, "loss": 0.5122, "lr": 2.3131509832275633e-07, "epoch": 0.9083877205704617, "percentage": 90.82, "elapsed_time": "1 day, 8:25:23", "remaining_time": "3:16:42"} +{"current_steps": 1880, "total_steps": 2069, "loss": 0.5102, "lr": 2.2891789739019733e-07, "epoch": 0.9088711626782693, "percentage": 90.87, "elapsed_time": "1 day, 8:26:22", "remaining_time": "3:15:40"} +{"current_steps": 1881, "total_steps": 2069, "loss": 0.5049, "lr": 2.2653289172802295e-07, "epoch": 0.9093546047860769, "percentage": 90.91, "elapsed_time": "1 day, 8:27:24", "remaining_time": "3:14:38"} +{"current_steps": 1882, "total_steps": 2069, "loss": 0.5144, "lr": 2.241600874324984e-07, "epoch": 0.9098380468938845, "percentage": 90.96, "elapsed_time": "1 day, 8:28:25", "remaining_time": "3:13:35"} +{"current_steps": 1883, "total_steps": 2069, "loss": 0.5184, "lr": 2.2179949056870432e-07, "epoch": 0.9103214890016921, "percentage": 91.01, "elapsed_time": "1 day, 8:29:24", "remaining_time": "3:12:33"} +{"current_steps": 1884, "total_steps": 2069, "loss": 0.5131, "lr": 2.194511071705141e-07, "epoch": 0.9108049311094997, "percentage": 91.06, "elapsed_time": "1 day, 8:30:23", "remaining_time": "3:11:31"} +{"current_steps": 1885, "total_steps": 2069, "loss": 0.5147, "lr": 2.1711494324058724e-07, "epoch": 0.9112883732173073, "percentage": 91.11, "elapsed_time": "1 day, 8:31:22", "remaining_time": "3:10:28"} +{"current_steps": 1886, "total_steps": 2069, "loss": 0.5084, "lr": 2.1479100475034598e-07, "epoch": 0.9117718153251149, "percentage": 91.16, "elapsed_time": "1 day, 8:32:23", "remaining_time": "3:09:26"} +{"current_steps": 1887, "total_steps": 2069, "loss": 0.4832, "lr": 2.1247929763996534e-07, "epoch": 0.9122552574329225, "percentage": 91.2, "elapsed_time": "1 day, 8:33:29", "remaining_time": "3:08:24"} +{"current_steps": 1888, "total_steps": 2069, "loss": 0.5144, "lr": 2.101798278183542e-07, "epoch": 0.91273869954073, "percentage": 91.25, "elapsed_time": "1 day, 8:34:29", 
"remaining_time": "3:07:22"} +{"current_steps": 1889, "total_steps": 2069, "loss": 0.5081, "lr": 2.0789260116314215e-07, "epoch": 0.9132221416485375, "percentage": 91.3, "elapsed_time": "1 day, 8:35:29", "remaining_time": "3:06:20"} +{"current_steps": 1890, "total_steps": 2069, "loss": 0.5109, "lr": 2.0561762352066638e-07, "epoch": 0.9137055837563451, "percentage": 91.35, "elapsed_time": "1 day, 8:36:35", "remaining_time": "3:05:18"} +{"current_steps": 1891, "total_steps": 2069, "loss": 0.5186, "lr": 2.0335490070595208e-07, "epoch": 0.9141890258641527, "percentage": 91.4, "elapsed_time": "1 day, 8:37:35", "remaining_time": "3:04:16"} +{"current_steps": 1892, "total_steps": 2069, "loss": 0.5101, "lr": 2.011044385027011e-07, "epoch": 0.9146724679719603, "percentage": 91.45, "elapsed_time": "1 day, 8:38:35", "remaining_time": "3:03:13"} +{"current_steps": 1893, "total_steps": 2069, "loss": 0.5078, "lr": 1.988662426632765e-07, "epoch": 0.9151559100797679, "percentage": 91.49, "elapsed_time": "1 day, 8:39:36", "remaining_time": "3:02:11"} +{"current_steps": 1894, "total_steps": 2069, "loss": 0.5223, "lr": 1.9664031890868795e-07, "epoch": 0.9156393521875755, "percentage": 91.54, "elapsed_time": "1 day, 8:40:37", "remaining_time": "3:01:09"} +{"current_steps": 1895, "total_steps": 2069, "loss": 0.509, "lr": 1.9442667292857432e-07, "epoch": 0.9161227942953831, "percentage": 91.59, "elapsed_time": "1 day, 8:41:35", "remaining_time": "3:00:06"} +{"current_steps": 1896, "total_steps": 2069, "loss": 0.4972, "lr": 1.922253103811944e-07, "epoch": 0.9166062364031907, "percentage": 91.64, "elapsed_time": "1 day, 8:42:42", "remaining_time": "2:59:05"} +{"current_steps": 1897, "total_steps": 2069, "loss": 0.5143, "lr": 1.9003623689340777e-07, "epoch": 0.9170896785109983, "percentage": 91.69, "elapsed_time": "1 day, 8:43:46", "remaining_time": "2:58:03"} +{"current_steps": 1898, "total_steps": 2069, "loss": 0.5186, "lr": 1.8785945806066297e-07, "epoch": 0.9175731206188059, 
"percentage": 91.74, "elapsed_time": "1 day, 8:44:47", "remaining_time": "2:57:01"} +{"current_steps": 1899, "total_steps": 2069, "loss": 0.5167, "lr": 1.85694979446982e-07, "epoch": 0.9180565627266135, "percentage": 91.78, "elapsed_time": "1 day, 8:45:50", "remaining_time": "2:55:59"} +{"current_steps": 1900, "total_steps": 2069, "loss": 0.5141, "lr": 1.835428065849465e-07, "epoch": 0.9185400048344211, "percentage": 91.83, "elapsed_time": "1 day, 8:46:53", "remaining_time": "2:54:57"} +{"current_steps": 1901, "total_steps": 2069, "loss": 0.5231, "lr": 1.814029449756849e-07, "epoch": 0.9190234469422287, "percentage": 91.88, "elapsed_time": "1 day, 8:47:55", "remaining_time": "2:53:54"} +{"current_steps": 1902, "total_steps": 2069, "loss": 0.5088, "lr": 1.7927540008885414e-07, "epoch": 0.9195068890500363, "percentage": 91.93, "elapsed_time": "1 day, 8:48:59", "remaining_time": "2:52:52"} +{"current_steps": 1903, "total_steps": 2069, "loss": 0.5129, "lr": 1.7716017736263192e-07, "epoch": 0.9199903311578439, "percentage": 91.98, "elapsed_time": "1 day, 8:50:02", "remaining_time": "2:51:50"} +{"current_steps": 1904, "total_steps": 2069, "loss": 0.5203, "lr": 1.7505728220369667e-07, "epoch": 0.9204737732656514, "percentage": 92.03, "elapsed_time": "1 day, 8:51:01", "remaining_time": "2:50:48"} +{"current_steps": 1905, "total_steps": 2069, "loss": 0.5223, "lr": 1.729667199872187e-07, "epoch": 0.920957215373459, "percentage": 92.07, "elapsed_time": "1 day, 8:52:03", "remaining_time": "2:49:46"} +{"current_steps": 1906, "total_steps": 2069, "loss": 0.5107, "lr": 1.70888496056843e-07, "epoch": 0.9214406574812666, "percentage": 92.12, "elapsed_time": "1 day, 8:53:04", "remaining_time": "2:48:44"} +{"current_steps": 1907, "total_steps": 2069, "loss": 0.5142, "lr": 1.6882261572467862e-07, "epoch": 0.9219240995890742, "percentage": 92.17, "elapsed_time": "1 day, 8:54:04", "remaining_time": "2:47:41"} +{"current_steps": 1908, "total_steps": 2069, "loss": 0.4847, "lr": 
1.6676908427128103e-07, "epoch": 0.9224075416968818, "percentage": 92.22, "elapsed_time": "1 day, 8:55:00", "remaining_time": "2:46:39"} +{"current_steps": 1909, "total_steps": 2069, "loss": 0.5063, "lr": 1.64727906945642e-07, "epoch": 0.9228909838046894, "percentage": 92.27, "elapsed_time": "1 day, 8:55:57", "remaining_time": "2:45:36"} +{"current_steps": 1910, "total_steps": 2069, "loss": 0.5035, "lr": 1.6269908896517638e-07, "epoch": 0.923374425912497, "percentage": 92.32, "elapsed_time": "1 day, 8:57:00", "remaining_time": "2:44:34"} +{"current_steps": 1911, "total_steps": 2069, "loss": 0.4855, "lr": 1.6068263551570596e-07, "epoch": 0.9238578680203046, "percentage": 92.36, "elapsed_time": "1 day, 8:58:05", "remaining_time": "2:43:32"} +{"current_steps": 1912, "total_steps": 2069, "loss": 0.4863, "lr": 1.5867855175144885e-07, "epoch": 0.9243413101281122, "percentage": 92.41, "elapsed_time": "1 day, 8:59:08", "remaining_time": "2:42:30"} +{"current_steps": 1913, "total_steps": 2069, "loss": 0.5077, "lr": 1.5668684279500245e-07, "epoch": 0.9248247522359198, "percentage": 92.46, "elapsed_time": "1 day, 9:00:11", "remaining_time": "2:41:28"} +{"current_steps": 1914, "total_steps": 2069, "loss": 0.5184, "lr": 1.5470751373733773e-07, "epoch": 0.9253081943437274, "percentage": 92.51, "elapsed_time": "1 day, 9:01:14", "remaining_time": "2:40:26"} +{"current_steps": 1915, "total_steps": 2069, "loss": 0.5094, "lr": 1.5274056963777817e-07, "epoch": 0.925791636451535, "percentage": 92.56, "elapsed_time": "1 day, 9:02:16", "remaining_time": "2:39:24"} +{"current_steps": 1916, "total_steps": 2069, "loss": 0.5154, "lr": 1.507860155239921e-07, "epoch": 0.9262750785593425, "percentage": 92.61, "elapsed_time": "1 day, 9:03:15", "remaining_time": "2:38:22"} +{"current_steps": 1917, "total_steps": 2069, "loss": 0.4938, "lr": 1.488438563919764e-07, "epoch": 0.9267585206671501, "percentage": 92.65, "elapsed_time": "1 day, 9:04:18", "remaining_time": "2:37:20"} +{"current_steps": 
1918, "total_steps": 2069, "loss": 0.5077, "lr": 1.4691409720604732e-07, "epoch": 0.9272419627749577, "percentage": 92.7, "elapsed_time": "1 day, 9:05:19", "remaining_time": "2:36:18"} +{"current_steps": 1919, "total_steps": 2069, "loss": 0.5145, "lr": 1.449967428988247e-07, "epoch": 0.9277254048827653, "percentage": 92.75, "elapsed_time": "1 day, 9:06:21", "remaining_time": "2:35:15"} +{"current_steps": 1920, "total_steps": 2069, "loss": 0.5291, "lr": 1.4309179837122045e-07, "epoch": 0.9282088469905729, "percentage": 92.8, "elapsed_time": "1 day, 9:07:20", "remaining_time": "2:34:13"} +{"current_steps": 1921, "total_steps": 2069, "loss": 0.5119, "lr": 1.411992684924257e-07, "epoch": 0.9286922890983804, "percentage": 92.85, "elapsed_time": "1 day, 9:08:23", "remaining_time": "2:33:11"} +{"current_steps": 1922, "total_steps": 2069, "loss": 0.5106, "lr": 1.3931915809990039e-07, "epoch": 0.929175731206188, "percentage": 92.9, "elapsed_time": "1 day, 9:09:24", "remaining_time": "2:32:09"} +{"current_steps": 1923, "total_steps": 2069, "loss": 0.5126, "lr": 1.374514719993575e-07, "epoch": 0.9296591733139956, "percentage": 92.94, "elapsed_time": "1 day, 9:10:22", "remaining_time": "2:31:06"} +{"current_steps": 1924, "total_steps": 2069, "loss": 0.5145, "lr": 1.3559621496475438e-07, "epoch": 0.9301426154218032, "percentage": 92.99, "elapsed_time": "1 day, 9:11:26", "remaining_time": "2:30:04"} +{"current_steps": 1925, "total_steps": 2069, "loss": 0.5261, "lr": 1.3375339173827551e-07, "epoch": 0.9306260575296108, "percentage": 93.04, "elapsed_time": "1 day, 9:12:28", "remaining_time": "2:29:02"} +{"current_steps": 1926, "total_steps": 2069, "loss": 0.474, "lr": 1.3192300703032733e-07, "epoch": 0.9311094996374184, "percentage": 93.09, "elapsed_time": "1 day, 9:13:34", "remaining_time": "2:28:01"} +{"current_steps": 1927, "total_steps": 2069, "loss": 0.5134, "lr": 1.3010506551952018e-07, "epoch": 0.931592941745226, "percentage": 93.14, "elapsed_time": "1 day, 9:14:35", 
"remaining_time": "2:26:58"} +{"current_steps": 1928, "total_steps": 2069, "loss": 0.52, "lr": 1.2829957185265863e-07, "epoch": 0.9320763838530336, "percentage": 93.19, "elapsed_time": "1 day, 9:15:40", "remaining_time": "2:25:56"} +{"current_steps": 1929, "total_steps": 2069, "loss": 0.5031, "lr": 1.2650653064473106e-07, "epoch": 0.9325598259608412, "percentage": 93.23, "elapsed_time": "1 day, 9:16:44", "remaining_time": "2:24:54"} +{"current_steps": 1930, "total_steps": 2069, "loss": 0.5092, "lr": 1.2472594647889357e-07, "epoch": 0.9330432680686488, "percentage": 93.28, "elapsed_time": "1 day, 9:17:47", "remaining_time": "2:23:52"} +{"current_steps": 1931, "total_steps": 2069, "loss": 0.5177, "lr": 1.2295782390646494e-07, "epoch": 0.9335267101764564, "percentage": 93.33, "elapsed_time": "1 day, 9:18:49", "remaining_time": "2:22:50"} +{"current_steps": 1932, "total_steps": 2069, "loss": 0.5133, "lr": 1.2120216744690716e-07, "epoch": 0.934010152284264, "percentage": 93.38, "elapsed_time": "1 day, 9:19:49", "remaining_time": "2:21:48"} +{"current_steps": 1933, "total_steps": 2069, "loss": 0.5163, "lr": 1.194589815878211e-07, "epoch": 0.9344935943920716, "percentage": 93.43, "elapsed_time": "1 day, 9:20:48", "remaining_time": "2:20:46"} +{"current_steps": 1934, "total_steps": 2069, "loss": 0.5181, "lr": 1.177282707849281e-07, "epoch": 0.9349770364998792, "percentage": 93.48, "elapsed_time": "1 day, 9:21:47", "remaining_time": "2:19:43"} +{"current_steps": 1935, "total_steps": 2069, "loss": 0.5181, "lr": 1.1601003946206723e-07, "epoch": 0.9354604786076868, "percentage": 93.52, "elapsed_time": "1 day, 9:22:45", "remaining_time": "2:18:41"} +{"current_steps": 1936, "total_steps": 2069, "loss": 0.5032, "lr": 1.1430429201117476e-07, "epoch": 0.9359439207154944, "percentage": 93.57, "elapsed_time": "1 day, 9:23:47", "remaining_time": "2:17:39"} +{"current_steps": 1937, "total_steps": 2069, "loss": 0.5178, "lr": 1.1261103279227858e-07, "epoch": 0.9364273628233019, 
"percentage": 93.62, "elapsed_time": "1 day, 9:24:46", "remaining_time": "2:16:37"} +{"current_steps": 1938, "total_steps": 2069, "loss": 0.5196, "lr": 1.1093026613348601e-07, "epoch": 0.9369108049311095, "percentage": 93.67, "elapsed_time": "1 day, 9:25:44", "remaining_time": "2:15:34"} +{"current_steps": 1939, "total_steps": 2069, "loss": 0.4919, "lr": 1.0926199633097156e-07, "epoch": 0.937394247038917, "percentage": 93.72, "elapsed_time": "1 day, 9:26:50", "remaining_time": "2:14:32"} +{"current_steps": 1940, "total_steps": 2069, "loss": 0.5147, "lr": 1.0760622764896866e-07, "epoch": 0.9378776891467246, "percentage": 93.77, "elapsed_time": "1 day, 9:27:48", "remaining_time": "2:13:30"} +{"current_steps": 1941, "total_steps": 2069, "loss": 0.5156, "lr": 1.0596296431975406e-07, "epoch": 0.9383611312545322, "percentage": 93.81, "elapsed_time": "1 day, 9:28:47", "remaining_time": "2:12:28"} +{"current_steps": 1942, "total_steps": 2069, "loss": 0.5065, "lr": 1.0433221054364174e-07, "epoch": 0.9388445733623398, "percentage": 93.86, "elapsed_time": "1 day, 9:29:50", "remaining_time": "2:11:26"} +{"current_steps": 1943, "total_steps": 2069, "loss": 0.5053, "lr": 1.0271397048897014e-07, "epoch": 0.9393280154701474, "percentage": 93.91, "elapsed_time": "1 day, 9:30:52", "remaining_time": "2:10:24"} +{"current_steps": 1944, "total_steps": 2069, "loss": 0.4939, "lr": 1.0110824829209164e-07, "epoch": 0.939811457577955, "percentage": 93.96, "elapsed_time": "1 day, 9:31:58", "remaining_time": "2:09:22"} +{"current_steps": 1945, "total_steps": 2069, "loss": 0.5106, "lr": 9.951504805735979e-08, "epoch": 0.9402948996857626, "percentage": 94.01, "elapsed_time": "1 day, 9:32:56", "remaining_time": "2:08:19"} +{"current_steps": 1946, "total_steps": 2069, "loss": 0.5153, "lr": 9.793437385712479e-08, "epoch": 0.9407783417935702, "percentage": 94.06, "elapsed_time": "1 day, 9:33:56", "remaining_time": "2:07:17"} +{"current_steps": 1947, "total_steps": 2069, "loss": 0.51, "lr": 
9.636622973171583e-08, "epoch": 0.9412617839013778, "percentage": 94.1, "elapsed_time": "1 day, 9:34:53", "remaining_time": "2:06:15"} +{"current_steps": 1948, "total_steps": 2069, "loss": 0.516, "lr": 9.481061968943717e-08, "epoch": 0.9417452260091854, "percentage": 94.15, "elapsed_time": "1 day, 9:35:51", "remaining_time": "2:05:12"} +{"current_steps": 1949, "total_steps": 2069, "loss": 0.5051, "lr": 9.3267547706552e-08, "epoch": 0.942228668116993, "percentage": 94.2, "elapsed_time": "1 day, 9:36:55", "remaining_time": "2:04:10"} +{"current_steps": 1950, "total_steps": 2069, "loss": 0.5055, "lr": 9.17370177272775e-08, "epoch": 0.9427121102248006, "percentage": 94.25, "elapsed_time": "1 day, 9:37:58", "remaining_time": "2:03:08"} +{"current_steps": 1951, "total_steps": 2069, "loss": 0.5164, "lr": 9.021903366377093e-08, "epoch": 0.9431955523326082, "percentage": 94.3, "elapsed_time": "1 day, 9:38:59", "remaining_time": "2:02:06"} +{"current_steps": 1952, "total_steps": 2069, "loss": 0.5098, "lr": 8.8713599396123e-08, "epoch": 0.9436789944404158, "percentage": 94.35, "elapsed_time": "1 day, 9:40:01", "remaining_time": "2:01:04"} +{"current_steps": 1953, "total_steps": 2069, "loss": 0.5211, "lr": 8.72207187723445e-08, "epoch": 0.9441624365482234, "percentage": 94.39, "elapsed_time": "1 day, 9:41:03", "remaining_time": "2:00:02"} +{"current_steps": 1954, "total_steps": 2069, "loss": 0.5121, "lr": 8.5740395608358e-08, "epoch": 0.9446458786560309, "percentage": 94.44, "elapsed_time": "1 day, 9:42:01", "remaining_time": "1:59:00"} +{"current_steps": 1955, "total_steps": 2069, "loss": 0.5256, "lr": 8.427263368798955e-08, "epoch": 0.9451293207638385, "percentage": 94.49, "elapsed_time": "1 day, 9:43:03", "remaining_time": "1:57:58"} +{"current_steps": 1956, "total_steps": 2069, "loss": 0.5183, "lr": 8.281743676295639e-08, "epoch": 0.9456127628716461, "percentage": 94.54, "elapsed_time": "1 day, 9:44:04", "remaining_time": "1:56:55"} +{"current_steps": 1957, "total_steps": 
2069, "loss": 0.5135, "lr": 8.13748085528604e-08, "epoch": 0.9460962049794537, "percentage": 94.59, "elapsed_time": "1 day, 9:45:01", "remaining_time": "1:55:53"} +{"current_steps": 1958, "total_steps": 2069, "loss": 0.4903, "lr": 7.99447527451741e-08, "epoch": 0.9465796470872613, "percentage": 94.64, "elapsed_time": "1 day, 9:46:05", "remaining_time": "1:54:51"} +{"current_steps": 1959, "total_steps": 2069, "loss": 0.5068, "lr": 7.852727299523577e-08, "epoch": 0.9470630891950689, "percentage": 94.68, "elapsed_time": "1 day, 9:47:05", "remaining_time": "1:53:49"} +{"current_steps": 1960, "total_steps": 2069, "loss": 0.5127, "lr": 7.71223729262377e-08, "epoch": 0.9475465313028765, "percentage": 94.73, "elapsed_time": "1 day, 9:48:08", "remaining_time": "1:52:47"} +{"current_steps": 1961, "total_steps": 2069, "loss": 0.5121, "lr": 7.573005612921903e-08, "epoch": 0.9480299734106841, "percentage": 94.78, "elapsed_time": "1 day, 9:49:09", "remaining_time": "1:51:45"} +{"current_steps": 1962, "total_steps": 2069, "loss": 0.5178, "lr": 7.435032616305238e-08, "epoch": 0.9485134155184917, "percentage": 94.83, "elapsed_time": "1 day, 9:50:12", "remaining_time": "1:50:43"} +{"current_steps": 1963, "total_steps": 2069, "loss": 0.5078, "lr": 7.298318655443893e-08, "epoch": 0.9489968576262993, "percentage": 94.88, "elapsed_time": "1 day, 9:51:12", "remaining_time": "1:49:40"} +{"current_steps": 1964, "total_steps": 2069, "loss": 0.5137, "lr": 7.162864079789777e-08, "epoch": 0.9494802997341069, "percentage": 94.93, "elapsed_time": "1 day, 9:52:12", "remaining_time": "1:48:38"} +{"current_steps": 1965, "total_steps": 2069, "loss": 0.496, "lr": 7.028669235575714e-08, "epoch": 0.9499637418419145, "percentage": 94.97, "elapsed_time": "1 day, 9:53:20", "remaining_time": "1:47:37"} +{"current_steps": 1966, "total_steps": 2069, "loss": 0.5257, "lr": 6.895734465814597e-08, "epoch": 0.950447183949722, "percentage": 95.02, "elapsed_time": "1 day, 9:54:23", "remaining_time": "1:46:34"} 
+{"current_steps": 1967, "total_steps": 2069, "loss": 0.5208, "lr": 6.764060110298287e-08, "epoch": 0.9509306260575296, "percentage": 95.07, "elapsed_time": "1 day, 9:55:23", "remaining_time": "1:45:32"} +{"current_steps": 1968, "total_steps": 2069, "loss": 0.5224, "lr": 6.633646505597113e-08, "epoch": 0.9514140681653372, "percentage": 95.12, "elapsed_time": "1 day, 9:56:22", "remaining_time": "1:44:30"} +{"current_steps": 1969, "total_steps": 2069, "loss": 0.5108, "lr": 6.504493985058813e-08, "epoch": 0.9518975102731448, "percentage": 95.17, "elapsed_time": "1 day, 9:57:20", "remaining_time": "1:43:28"} +{"current_steps": 1970, "total_steps": 2069, "loss": 0.5134, "lr": 6.376602878807592e-08, "epoch": 0.9523809523809523, "percentage": 95.22, "elapsed_time": "1 day, 9:58:20", "remaining_time": "1:42:26"} +{"current_steps": 1971, "total_steps": 2069, "loss": 0.5079, "lr": 6.249973513743345e-08, "epoch": 0.9528643944887599, "percentage": 95.26, "elapsed_time": "1 day, 9:59:21", "remaining_time": "1:41:23"} +{"current_steps": 1972, "total_steps": 2069, "loss": 0.5196, "lr": 6.124606213541052e-08, "epoch": 0.9533478365965675, "percentage": 95.31, "elapsed_time": "1 day, 10:00:23", "remaining_time": "1:40:21"} +{"current_steps": 1973, "total_steps": 2069, "loss": 0.5197, "lr": 6.000501298649653e-08, "epoch": 0.9538312787043751, "percentage": 95.36, "elapsed_time": "1 day, 10:01:24", "remaining_time": "1:39:19"} +{"current_steps": 1974, "total_steps": 2069, "loss": 0.5135, "lr": 5.8776590862911764e-08, "epoch": 0.9543147208121827, "percentage": 95.41, "elapsed_time": "1 day, 10:02:23", "remaining_time": "1:38:17"} +{"current_steps": 1975, "total_steps": 2069, "loss": 0.5137, "lr": 5.756079890460342e-08, "epoch": 0.9547981629199903, "percentage": 95.46, "elapsed_time": "1 day, 10:03:24", "remaining_time": "1:37:15"} +{"current_steps": 1976, "total_steps": 2069, "loss": 0.5121, "lr": 5.635764021923229e-08, "epoch": 0.9552816050277979, "percentage": 95.51, "elapsed_time": 
"1 day, 10:04:31", "remaining_time": "1:36:13"} +{"current_steps": 1977, "total_steps": 2069, "loss": 0.516, "lr": 5.5167117882171104e-08, "epoch": 0.9557650471356055, "percentage": 95.55, "elapsed_time": "1 day, 10:05:34", "remaining_time": "1:35:11"} +{"current_steps": 1978, "total_steps": 2069, "loss": 0.5055, "lr": 5.3989234936489556e-08, "epoch": 0.9562484892434131, "percentage": 95.6, "elapsed_time": "1 day, 10:06:40", "remaining_time": "1:34:09"} +{"current_steps": 1979, "total_steps": 2069, "loss": 0.5094, "lr": 5.2823994392951497e-08, "epoch": 0.9567319313512207, "percentage": 95.65, "elapsed_time": "1 day, 10:07:43", "remaining_time": "1:33:07"} +{"current_steps": 1980, "total_steps": 2069, "loss": 0.4933, "lr": 5.167139923000553e-08, "epoch": 0.9572153734590283, "percentage": 95.7, "elapsed_time": "1 day, 10:08:49", "remaining_time": "1:32:05"} +{"current_steps": 1981, "total_steps": 2069, "loss": 0.4936, "lr": 5.053145239377777e-08, "epoch": 0.9576988155668359, "percentage": 95.75, "elapsed_time": "1 day, 10:09:52", "remaining_time": "1:31:03"} +{"current_steps": 1982, "total_steps": 2069, "loss": 0.5124, "lr": 4.940415679806465e-08, "epoch": 0.9581822576746435, "percentage": 95.8, "elapsed_time": "1 day, 10:10:55", "remaining_time": "1:30:01"} +{"current_steps": 1983, "total_steps": 2069, "loss": 0.5151, "lr": 4.828951532432457e-08, "epoch": 0.9586656997824511, "percentage": 95.84, "elapsed_time": "1 day, 10:11:53", "remaining_time": "1:28:59"} +{"current_steps": 1984, "total_steps": 2069, "loss": 0.5191, "lr": 4.718753082167071e-08, "epoch": 0.9591491418902587, "percentage": 95.89, "elapsed_time": "1 day, 10:12:54", "remaining_time": "1:27:57"} +{"current_steps": 1985, "total_steps": 2069, "loss": 0.515, "lr": 4.6098206106863774e-08, "epoch": 0.9596325839980663, "percentage": 95.94, "elapsed_time": "1 day, 10:13:57", "remaining_time": "1:26:55"} +{"current_steps": 1986, "total_steps": 2069, "loss": 0.5123, "lr": 4.5021543964306466e-08, "epoch": 
0.9601160261058739, "percentage": 95.99, "elapsed_time": "1 day, 10:14:57", "remaining_time": "1:25:52"} +{"current_steps": 1987, "total_steps": 2069, "loss": 0.5133, "lr": 4.395754714603351e-08, "epoch": 0.9605994682136814, "percentage": 96.04, "elapsed_time": "1 day, 10:15:57", "remaining_time": "1:24:50"} +{"current_steps": 1988, "total_steps": 2069, "loss": 0.5236, "lr": 4.290621837170661e-08, "epoch": 0.961082910321489, "percentage": 96.09, "elapsed_time": "1 day, 10:17:00", "remaining_time": "1:23:48"} +{"current_steps": 1989, "total_steps": 2069, "loss": 0.5137, "lr": 4.186756032860728e-08, "epoch": 0.9615663524292966, "percentage": 96.13, "elapsed_time": "1 day, 10:18:01", "remaining_time": "1:22:46"} +{"current_steps": 1990, "total_steps": 2069, "loss": 0.5093, "lr": 4.08415756716285e-08, "epoch": 0.9620497945371042, "percentage": 96.18, "elapsed_time": "1 day, 10:19:02", "remaining_time": "1:21:44"} +{"current_steps": 1991, "total_steps": 2069, "loss": 0.4505, "lr": 3.9828267023269696e-08, "epoch": 0.9625332366449117, "percentage": 96.23, "elapsed_time": "1 day, 10:20:16", "remaining_time": "1:20:42"} +{"current_steps": 1992, "total_steps": 2069, "loss": 0.5101, "lr": 3.8827636973630126e-08, "epoch": 0.9630166787527193, "percentage": 96.28, "elapsed_time": "1 day, 10:21:16", "remaining_time": "1:19:40"} +{"current_steps": 1993, "total_steps": 2069, "loss": 0.5245, "lr": 3.783968808039995e-08, "epoch": 0.9635001208605269, "percentage": 96.33, "elapsed_time": "1 day, 10:22:18", "remaining_time": "1:18:38"} +{"current_steps": 1994, "total_steps": 2069, "loss": 0.514, "lr": 3.68644228688575e-08, "epoch": 0.9639835629683345, "percentage": 96.38, "elapsed_time": "1 day, 10:23:22", "remaining_time": "1:17:36"} +{"current_steps": 1995, "total_steps": 2069, "loss": 0.507, "lr": 3.590184383185758e-08, "epoch": 0.9644670050761421, "percentage": 96.42, "elapsed_time": "1 day, 10:24:26", "remaining_time": "1:16:34"} +{"current_steps": 1996, "total_steps": 2069, 
"loss": 0.5093, "lr": 3.4951953429831484e-08, "epoch": 0.9649504471839497, "percentage": 96.47, "elapsed_time": "1 day, 10:25:28", "remaining_time": "1:15:32"} +{"current_steps": 1997, "total_steps": 2069, "loss": 0.4987, "lr": 3.401475409077426e-08, "epoch": 0.9654338892917573, "percentage": 96.52, "elapsed_time": "1 day, 10:26:28", "remaining_time": "1:14:30"} +{"current_steps": 1998, "total_steps": 2069, "loss": 0.5099, "lr": 3.309024821024354e-08, "epoch": 0.9659173313995649, "percentage": 96.57, "elapsed_time": "1 day, 10:27:25", "remaining_time": "1:13:28"} +{"current_steps": 1999, "total_steps": 2069, "loss": 0.5181, "lr": 3.2178438151350685e-08, "epoch": 0.9664007735073725, "percentage": 96.62, "elapsed_time": "1 day, 10:28:26", "remaining_time": "1:12:25"} +{"current_steps": 2000, "total_steps": 2069, "loss": 0.5118, "lr": 3.127932624475638e-08, "epoch": 0.9668842156151801, "percentage": 96.67, "elapsed_time": "1 day, 10:29:23", "remaining_time": "1:11:23"} +{"current_steps": 2001, "total_steps": 2069, "loss": 0.5265, "lr": 3.039291478866169e-08, "epoch": 0.9673676577229877, "percentage": 96.71, "elapsed_time": "1 day, 10:30:23", "remaining_time": "1:10:21"} +{"current_steps": 2002, "total_steps": 2069, "loss": 0.5173, "lr": 2.9519206048807535e-08, "epoch": 0.9678510998307953, "percentage": 96.76, "elapsed_time": "1 day, 10:31:15", "remaining_time": "1:09:19"} +{"current_steps": 2003, "total_steps": 2069, "loss": 0.5199, "lr": 2.8658202258462498e-08, "epoch": 0.9683345419386028, "percentage": 96.81, "elapsed_time": "1 day, 10:32:15", "remaining_time": "1:08:16"} +{"current_steps": 2004, "total_steps": 2069, "loss": 0.4667, "lr": 2.7809905618422227e-08, "epoch": 0.9688179840464104, "percentage": 96.86, "elapsed_time": "1 day, 10:33:23", "remaining_time": "1:07:15"} +{"current_steps": 2005, "total_steps": 2069, "loss": 0.5208, "lr": 2.6974318297001144e-08, "epoch": 0.969301426154218, "percentage": 96.91, "elapsed_time": "1 day, 10:34:25", "remaining_time": 
"1:06:12"} +{"current_steps": 2006, "total_steps": 2069, "loss": 0.5049, "lr": 2.615144243002743e-08, "epoch": 0.9697848682620256, "percentage": 96.96, "elapsed_time": "1 day, 10:35:25", "remaining_time": "1:05:10"} +{"current_steps": 2007, "total_steps": 2069, "loss": 0.5215, "lr": 2.534128012083914e-08, "epoch": 0.9702683103698332, "percentage": 97.0, "elapsed_time": "1 day, 10:36:25", "remaining_time": "1:04:08"} +{"current_steps": 2008, "total_steps": 2069, "loss": 0.5096, "lr": 2.4543833440275332e-08, "epoch": 0.9707517524776408, "percentage": 97.05, "elapsed_time": "1 day, 10:37:27", "remaining_time": "1:03:06"} +{"current_steps": 2009, "total_steps": 2069, "loss": 0.5111, "lr": 2.375910442667495e-08, "epoch": 0.9712351945854484, "percentage": 97.1, "elapsed_time": "1 day, 10:38:27", "remaining_time": "1:02:04"} +{"current_steps": 2010, "total_steps": 2069, "loss": 0.5136, "lr": 2.298709508586794e-08, "epoch": 0.971718636693256, "percentage": 97.15, "elapsed_time": "1 day, 10:39:29", "remaining_time": "1:01:02"} +{"current_steps": 2011, "total_steps": 2069, "loss": 0.5239, "lr": 2.2227807391172474e-08, "epoch": 0.9722020788010636, "percentage": 97.2, "elapsed_time": "1 day, 10:40:31", "remaining_time": "1:00:00"} +{"current_steps": 2012, "total_steps": 2069, "loss": 0.514, "lr": 2.1481243283389408e-08, "epoch": 0.9726855209088712, "percentage": 97.25, "elapsed_time": "1 day, 10:41:31", "remaining_time": "0:58:58"} +{"current_steps": 2013, "total_steps": 2069, "loss": 0.5174, "lr": 2.074740467079672e-08, "epoch": 0.9731689630166788, "percentage": 97.29, "elapsed_time": "1 day, 10:42:32", "remaining_time": "0:57:56"} +{"current_steps": 2014, "total_steps": 2069, "loss": 0.5173, "lr": 2.002629342914453e-08, "epoch": 0.9736524051244864, "percentage": 97.34, "elapsed_time": "1 day, 10:43:27", "remaining_time": "0:56:53"} +{"current_steps": 2015, "total_steps": 2069, "loss": 0.5035, "lr": 1.9317911401651734e-08, "epoch": 0.974135847232294, "percentage": 97.39, 
"elapsed_time": "1 day, 10:44:34", "remaining_time": "0:55:51"} +{"current_steps": 2016, "total_steps": 2069, "loss": 0.5194, "lr": 1.862226039899995e-08, "epoch": 0.9746192893401016, "percentage": 97.44, "elapsed_time": "1 day, 10:45:36", "remaining_time": "0:54:49"} +{"current_steps": 2017, "total_steps": 2069, "loss": 0.5081, "lr": 1.7939342199329023e-08, "epoch": 0.9751027314479092, "percentage": 97.49, "elapsed_time": "1 day, 10:46:36", "remaining_time": "0:53:47"} +{"current_steps": 2018, "total_steps": 2069, "loss": 0.514, "lr": 1.7269158548232633e-08, "epoch": 0.9755861735557168, "percentage": 97.54, "elapsed_time": "1 day, 10:47:39", "remaining_time": "0:52:45"} +{"current_steps": 2019, "total_steps": 2069, "loss": 0.5086, "lr": 1.661171115875493e-08, "epoch": 0.9760696156635243, "percentage": 97.58, "elapsed_time": "1 day, 10:48:41", "remaining_time": "0:51:43"} +{"current_steps": 2020, "total_steps": 2069, "loss": 0.5074, "lr": 1.5967001711383877e-08, "epoch": 0.9765530577713318, "percentage": 97.63, "elapsed_time": "1 day, 10:49:43", "remaining_time": "0:50:41"} +{"current_steps": 2021, "total_steps": 2069, "loss": 0.5164, "lr": 1.5335031854049055e-08, "epoch": 0.9770364998791394, "percentage": 97.68, "elapsed_time": "1 day, 10:50:47", "remaining_time": "0:49:39"} +{"current_steps": 2022, "total_steps": 2069, "loss": 0.516, "lr": 1.4715803202116075e-08, "epoch": 0.977519941986947, "percentage": 97.73, "elapsed_time": "1 day, 10:51:49", "remaining_time": "0:48:37"} +{"current_steps": 2023, "total_steps": 2069, "loss": 0.4864, "lr": 1.4109317338383832e-08, "epoch": 0.9780033840947546, "percentage": 97.78, "elapsed_time": "1 day, 10:52:52", "remaining_time": "0:47:35"} +{"current_steps": 2024, "total_steps": 2069, "loss": 0.5276, "lr": 1.3515575813078386e-08, "epoch": 0.9784868262025622, "percentage": 97.83, "elapsed_time": "1 day, 10:53:52", "remaining_time": "0:46:33"} +{"current_steps": 2025, "total_steps": 2069, "loss": 0.5116, "lr": 
1.2934580143851294e-08, "epoch": 0.9789702683103698, "percentage": 97.87, "elapsed_time": "1 day, 10:54:56", "remaining_time": "0:45:31"} +{"current_steps": 2026, "total_steps": 2069, "loss": 0.5169, "lr": 1.2366331815774069e-08, "epoch": 0.9794537104181774, "percentage": 97.92, "elapsed_time": "1 day, 10:55:55", "remaining_time": "0:44:29"} +{"current_steps": 2027, "total_steps": 2069, "loss": 0.5221, "lr": 1.1810832281335394e-08, "epoch": 0.979937152525985, "percentage": 97.97, "elapsed_time": "1 day, 10:56:55", "remaining_time": "0:43:26"} +{"current_steps": 2028, "total_steps": 2069, "loss": 0.526, "lr": 1.1268082960436688e-08, "epoch": 0.9804205946337926, "percentage": 98.02, "elapsed_time": "1 day, 10:58:00", "remaining_time": "0:42:24"} +{"current_steps": 2029, "total_steps": 2069, "loss": 0.5158, "lr": 1.0738085240389883e-08, "epoch": 0.9809040367416002, "percentage": 98.07, "elapsed_time": "1 day, 10:59:02", "remaining_time": "0:41:22"} +{"current_steps": 2030, "total_steps": 2069, "loss": 0.5148, "lr": 1.0220840475910765e-08, "epoch": 0.9813874788494078, "percentage": 98.12, "elapsed_time": "1 day, 11:00:07", "remaining_time": "0:40:20"} +{"current_steps": 2031, "total_steps": 2069, "loss": 0.4977, "lr": 9.716349989118412e-09, "epoch": 0.9818709209572154, "percentage": 98.16, "elapsed_time": "1 day, 11:01:11", "remaining_time": "0:39:18"} +{"current_steps": 2032, "total_steps": 2069, "loss": 0.5183, "lr": 9.224615069532428e-09, "epoch": 0.982354363065023, "percentage": 98.21, "elapsed_time": "1 day, 11:02:12", "remaining_time": "0:38:16"} +{"current_steps": 2033, "total_steps": 2069, "loss": 0.5151, "lr": 8.745636974066274e-09, "epoch": 0.9828378051728306, "percentage": 98.26, "elapsed_time": "1 day, 11:03:10", "remaining_time": "0:37:14"} +{"current_steps": 2034, "total_steps": 2069, "loss": 0.4852, "lr": 8.279416927026163e-09, "epoch": 0.9833212472806382, "percentage": 98.31, "elapsed_time": "1 day, 11:04:13", "remaining_time": "0:36:12"} 
+{"current_steps": 2035, "total_steps": 2069, "loss": 0.5088, "lr": 7.82595612010828e-09, "epoch": 0.9838046893884458, "percentage": 98.36, "elapsed_time": "1 day, 11:05:14", "remaining_time": "0:35:10"} +{"current_steps": 2036, "total_steps": 2069, "loss": 0.5092, "lr": 7.385255712395456e-09, "epoch": 0.9842881314962533, "percentage": 98.41, "elapsed_time": "1 day, 11:06:18", "remaining_time": "0:34:08"} +{"current_steps": 2037, "total_steps": 2069, "loss": 0.5048, "lr": 6.9573168303532775e-09, "epoch": 0.9847715736040609, "percentage": 98.45, "elapsed_time": "1 day, 11:07:23", "remaining_time": "0:33:06"} +{"current_steps": 2038, "total_steps": 2069, "loss": 0.5166, "lr": 6.542140567827871e-09, "epoch": 0.9852550157118685, "percentage": 98.5, "elapsed_time": "1 day, 11:08:24", "remaining_time": "0:32:04"} +{"current_steps": 2039, "total_steps": 2069, "loss": 0.4846, "lr": 6.1397279860431205e-09, "epoch": 0.9857384578196761, "percentage": 98.55, "elapsed_time": "1 day, 11:09:25", "remaining_time": "0:31:02"} +{"current_steps": 2040, "total_steps": 2069, "loss": 0.5191, "lr": 5.750080113598455e-09, "epoch": 0.9862218999274837, "percentage": 98.6, "elapsed_time": "1 day, 11:10:25", "remaining_time": "0:30:00"} +{"current_steps": 2041, "total_steps": 2069, "loss": 0.509, "lr": 5.373197946464403e-09, "epoch": 0.9867053420352913, "percentage": 98.65, "elapsed_time": "1 day, 11:11:27", "remaining_time": "0:28:57"} +{"current_steps": 2042, "total_steps": 2069, "loss": 0.52, "lr": 5.009082447983149e-09, "epoch": 0.9871887841430989, "percentage": 98.7, "elapsed_time": "1 day, 11:12:26", "remaining_time": "0:27:55"} +{"current_steps": 2043, "total_steps": 2069, "loss": 0.5131, "lr": 4.65773454886298e-09, "epoch": 0.9876722262509064, "percentage": 98.74, "elapsed_time": "1 day, 11:13:27", "remaining_time": "0:26:53"} +{"current_steps": 2044, "total_steps": 2069, "loss": 0.515, "lr": 4.319155147176624e-09, "epoch": 0.988155668358714, "percentage": 98.79, "elapsed_time": "1 
day, 11:14:26", "remaining_time": "0:25:51"} +{"current_steps": 2045, "total_steps": 2069, "loss": 0.5001, "lr": 3.9933451083612464e-09, "epoch": 0.9886391104665216, "percentage": 98.84, "elapsed_time": "1 day, 11:15:28", "remaining_time": "0:24:49"} +{"current_steps": 2046, "total_steps": 2069, "loss": 0.5077, "lr": 3.6803052652134572e-09, "epoch": 0.9891225525743292, "percentage": 98.89, "elapsed_time": "1 day, 11:16:28", "remaining_time": "0:23:47"} +{"current_steps": 2047, "total_steps": 2069, "loss": 0.5112, "lr": 3.3800364178881996e-09, "epoch": 0.9896059946821368, "percentage": 98.94, "elapsed_time": "1 day, 11:17:29", "remaining_time": "0:22:45"} +{"current_steps": 2048, "total_steps": 2069, "loss": 0.5087, "lr": 3.092539333896527e-09, "epoch": 0.9900894367899444, "percentage": 98.99, "elapsed_time": "1 day, 11:18:28", "remaining_time": "0:21:43"} +{"current_steps": 2049, "total_steps": 2069, "loss": 0.5044, "lr": 2.817814748104497e-09, "epoch": 0.990572878897752, "percentage": 99.03, "elapsed_time": "1 day, 11:19:25", "remaining_time": "0:20:41"} +{"current_steps": 2050, "total_steps": 2069, "loss": 0.5217, "lr": 2.555863362730393e-09, "epoch": 0.9910563210055596, "percentage": 99.08, "elapsed_time": "1 day, 11:20:27", "remaining_time": "0:19:39"} +{"current_steps": 2051, "total_steps": 2069, "loss": 0.5117, "lr": 2.30668584734306e-09, "epoch": 0.9915397631133672, "percentage": 99.13, "elapsed_time": "1 day, 11:21:29", "remaining_time": "0:18:37"} +{"current_steps": 2052, "total_steps": 2069, "loss": 0.5056, "lr": 2.070282838859683e-09, "epoch": 0.9920232052211748, "percentage": 99.18, "elapsed_time": "1 day, 11:22:35", "remaining_time": "0:17:35"} +{"current_steps": 2053, "total_steps": 2069, "loss": 0.5319, "lr": 1.8466549415463442e-09, "epoch": 0.9925066473289823, "percentage": 99.23, "elapsed_time": "1 day, 11:23:36", "remaining_time": "0:16:33"} +{"current_steps": 2054, "total_steps": 2069, "loss": 0.5138, "lr": 1.635802727013025e-09, "epoch": 
0.9929900894367899, "percentage": 99.28, "elapsed_time": "1 day, 11:24:34", "remaining_time": "0:15:30"} +{"current_steps": 2055, "total_steps": 2069, "loss": 0.4883, "lr": 1.4377267342158274e-09, "epoch": 0.9934735315445975, "percentage": 99.32, "elapsed_time": "1 day, 11:25:30", "remaining_time": "0:14:28"} +{"current_steps": 2056, "total_steps": 2069, "loss": 0.5225, "lr": 1.2524274694525329e-09, "epoch": 0.9939569736524051, "percentage": 99.37, "elapsed_time": "1 day, 11:26:35", "remaining_time": "0:13:26"} +{"current_steps": 2057, "total_steps": 2069, "loss": 0.5232, "lr": 1.0799054063626024e-09, "epoch": 0.9944404157602127, "percentage": 99.42, "elapsed_time": "1 day, 11:27:39", "remaining_time": "0:12:24"} +{"current_steps": 2058, "total_steps": 2069, "loss": 0.5132, "lr": 9.201609859271765e-10, "epoch": 0.9949238578680203, "percentage": 99.47, "elapsed_time": "1 day, 11:28:42", "remaining_time": "0:11:22"} +{"current_steps": 2059, "total_steps": 2069, "loss": 0.5083, "lr": 7.731946164657445e-10, "epoch": 0.9954072999758279, "percentage": 99.52, "elapsed_time": "1 day, 11:29:44", "remaining_time": "0:10:20"} +{"current_steps": 2060, "total_steps": 2069, "loss": 0.5138, "lr": 6.390066736355893e-10, "epoch": 0.9958907420836355, "percentage": 99.57, "elapsed_time": "1 day, 11:30:47", "remaining_time": "0:09:18"} +{"current_steps": 2061, "total_steps": 2069, "loss": 0.5134, "lr": 5.17597500432343e-10, "epoch": 0.9963741841914431, "percentage": 99.61, "elapsed_time": "1 day, 11:31:46", "remaining_time": "0:08:16"} +{"current_steps": 2062, "total_steps": 2069, "loss": 0.5204, "lr": 4.089674071872107e-10, "epoch": 0.9968576262992507, "percentage": 99.66, "elapsed_time": "1 day, 11:32:51", "remaining_time": "0:07:14"} +{"current_steps": 2063, "total_steps": 2069, "loss": 0.5134, "lr": 3.131166715680811e-10, "epoch": 0.9973410684070583, "percentage": 99.71, "elapsed_time": "1 day, 11:33:53", "remaining_time": "0:06:12"} +{"current_steps": 2064, "total_steps": 2069, 
"loss": 0.5136, "lr": 2.3004553857675082e-10, "epoch": 0.9978245105148659, "percentage": 99.76, "elapsed_time": "1 day, 11:34:50", "remaining_time": "0:05:10"} +{"current_steps": 2065, "total_steps": 2069, "loss": 0.5088, "lr": 1.5975422055003465e-10, "epoch": 0.9983079526226735, "percentage": 99.81, "elapsed_time": "1 day, 11:35:56", "remaining_time": "0:04:08"} +{"current_steps": 2066, "total_steps": 2069, "loss": 0.5106, "lr": 1.022428971581002e-10, "epoch": 0.9987913947304811, "percentage": 99.86, "elapsed_time": "1 day, 11:36:56", "remaining_time": "0:03:06"} +{"current_steps": 2067, "total_steps": 2069, "loss": 0.513, "lr": 5.751171540391287e-11, "epoch": 0.9992748368382887, "percentage": 99.9, "elapsed_time": "1 day, 11:37:58", "remaining_time": "0:02:04"} +{"current_steps": 2068, "total_steps": 2069, "loss": 0.5278, "lr": 2.556078962490105e-11, "epoch": 0.9997582789460963, "percentage": 99.95, "elapsed_time": "1 day, 11:39:00", "remaining_time": "0:01:02"} +{"current_steps": 2069, "total_steps": 2069, "loss": 0.4723, "lr": 6.390201489625547e-12, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "1 day, 11:39:35", "remaining_time": "0:00:00"} +{"current_steps": 2069, "total_steps": 2069, "epoch": 1.0, "percentage": 100.0, "elapsed_time": "1 day, 11:40:35", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..21cd362 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,14526 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 2069, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00048344210780759005, + "grad_norm": 2.4757904153326913, + "learning_rate": 0.0, + "loss": 0.9924, + "step": 1 + }, + { + "epoch": 0.0009668842156151801, + "grad_norm": 2.3793618820940923, + "learning_rate": 9.615384615384617e-08, + "loss": 
0.9738, + "step": 2 + }, + { + "epoch": 0.0014503263234227702, + "grad_norm": 2.375691201697703, + "learning_rate": 1.9230769230769234e-07, + "loss": 0.9588, + "step": 3 + }, + { + "epoch": 0.0019337684312303602, + "grad_norm": 2.3403619553808497, + "learning_rate": 2.884615384615385e-07, + "loss": 0.9862, + "step": 4 + }, + { + "epoch": 0.00241721053903795, + "grad_norm": 2.3613475552419394, + "learning_rate": 3.846153846153847e-07, + "loss": 0.9758, + "step": 5 + }, + { + "epoch": 0.0029006526468455403, + "grad_norm": 2.374422358129782, + "learning_rate": 4.807692307692308e-07, + "loss": 0.9716, + "step": 6 + }, + { + "epoch": 0.00338409475465313, + "grad_norm": 2.478706471115894, + "learning_rate": 5.76923076923077e-07, + "loss": 0.976, + "step": 7 + }, + { + "epoch": 0.0038675368624607204, + "grad_norm": 2.3811968693026198, + "learning_rate": 6.730769230769231e-07, + "loss": 0.9873, + "step": 8 + }, + { + "epoch": 0.00435097897026831, + "grad_norm": 2.2147341913024956, + "learning_rate": 7.692307692307694e-07, + "loss": 0.9286, + "step": 9 + }, + { + "epoch": 0.0048344210780759, + "grad_norm": 2.335255162349414, + "learning_rate": 8.653846153846154e-07, + "loss": 0.9845, + "step": 10 + }, + { + "epoch": 0.005317863185883491, + "grad_norm": 2.218894644037587, + "learning_rate": 9.615384615384617e-07, + "loss": 0.95, + "step": 11 + }, + { + "epoch": 0.005801305293691081, + "grad_norm": 2.2403773948516226, + "learning_rate": 1.0576923076923078e-06, + "loss": 0.9715, + "step": 12 + }, + { + "epoch": 0.0062847474014986705, + "grad_norm": 2.164785866254398, + "learning_rate": 1.153846153846154e-06, + "loss": 0.9422, + "step": 13 + }, + { + "epoch": 0.00676818950930626, + "grad_norm": 2.2075110374685947, + "learning_rate": 1.25e-06, + "loss": 0.965, + "step": 14 + }, + { + "epoch": 0.007251631617113851, + "grad_norm": 1.9308798071113116, + "learning_rate": 1.3461538461538462e-06, + "loss": 0.9372, + "step": 15 + }, + { + "epoch": 0.007735073724921441, + "grad_norm": 
1.8705357350667309, + "learning_rate": 1.4423076923076922e-06, + "loss": 0.9443, + "step": 16 + }, + { + "epoch": 0.00821851583272903, + "grad_norm": 1.775691766227149, + "learning_rate": 1.5384615384615387e-06, + "loss": 0.9362, + "step": 17 + }, + { + "epoch": 0.00870195794053662, + "grad_norm": 1.7290053738093054, + "learning_rate": 1.6346153846153848e-06, + "loss": 0.9298, + "step": 18 + }, + { + "epoch": 0.00918540004834421, + "grad_norm": 1.6541389612298973, + "learning_rate": 1.7307692307692308e-06, + "loss": 0.9336, + "step": 19 + }, + { + "epoch": 0.0096688421561518, + "grad_norm": 1.2338607620225968, + "learning_rate": 1.826923076923077e-06, + "loss": 0.9055, + "step": 20 + }, + { + "epoch": 0.01015228426395939, + "grad_norm": 1.1808086522456918, + "learning_rate": 1.9230769230769234e-06, + "loss": 0.8962, + "step": 21 + }, + { + "epoch": 0.010635726371766982, + "grad_norm": 1.090531117286559, + "learning_rate": 2.0192307692307692e-06, + "loss": 0.8702, + "step": 22 + }, + { + "epoch": 0.011119168479574571, + "grad_norm": 1.095517820053717, + "learning_rate": 2.1153846153846155e-06, + "loss": 0.8816, + "step": 23 + }, + { + "epoch": 0.011602610587382161, + "grad_norm": 1.0208393908518454, + "learning_rate": 2.211538461538462e-06, + "loss": 0.8699, + "step": 24 + }, + { + "epoch": 0.012086052695189751, + "grad_norm": 1.004109121666044, + "learning_rate": 2.307692307692308e-06, + "loss": 0.8669, + "step": 25 + }, + { + "epoch": 0.012569494802997341, + "grad_norm": 0.98169412760157, + "learning_rate": 2.403846153846154e-06, + "loss": 0.8371, + "step": 26 + }, + { + "epoch": 0.01305293691080493, + "grad_norm": 0.9209444270757048, + "learning_rate": 2.5e-06, + "loss": 0.8388, + "step": 27 + }, + { + "epoch": 0.01353637901861252, + "grad_norm": 0.8619822284316448, + "learning_rate": 2.5961538461538465e-06, + "loss": 0.8041, + "step": 28 + }, + { + "epoch": 0.01401982112642011, + "grad_norm": 0.9241232197315488, + "learning_rate": 2.6923076923076923e-06, + 
"loss": 0.8091, + "step": 29 + }, + { + "epoch": 0.014503263234227702, + "grad_norm": 0.917429451582305, + "learning_rate": 2.7884615384615386e-06, + "loss": 0.7749, + "step": 30 + }, + { + "epoch": 0.014986705342035292, + "grad_norm": 0.9043786370452085, + "learning_rate": 2.8846153846153845e-06, + "loss": 0.8144, + "step": 31 + }, + { + "epoch": 0.015470147449842882, + "grad_norm": 0.820031258272968, + "learning_rate": 2.980769230769231e-06, + "loss": 0.7931, + "step": 32 + }, + { + "epoch": 0.01595358955765047, + "grad_norm": 0.7726701862119408, + "learning_rate": 3.0769230769230774e-06, + "loss": 0.7903, + "step": 33 + }, + { + "epoch": 0.01643703166545806, + "grad_norm": 0.6871638945331215, + "learning_rate": 3.1730769230769233e-06, + "loss": 0.7256, + "step": 34 + }, + { + "epoch": 0.01692047377326565, + "grad_norm": 0.7302580243312591, + "learning_rate": 3.2692307692307696e-06, + "loss": 0.7956, + "step": 35 + }, + { + "epoch": 0.01740391588107324, + "grad_norm": 0.6737498928543134, + "learning_rate": 3.365384615384616e-06, + "loss": 0.7478, + "step": 36 + }, + { + "epoch": 0.01788735798888083, + "grad_norm": 0.6914440787905148, + "learning_rate": 3.4615384615384617e-06, + "loss": 0.7621, + "step": 37 + }, + { + "epoch": 0.01837080009668842, + "grad_norm": 0.6869329802424697, + "learning_rate": 3.557692307692308e-06, + "loss": 0.7706, + "step": 38 + }, + { + "epoch": 0.01885424220449601, + "grad_norm": 0.6549740713679569, + "learning_rate": 3.653846153846154e-06, + "loss": 0.7237, + "step": 39 + }, + { + "epoch": 0.0193376843123036, + "grad_norm": 0.6922145753217636, + "learning_rate": 3.7500000000000005e-06, + "loss": 0.7537, + "step": 40 + }, + { + "epoch": 0.01982112642011119, + "grad_norm": 0.665714225887781, + "learning_rate": 3.846153846153847e-06, + "loss": 0.7656, + "step": 41 + }, + { + "epoch": 0.02030456852791878, + "grad_norm": 0.6390200477155564, + "learning_rate": 3.942307692307692e-06, + "loss": 0.7558, + "step": 42 + }, + { + "epoch": 
0.020788010635726373, + "grad_norm": 0.6558268717213803, + "learning_rate": 4.0384615384615385e-06, + "loss": 0.7408, + "step": 43 + }, + { + "epoch": 0.021271452743533963, + "grad_norm": 0.6191043915893901, + "learning_rate": 4.134615384615385e-06, + "loss": 0.7482, + "step": 44 + }, + { + "epoch": 0.021754894851341553, + "grad_norm": 0.6193196066166552, + "learning_rate": 4.230769230769231e-06, + "loss": 0.7424, + "step": 45 + }, + { + "epoch": 0.022238336959149143, + "grad_norm": 0.6141046550876093, + "learning_rate": 4.326923076923077e-06, + "loss": 0.7372, + "step": 46 + }, + { + "epoch": 0.022721779066956733, + "grad_norm": 0.6265315399192994, + "learning_rate": 4.423076923076924e-06, + "loss": 0.7362, + "step": 47 + }, + { + "epoch": 0.023205221174764323, + "grad_norm": 0.6704953048927751, + "learning_rate": 4.51923076923077e-06, + "loss": 0.7326, + "step": 48 + }, + { + "epoch": 0.023688663282571912, + "grad_norm": 0.6544522629648533, + "learning_rate": 4.615384615384616e-06, + "loss": 0.7275, + "step": 49 + }, + { + "epoch": 0.024172105390379502, + "grad_norm": 0.6277879949612973, + "learning_rate": 4.711538461538462e-06, + "loss": 0.7311, + "step": 50 + }, + { + "epoch": 0.024655547498187092, + "grad_norm": 0.5924725867824154, + "learning_rate": 4.807692307692308e-06, + "loss": 0.7261, + "step": 51 + }, + { + "epoch": 0.025138989605994682, + "grad_norm": 0.591545350722231, + "learning_rate": 4.903846153846154e-06, + "loss": 0.7092, + "step": 52 + }, + { + "epoch": 0.025622431713802272, + "grad_norm": 0.5698079528908845, + "learning_rate": 5e-06, + "loss": 0.7093, + "step": 53 + }, + { + "epoch": 0.02610587382160986, + "grad_norm": 0.6162650749995418, + "learning_rate": 5.096153846153846e-06, + "loss": 0.687, + "step": 54 + }, + { + "epoch": 0.02658931592941745, + "grad_norm": 0.5577930499697958, + "learning_rate": 5.192307692307693e-06, + "loss": 0.7143, + "step": 55 + }, + { + "epoch": 0.02707275803722504, + "grad_norm": 0.5640209708881836, + 
"learning_rate": 5.288461538461539e-06, + "loss": 0.7059, + "step": 56 + }, + { + "epoch": 0.02755620014503263, + "grad_norm": 0.5430126891143467, + "learning_rate": 5.384615384615385e-06, + "loss": 0.704, + "step": 57 + }, + { + "epoch": 0.02803964225284022, + "grad_norm": 0.6009517822786309, + "learning_rate": 5.480769230769232e-06, + "loss": 0.7169, + "step": 58 + }, + { + "epoch": 0.02852308436064781, + "grad_norm": 0.6167733540891279, + "learning_rate": 5.576923076923077e-06, + "loss": 0.7065, + "step": 59 + }, + { + "epoch": 0.029006526468455404, + "grad_norm": 0.5731458421774205, + "learning_rate": 5.6730769230769235e-06, + "loss": 0.6373, + "step": 60 + }, + { + "epoch": 0.029489968576262994, + "grad_norm": 0.5489403473955915, + "learning_rate": 5.769230769230769e-06, + "loss": 0.7018, + "step": 61 + }, + { + "epoch": 0.029973410684070584, + "grad_norm": 0.5325599545502842, + "learning_rate": 5.865384615384616e-06, + "loss": 0.6959, + "step": 62 + }, + { + "epoch": 0.030456852791878174, + "grad_norm": 0.5478537783639954, + "learning_rate": 5.961538461538462e-06, + "loss": 0.6896, + "step": 63 + }, + { + "epoch": 0.030940294899685764, + "grad_norm": 0.5363515063211778, + "learning_rate": 6.057692307692308e-06, + "loss": 0.7014, + "step": 64 + }, + { + "epoch": 0.03142373700749335, + "grad_norm": 0.5641946867306303, + "learning_rate": 6.153846153846155e-06, + "loss": 0.6903, + "step": 65 + }, + { + "epoch": 0.03190717911530094, + "grad_norm": 0.5481835775113026, + "learning_rate": 6.25e-06, + "loss": 0.6893, + "step": 66 + }, + { + "epoch": 0.03239062122310853, + "grad_norm": 0.5143476489389097, + "learning_rate": 6.3461538461538466e-06, + "loss": 0.6946, + "step": 67 + }, + { + "epoch": 0.03287406333091612, + "grad_norm": 0.590656354467126, + "learning_rate": 6.442307692307693e-06, + "loss": 0.6788, + "step": 68 + }, + { + "epoch": 0.03335750543872371, + "grad_norm": 0.5203563663337313, + "learning_rate": 6.538461538461539e-06, + "loss": 0.6847, + "step": 69 
+ }, + { + "epoch": 0.0338409475465313, + "grad_norm": 0.5084500426939229, + "learning_rate": 6.6346153846153846e-06, + "loss": 0.7086, + "step": 70 + }, + { + "epoch": 0.03432438965433889, + "grad_norm": 0.5032784264719405, + "learning_rate": 6.730769230769232e-06, + "loss": 0.6724, + "step": 71 + }, + { + "epoch": 0.03480783176214648, + "grad_norm": 0.5205048813341548, + "learning_rate": 6.826923076923078e-06, + "loss": 0.6592, + "step": 72 + }, + { + "epoch": 0.03529127386995407, + "grad_norm": 0.5066251849073853, + "learning_rate": 6.923076923076923e-06, + "loss": 0.6674, + "step": 73 + }, + { + "epoch": 0.03577471597776166, + "grad_norm": 0.5305760257061701, + "learning_rate": 7.01923076923077e-06, + "loss": 0.6665, + "step": 74 + }, + { + "epoch": 0.03625815808556925, + "grad_norm": 0.5583636863825877, + "learning_rate": 7.115384615384616e-06, + "loss": 0.6685, + "step": 75 + }, + { + "epoch": 0.03674160019337684, + "grad_norm": 0.6055857508188283, + "learning_rate": 7.211538461538462e-06, + "loss": 0.6826, + "step": 76 + }, + { + "epoch": 0.03722504230118443, + "grad_norm": 0.5576393446552599, + "learning_rate": 7.307692307692308e-06, + "loss": 0.6811, + "step": 77 + }, + { + "epoch": 0.03770848440899202, + "grad_norm": 0.5550469150359895, + "learning_rate": 7.403846153846155e-06, + "loss": 0.6752, + "step": 78 + }, + { + "epoch": 0.03819192651679961, + "grad_norm": 0.5095358853416947, + "learning_rate": 7.500000000000001e-06, + "loss": 0.657, + "step": 79 + }, + { + "epoch": 0.0386753686246072, + "grad_norm": 0.519449515803278, + "learning_rate": 7.5961538461538465e-06, + "loss": 0.6326, + "step": 80 + }, + { + "epoch": 0.03915881073241479, + "grad_norm": 0.5360371671954463, + "learning_rate": 7.692307692307694e-06, + "loss": 0.6577, + "step": 81 + }, + { + "epoch": 0.03964225284022238, + "grad_norm": 0.5220981103197152, + "learning_rate": 7.78846153846154e-06, + "loss": 0.6803, + "step": 82 + }, + { + "epoch": 0.04012569494802997, + "grad_norm": 
0.5357442529489778, + "learning_rate": 7.884615384615384e-06, + "loss": 0.662, + "step": 83 + }, + { + "epoch": 0.04060913705583756, + "grad_norm": 0.5922567510802571, + "learning_rate": 7.980769230769232e-06, + "loss": 0.6784, + "step": 84 + }, + { + "epoch": 0.04109257916364515, + "grad_norm": 0.5471555288958341, + "learning_rate": 8.076923076923077e-06, + "loss": 0.663, + "step": 85 + }, + { + "epoch": 0.04157602127145275, + "grad_norm": 0.5234614832210157, + "learning_rate": 8.173076923076923e-06, + "loss": 0.6633, + "step": 86 + }, + { + "epoch": 0.04205946337926034, + "grad_norm": 0.5014680527453607, + "learning_rate": 8.26923076923077e-06, + "loss": 0.6345, + "step": 87 + }, + { + "epoch": 0.04254290548706793, + "grad_norm": 0.5541925854592269, + "learning_rate": 8.365384615384616e-06, + "loss": 0.661, + "step": 88 + }, + { + "epoch": 0.043026347594875516, + "grad_norm": 0.5556737957241218, + "learning_rate": 8.461538461538462e-06, + "loss": 0.6476, + "step": 89 + }, + { + "epoch": 0.043509789702683106, + "grad_norm": 0.5358812925229628, + "learning_rate": 8.557692307692308e-06, + "loss": 0.6667, + "step": 90 + }, + { + "epoch": 0.043993231810490696, + "grad_norm": 0.5285944473021625, + "learning_rate": 8.653846153846155e-06, + "loss": 0.6558, + "step": 91 + }, + { + "epoch": 0.044476673918298286, + "grad_norm": 0.6130129115794695, + "learning_rate": 8.750000000000001e-06, + "loss": 0.6662, + "step": 92 + }, + { + "epoch": 0.044960116026105876, + "grad_norm": 0.6086871477606206, + "learning_rate": 8.846153846153847e-06, + "loss": 0.6768, + "step": 93 + }, + { + "epoch": 0.045443558133913466, + "grad_norm": 0.534737794998822, + "learning_rate": 8.942307692307693e-06, + "loss": 0.633, + "step": 94 + }, + { + "epoch": 0.045927000241721055, + "grad_norm": 0.5048674854153722, + "learning_rate": 9.03846153846154e-06, + "loss": 0.6075, + "step": 95 + }, + { + "epoch": 0.046410442349528645, + "grad_norm": 0.5516912026027078, + "learning_rate": 9.134615384615384e-06, 
+ "loss": 0.623, + "step": 96 + }, + { + "epoch": 0.046893884457336235, + "grad_norm": 0.6083291149980872, + "learning_rate": 9.230769230769232e-06, + "loss": 0.6556, + "step": 97 + }, + { + "epoch": 0.047377326565143825, + "grad_norm": 0.5460750932826393, + "learning_rate": 9.326923076923079e-06, + "loss": 0.6524, + "step": 98 + }, + { + "epoch": 0.047860768672951415, + "grad_norm": 0.5459534721301705, + "learning_rate": 9.423076923076923e-06, + "loss": 0.6449, + "step": 99 + }, + { + "epoch": 0.048344210780759005, + "grad_norm": 0.5622412415254093, + "learning_rate": 9.51923076923077e-06, + "loss": 0.6517, + "step": 100 + }, + { + "epoch": 0.048827652888566594, + "grad_norm": 0.6148179967646931, + "learning_rate": 9.615384615384616e-06, + "loss": 0.636, + "step": 101 + }, + { + "epoch": 0.049311094996374184, + "grad_norm": 0.5377477077942675, + "learning_rate": 9.711538461538462e-06, + "loss": 0.6569, + "step": 102 + }, + { + "epoch": 0.049794537104181774, + "grad_norm": 0.535881794576154, + "learning_rate": 9.807692307692308e-06, + "loss": 0.6515, + "step": 103 + }, + { + "epoch": 0.050277979211989364, + "grad_norm": 0.5554528998874018, + "learning_rate": 9.903846153846155e-06, + "loss": 0.6471, + "step": 104 + }, + { + "epoch": 0.050761421319796954, + "grad_norm": 0.5472055318440415, + "learning_rate": 1e-05, + "loss": 0.6212, + "step": 105 + }, + { + "epoch": 0.051244863427604544, + "grad_norm": 0.5562423079812571, + "learning_rate": 9.99999360979851e-06, + "loss": 0.6483, + "step": 106 + }, + { + "epoch": 0.051728305535412133, + "grad_norm": 0.6203972023036308, + "learning_rate": 9.999974439210376e-06, + "loss": 0.6474, + "step": 107 + }, + { + "epoch": 0.05221174764321972, + "grad_norm": 0.5879110259866966, + "learning_rate": 9.999942488284598e-06, + "loss": 0.6506, + "step": 108 + }, + { + "epoch": 0.05269518975102731, + "grad_norm": 0.5415023727684817, + "learning_rate": 9.999897757102843e-06, + "loss": 0.641, + "step": 109 + }, + { + "epoch": 
0.0531786318588349, + "grad_norm": 0.5661080832571289, + "learning_rate": 9.99984024577945e-06, + "loss": 0.6561, + "step": 110 + }, + { + "epoch": 0.05366207396664249, + "grad_norm": 0.6384080684659277, + "learning_rate": 9.999769954461425e-06, + "loss": 0.6181, + "step": 111 + }, + { + "epoch": 0.05414551607445008, + "grad_norm": 0.5321086465207798, + "learning_rate": 9.999686883328433e-06, + "loss": 0.6269, + "step": 112 + }, + { + "epoch": 0.05462895818225767, + "grad_norm": 0.5658443861351884, + "learning_rate": 9.999591032592813e-06, + "loss": 0.6317, + "step": 113 + }, + { + "epoch": 0.05511240029006526, + "grad_norm": 0.5759014415066968, + "learning_rate": 9.999482402499569e-06, + "loss": 0.6468, + "step": 114 + }, + { + "epoch": 0.05559584239787285, + "grad_norm": 0.5480587710988183, + "learning_rate": 9.999360993326366e-06, + "loss": 0.6359, + "step": 115 + }, + { + "epoch": 0.05607928450568044, + "grad_norm": 0.6380718424826206, + "learning_rate": 9.999226805383534e-06, + "loss": 0.6349, + "step": 116 + }, + { + "epoch": 0.05656272661348803, + "grad_norm": 0.5246861209498886, + "learning_rate": 9.999079839014074e-06, + "loss": 0.6399, + "step": 117 + }, + { + "epoch": 0.05704616872129562, + "grad_norm": 0.515686613549135, + "learning_rate": 9.998920094593637e-06, + "loss": 0.5984, + "step": 118 + }, + { + "epoch": 0.05752961082910321, + "grad_norm": 0.5607127828178857, + "learning_rate": 9.998747572530548e-06, + "loss": 0.6398, + "step": 119 + }, + { + "epoch": 0.05801305293691081, + "grad_norm": 0.5459763409466101, + "learning_rate": 9.998562273265786e-06, + "loss": 0.626, + "step": 120 + }, + { + "epoch": 0.0584964950447184, + "grad_norm": 0.5525418327052581, + "learning_rate": 9.998364197272988e-06, + "loss": 0.6537, + "step": 121 + }, + { + "epoch": 0.05897993715252599, + "grad_norm": 6.74083445541264, + "learning_rate": 9.998153345058454e-06, + "loss": 0.9475, + "step": 122 + }, + { + "epoch": 0.05946337926033358, + "grad_norm": 0.6141628359508349, 
+ "learning_rate": 9.997929717161142e-06, + "loss": 0.6473, + "step": 123 + }, + { + "epoch": 0.05994682136814117, + "grad_norm": 0.6652958169663876, + "learning_rate": 9.997693314152658e-06, + "loss": 0.6342, + "step": 124 + }, + { + "epoch": 0.06043026347594876, + "grad_norm": 0.606711498986106, + "learning_rate": 9.99744413663727e-06, + "loss": 0.623, + "step": 125 + }, + { + "epoch": 0.06091370558375635, + "grad_norm": 0.5175309840849823, + "learning_rate": 9.997182185251896e-06, + "loss": 0.6221, + "step": 126 + }, + { + "epoch": 0.06139714769156394, + "grad_norm": 0.5341566674562975, + "learning_rate": 9.996907460666104e-06, + "loss": 0.6357, + "step": 127 + }, + { + "epoch": 0.06188058979937153, + "grad_norm": 0.5922792510376619, + "learning_rate": 9.996619963582113e-06, + "loss": 0.6043, + "step": 128 + }, + { + "epoch": 0.06236403190717912, + "grad_norm": 0.5694036510960461, + "learning_rate": 9.996319694734787e-06, + "loss": 0.6311, + "step": 129 + }, + { + "epoch": 0.0628474740149867, + "grad_norm": 0.5540844850790518, + "learning_rate": 9.99600665489164e-06, + "loss": 0.6411, + "step": 130 + }, + { + "epoch": 0.06333091612279429, + "grad_norm": 0.5371960793753483, + "learning_rate": 9.995680844852824e-06, + "loss": 0.6403, + "step": 131 + }, + { + "epoch": 0.06381435823060189, + "grad_norm": 0.5225384791967033, + "learning_rate": 9.995342265451138e-06, + "loss": 0.6269, + "step": 132 + }, + { + "epoch": 0.06429780033840947, + "grad_norm": 0.6035451474536077, + "learning_rate": 9.994990917552017e-06, + "loss": 0.6321, + "step": 133 + }, + { + "epoch": 0.06478124244621707, + "grad_norm": 0.6507380493478006, + "learning_rate": 9.994626802053536e-06, + "loss": 0.6236, + "step": 134 + }, + { + "epoch": 0.06526468455402465, + "grad_norm": 0.5456651842881993, + "learning_rate": 9.994249919886402e-06, + "loss": 0.6258, + "step": 135 + }, + { + "epoch": 0.06574812666183225, + "grad_norm": 0.5172506944070536, + "learning_rate": 9.993860272013958e-06, + "loss": 
0.6162, + "step": 136 + }, + { + "epoch": 0.06623156876963984, + "grad_norm": 0.6233262394445207, + "learning_rate": 9.993457859432172e-06, + "loss": 0.6261, + "step": 137 + }, + { + "epoch": 0.06671501087744743, + "grad_norm": 0.6073445562745826, + "learning_rate": 9.993042683169647e-06, + "loss": 0.6371, + "step": 138 + }, + { + "epoch": 0.06719845298525502, + "grad_norm": 0.5857241687958673, + "learning_rate": 9.992614744287605e-06, + "loss": 0.6275, + "step": 139 + }, + { + "epoch": 0.0676818950930626, + "grad_norm": 0.5304150460003405, + "learning_rate": 9.992174043879893e-06, + "loss": 0.6175, + "step": 140 + }, + { + "epoch": 0.0681653372008702, + "grad_norm": 0.5933722892089892, + "learning_rate": 9.991720583072975e-06, + "loss": 0.6255, + "step": 141 + }, + { + "epoch": 0.06864877930867778, + "grad_norm": 0.561723953482763, + "learning_rate": 9.991254363025935e-06, + "loss": 0.6257, + "step": 142 + }, + { + "epoch": 0.06913222141648538, + "grad_norm": 0.532228224452236, + "learning_rate": 9.99077538493047e-06, + "loss": 0.6301, + "step": 143 + }, + { + "epoch": 0.06961566352429296, + "grad_norm": 0.541783938730816, + "learning_rate": 9.990283650010883e-06, + "loss": 0.619, + "step": 144 + }, + { + "epoch": 0.07009910563210056, + "grad_norm": 0.5606995950440783, + "learning_rate": 9.989779159524091e-06, + "loss": 0.5818, + "step": 145 + }, + { + "epoch": 0.07058254773990814, + "grad_norm": 0.5286741282148979, + "learning_rate": 9.989261914759612e-06, + "loss": 0.6105, + "step": 146 + }, + { + "epoch": 0.07106598984771574, + "grad_norm": 0.525375741245272, + "learning_rate": 9.988731917039564e-06, + "loss": 0.6154, + "step": 147 + }, + { + "epoch": 0.07154943195552332, + "grad_norm": 0.5132546936158348, + "learning_rate": 9.988189167718665e-06, + "loss": 0.5533, + "step": 148 + }, + { + "epoch": 0.07203287406333092, + "grad_norm": 0.5330232205089095, + "learning_rate": 9.987633668184227e-06, + "loss": 0.6281, + "step": 149 + }, + { + "epoch": 
0.0725163161711385, + "grad_norm": 0.5459730729112252, + "learning_rate": 9.98706541985615e-06, + "loss": 0.5836, + "step": 150 + }, + { + "epoch": 0.0729997582789461, + "grad_norm": 0.5818263727750432, + "learning_rate": 9.986484424186922e-06, + "loss": 0.6246, + "step": 151 + }, + { + "epoch": 0.07348320038675368, + "grad_norm": 0.5754133435232375, + "learning_rate": 9.985890682661616e-06, + "loss": 0.6038, + "step": 152 + }, + { + "epoch": 0.07396664249456128, + "grad_norm": 0.5528911744587542, + "learning_rate": 9.985284196797884e-06, + "loss": 0.6246, + "step": 153 + }, + { + "epoch": 0.07445008460236886, + "grad_norm": 0.5484687585797547, + "learning_rate": 9.984664968145953e-06, + "loss": 0.6318, + "step": 154 + }, + { + "epoch": 0.07493352671017646, + "grad_norm": 0.5351986552762329, + "learning_rate": 9.984032998288617e-06, + "loss": 0.6184, + "step": 155 + }, + { + "epoch": 0.07541696881798404, + "grad_norm": 0.519416066205614, + "learning_rate": 9.983388288841246e-06, + "loss": 0.6185, + "step": 156 + }, + { + "epoch": 0.07590041092579164, + "grad_norm": 0.5470449402548487, + "learning_rate": 9.982730841451768e-06, + "loss": 0.625, + "step": 157 + }, + { + "epoch": 0.07638385303359922, + "grad_norm": 0.5887016805140373, + "learning_rate": 9.982060657800672e-06, + "loss": 0.6183, + "step": 158 + }, + { + "epoch": 0.07686729514140682, + "grad_norm": 0.5522566946881194, + "learning_rate": 9.981377739601002e-06, + "loss": 0.6137, + "step": 159 + }, + { + "epoch": 0.0773507372492144, + "grad_norm": 0.5411997809451911, + "learning_rate": 9.980682088598349e-06, + "loss": 0.6229, + "step": 160 + }, + { + "epoch": 0.077834179357022, + "grad_norm": 0.52840707851752, + "learning_rate": 9.979973706570856e-06, + "loss": 0.614, + "step": 161 + }, + { + "epoch": 0.07831762146482958, + "grad_norm": 0.6047062373713257, + "learning_rate": 9.979252595329204e-06, + "loss": 0.6222, + "step": 162 + }, + { + "epoch": 0.07880106357263718, + "grad_norm": 0.5420471794760692, + 
"learning_rate": 9.978518756716611e-06, + "loss": 0.5856, + "step": 163 + }, + { + "epoch": 0.07928450568044476, + "grad_norm": 0.5857386315586672, + "learning_rate": 9.977772192608827e-06, + "loss": 0.6291, + "step": 164 + }, + { + "epoch": 0.07976794778825236, + "grad_norm": 0.5691356356316107, + "learning_rate": 9.977012904914133e-06, + "loss": 0.6149, + "step": 165 + }, + { + "epoch": 0.08025138989605994, + "grad_norm": 0.5823273363045892, + "learning_rate": 9.976240895573326e-06, + "loss": 0.6147, + "step": 166 + }, + { + "epoch": 0.08073483200386754, + "grad_norm": 0.538212010864403, + "learning_rate": 9.975456166559725e-06, + "loss": 0.6002, + "step": 167 + }, + { + "epoch": 0.08121827411167512, + "grad_norm": 0.601371610274862, + "learning_rate": 9.974658719879163e-06, + "loss": 0.606, + "step": 168 + }, + { + "epoch": 0.08170171621948272, + "grad_norm": 0.588104162701253, + "learning_rate": 9.973848557569974e-06, + "loss": 0.6226, + "step": 169 + }, + { + "epoch": 0.0821851583272903, + "grad_norm": 0.5316828963553285, + "learning_rate": 9.973025681703e-06, + "loss": 0.6144, + "step": 170 + }, + { + "epoch": 0.0826686004350979, + "grad_norm": 0.5405916050680715, + "learning_rate": 9.972190094381578e-06, + "loss": 0.6148, + "step": 171 + }, + { + "epoch": 0.0831520425429055, + "grad_norm": 0.5102891757426009, + "learning_rate": 9.971341797741538e-06, + "loss": 0.616, + "step": 172 + }, + { + "epoch": 0.08363548465071308, + "grad_norm": 0.5551757535954606, + "learning_rate": 9.970480793951194e-06, + "loss": 0.6196, + "step": 173 + }, + { + "epoch": 0.08411892675852067, + "grad_norm": 0.5349760515746151, + "learning_rate": 9.96960708521134e-06, + "loss": 0.5902, + "step": 174 + }, + { + "epoch": 0.08460236886632826, + "grad_norm": 0.5713299053870873, + "learning_rate": 9.968720673755246e-06, + "loss": 0.6039, + "step": 175 + }, + { + "epoch": 0.08508581097413585, + "grad_norm": 0.5886201187493544, + "learning_rate": 9.96782156184865e-06, + "loss": 0.6128, + 
"step": 176 + }, + { + "epoch": 0.08556925308194344, + "grad_norm": 0.52487297166769, + "learning_rate": 9.966909751789758e-06, + "loss": 0.6201, + "step": 177 + }, + { + "epoch": 0.08605269518975103, + "grad_norm": 0.47488673856360863, + "learning_rate": 9.965985245909226e-06, + "loss": 0.581, + "step": 178 + }, + { + "epoch": 0.08653613729755862, + "grad_norm": 0.5390345004627665, + "learning_rate": 9.96504804657017e-06, + "loss": 0.5748, + "step": 179 + }, + { + "epoch": 0.08701957940536621, + "grad_norm": 0.5030595297893009, + "learning_rate": 9.964098156168143e-06, + "loss": 0.6025, + "step": 180 + }, + { + "epoch": 0.0875030215131738, + "grad_norm": 0.5468598312459072, + "learning_rate": 9.963135577131144e-06, + "loss": 0.6086, + "step": 181 + }, + { + "epoch": 0.08798646362098139, + "grad_norm": 0.48113219800404783, + "learning_rate": 9.962160311919601e-06, + "loss": 0.5759, + "step": 182 + }, + { + "epoch": 0.08846990572878898, + "grad_norm": 0.5498772940672643, + "learning_rate": 9.96117236302637e-06, + "loss": 0.6009, + "step": 183 + }, + { + "epoch": 0.08895334783659657, + "grad_norm": 0.572150853367621, + "learning_rate": 9.960171732976731e-06, + "loss": 0.5891, + "step": 184 + }, + { + "epoch": 0.08943678994440415, + "grad_norm": 0.5440182913032069, + "learning_rate": 9.959158424328373e-06, + "loss": 0.6126, + "step": 185 + }, + { + "epoch": 0.08992023205221175, + "grad_norm": 0.5124606491120447, + "learning_rate": 9.958132439671392e-06, + "loss": 0.6113, + "step": 186 + }, + { + "epoch": 0.09040367416001933, + "grad_norm": 0.5122426086233111, + "learning_rate": 9.957093781628294e-06, + "loss": 0.5585, + "step": 187 + }, + { + "epoch": 0.09088711626782693, + "grad_norm": 0.5466339032920954, + "learning_rate": 9.956042452853967e-06, + "loss": 0.5829, + "step": 188 + }, + { + "epoch": 0.09137055837563451, + "grad_norm": 0.5319185267267565, + "learning_rate": 9.954978456035695e-06, + "loss": 0.6014, + "step": 189 + }, + { + "epoch": 0.09185400048344211, + 
"grad_norm": 0.5439360347029544, + "learning_rate": 9.953901793893137e-06, + "loss": 0.6135, + "step": 190 + }, + { + "epoch": 0.0923374425912497, + "grad_norm": 0.5572467498872743, + "learning_rate": 9.95281246917833e-06, + "loss": 0.6126, + "step": 191 + }, + { + "epoch": 0.09282088469905729, + "grad_norm": 0.5541110285684123, + "learning_rate": 9.951710484675677e-06, + "loss": 0.6077, + "step": 192 + }, + { + "epoch": 0.09330432680686487, + "grad_norm": 0.4850481807152515, + "learning_rate": 9.950595843201936e-06, + "loss": 0.6052, + "step": 193 + }, + { + "epoch": 0.09378776891467247, + "grad_norm": 0.4982494369774088, + "learning_rate": 9.949468547606222e-06, + "loss": 0.608, + "step": 194 + }, + { + "epoch": 0.09427121102248005, + "grad_norm": 0.5222210926075901, + "learning_rate": 9.948328600769996e-06, + "loss": 0.5725, + "step": 195 + }, + { + "epoch": 0.09475465313028765, + "grad_norm": 0.5156665548407187, + "learning_rate": 9.94717600560705e-06, + "loss": 0.5981, + "step": 196 + }, + { + "epoch": 0.09523809523809523, + "grad_norm": 0.4789398218595176, + "learning_rate": 9.946010765063512e-06, + "loss": 0.6163, + "step": 197 + }, + { + "epoch": 0.09572153734590283, + "grad_norm": 0.5066106303118647, + "learning_rate": 9.94483288211783e-06, + "loss": 0.6049, + "step": 198 + }, + { + "epoch": 0.09620497945371041, + "grad_norm": 0.519086410125638, + "learning_rate": 9.943642359780767e-06, + "loss": 0.6034, + "step": 199 + }, + { + "epoch": 0.09668842156151801, + "grad_norm": 0.5726309849663989, + "learning_rate": 9.942439201095398e-06, + "loss": 0.5977, + "step": 200 + }, + { + "epoch": 0.09717186366932559, + "grad_norm": 0.5149014744932526, + "learning_rate": 9.941223409137088e-06, + "loss": 0.6147, + "step": 201 + }, + { + "epoch": 0.09765530577713319, + "grad_norm": 0.5009166664227639, + "learning_rate": 9.939994987013505e-06, + "loss": 0.595, + "step": 202 + }, + { + "epoch": 0.09813874788494077, + "grad_norm": 0.5677576117209191, + "learning_rate": 
9.93875393786459e-06, + "loss": 0.5825, + "step": 203 + }, + { + "epoch": 0.09862218999274837, + "grad_norm": 0.5705628060741978, + "learning_rate": 9.937500264862567e-06, + "loss": 0.6106, + "step": 204 + }, + { + "epoch": 0.09910563210055595, + "grad_norm": 0.5166084751955315, + "learning_rate": 9.936233971211926e-06, + "loss": 0.5724, + "step": 205 + }, + { + "epoch": 0.09958907420836355, + "grad_norm": 0.4998369485071646, + "learning_rate": 9.934955060149413e-06, + "loss": 0.5702, + "step": 206 + }, + { + "epoch": 0.10007251631617115, + "grad_norm": 0.4773861112208611, + "learning_rate": 9.933663534944029e-06, + "loss": 0.5976, + "step": 207 + }, + { + "epoch": 0.10055595842397873, + "grad_norm": 0.5142399648385931, + "learning_rate": 9.932359398897018e-06, + "loss": 0.5662, + "step": 208 + }, + { + "epoch": 0.10103940053178632, + "grad_norm": 0.5152331134346968, + "learning_rate": 9.931042655341856e-06, + "loss": 0.5987, + "step": 209 + }, + { + "epoch": 0.10152284263959391, + "grad_norm": 0.5697107336495173, + "learning_rate": 9.929713307644245e-06, + "loss": 0.5956, + "step": 210 + }, + { + "epoch": 0.1020062847474015, + "grad_norm": 0.5437386464507225, + "learning_rate": 9.928371359202103e-06, + "loss": 0.6023, + "step": 211 + }, + { + "epoch": 0.10248972685520909, + "grad_norm": 0.48930400532530816, + "learning_rate": 9.927016813445562e-06, + "loss": 0.5941, + "step": 212 + }, + { + "epoch": 0.10297316896301668, + "grad_norm": 0.5691895002113943, + "learning_rate": 9.925649673836949e-06, + "loss": 0.5977, + "step": 213 + }, + { + "epoch": 0.10345661107082427, + "grad_norm": 0.518358121778254, + "learning_rate": 9.924269943870781e-06, + "loss": 0.599, + "step": 214 + }, + { + "epoch": 0.10394005317863186, + "grad_norm": 0.5179203447080591, + "learning_rate": 9.922877627073763e-06, + "loss": 0.565, + "step": 215 + }, + { + "epoch": 0.10442349528643945, + "grad_norm": 0.5348210146349037, + "learning_rate": 9.921472727004765e-06, + "loss": 0.6038, + "step": 
216 + }, + { + "epoch": 0.10490693739424704, + "grad_norm": 0.5011388091471438, + "learning_rate": 9.920055247254827e-06, + "loss": 0.5951, + "step": 217 + }, + { + "epoch": 0.10539037950205463, + "grad_norm": 0.5706178448892886, + "learning_rate": 9.91862519144714e-06, + "loss": 0.604, + "step": 218 + }, + { + "epoch": 0.10587382160986222, + "grad_norm": 0.5667257328777994, + "learning_rate": 9.917182563237045e-06, + "loss": 0.6006, + "step": 219 + }, + { + "epoch": 0.1063572637176698, + "grad_norm": 0.5402529870671051, + "learning_rate": 9.915727366312012e-06, + "loss": 0.591, + "step": 220 + }, + { + "epoch": 0.1068407058254774, + "grad_norm": 0.5123066262170495, + "learning_rate": 9.914259604391642e-06, + "loss": 0.5818, + "step": 221 + }, + { + "epoch": 0.10732414793328499, + "grad_norm": 0.5104812232878251, + "learning_rate": 9.912779281227656e-06, + "loss": 0.5991, + "step": 222 + }, + { + "epoch": 0.10780759004109258, + "grad_norm": 0.5073553912253322, + "learning_rate": 9.911286400603878e-06, + "loss": 0.5783, + "step": 223 + }, + { + "epoch": 0.10829103214890017, + "grad_norm": 0.5516103650201469, + "learning_rate": 9.90978096633623e-06, + "loss": 0.6007, + "step": 224 + }, + { + "epoch": 0.10877447425670776, + "grad_norm": 0.5241670992889956, + "learning_rate": 9.908262982272724e-06, + "loss": 0.5865, + "step": 225 + }, + { + "epoch": 0.10925791636451535, + "grad_norm": 0.4894067875331202, + "learning_rate": 9.906732452293448e-06, + "loss": 0.5635, + "step": 226 + }, + { + "epoch": 0.10974135847232294, + "grad_norm": 0.5079732216995924, + "learning_rate": 9.905189380310564e-06, + "loss": 0.5982, + "step": 227 + }, + { + "epoch": 0.11022480058013052, + "grad_norm": 0.47288266380376864, + "learning_rate": 9.903633770268286e-06, + "loss": 0.5734, + "step": 228 + }, + { + "epoch": 0.11070824268793812, + "grad_norm": 0.5195973051222883, + "learning_rate": 9.902065626142876e-06, + "loss": 0.6021, + "step": 229 + }, + { + "epoch": 0.1111916847957457, + 
"grad_norm": 0.5159734590151601, + "learning_rate": 9.900484951942642e-06, + "loss": 0.5847, + "step": 230 + }, + { + "epoch": 0.1116751269035533, + "grad_norm": 0.5157347113387764, + "learning_rate": 9.89889175170791e-06, + "loss": 0.5946, + "step": 231 + }, + { + "epoch": 0.11215856901136088, + "grad_norm": 0.49409523170190334, + "learning_rate": 9.89728602951103e-06, + "loss": 0.5941, + "step": 232 + }, + { + "epoch": 0.11264201111916848, + "grad_norm": 0.5699641967141135, + "learning_rate": 9.89566778945636e-06, + "loss": 0.5965, + "step": 233 + }, + { + "epoch": 0.11312545322697606, + "grad_norm": 0.5565932357020583, + "learning_rate": 9.894037035680246e-06, + "loss": 0.6076, + "step": 234 + }, + { + "epoch": 0.11360889533478366, + "grad_norm": 0.4762368359891958, + "learning_rate": 9.892393772351033e-06, + "loss": 0.5749, + "step": 235 + }, + { + "epoch": 0.11409233744259124, + "grad_norm": 0.5226269336653058, + "learning_rate": 9.890738003669029e-06, + "loss": 0.5882, + "step": 236 + }, + { + "epoch": 0.11457577955039884, + "grad_norm": 0.5893232226185929, + "learning_rate": 9.889069733866515e-06, + "loss": 0.5978, + "step": 237 + }, + { + "epoch": 0.11505922165820642, + "grad_norm": 0.5556325697280562, + "learning_rate": 9.887388967207722e-06, + "loss": 0.6, + "step": 238 + }, + { + "epoch": 0.11554266376601402, + "grad_norm": 0.48160661753964396, + "learning_rate": 9.885695707988825e-06, + "loss": 0.5977, + "step": 239 + }, + { + "epoch": 0.11602610587382162, + "grad_norm": 0.5122405505133801, + "learning_rate": 9.883989960537934e-06, + "loss": 0.6044, + "step": 240 + }, + { + "epoch": 0.1165095479816292, + "grad_norm": 0.5812889541684825, + "learning_rate": 9.882271729215071e-06, + "loss": 0.5849, + "step": 241 + }, + { + "epoch": 0.1169929900894368, + "grad_norm": 0.4906401332764143, + "learning_rate": 9.880541018412179e-06, + "loss": 0.5986, + "step": 242 + }, + { + "epoch": 0.11747643219724438, + "grad_norm": 0.48951055967126716, + "learning_rate": 
9.878797832553093e-06, + "loss": 0.5646, + "step": 243 + }, + { + "epoch": 0.11795987430505198, + "grad_norm": 0.4836474446158179, + "learning_rate": 9.877042176093537e-06, + "loss": 0.5998, + "step": 244 + }, + { + "epoch": 0.11844331641285956, + "grad_norm": 0.4962973453940785, + "learning_rate": 9.875274053521107e-06, + "loss": 0.5846, + "step": 245 + }, + { + "epoch": 0.11892675852066716, + "grad_norm": 0.45261755838242107, + "learning_rate": 9.873493469355271e-06, + "loss": 0.5912, + "step": 246 + }, + { + "epoch": 0.11941020062847474, + "grad_norm": 0.49934013758424506, + "learning_rate": 9.871700428147342e-06, + "loss": 0.5836, + "step": 247 + }, + { + "epoch": 0.11989364273628234, + "grad_norm": 0.48318779237357384, + "learning_rate": 9.86989493448048e-06, + "loss": 0.5898, + "step": 248 + }, + { + "epoch": 0.12037708484408992, + "grad_norm": 0.4877998807669757, + "learning_rate": 9.868076992969672e-06, + "loss": 0.5933, + "step": 249 + }, + { + "epoch": 0.12086052695189752, + "grad_norm": 0.4697579805390032, + "learning_rate": 9.866246608261725e-06, + "loss": 0.5855, + "step": 250 + }, + { + "epoch": 0.1213439690597051, + "grad_norm": 0.512552573820198, + "learning_rate": 9.864403785035246e-06, + "loss": 0.5989, + "step": 251 + }, + { + "epoch": 0.1218274111675127, + "grad_norm": 0.4865753568683563, + "learning_rate": 9.862548528000644e-06, + "loss": 0.5722, + "step": 252 + }, + { + "epoch": 0.12231085327532028, + "grad_norm": 0.5276925045930954, + "learning_rate": 9.860680841900101e-06, + "loss": 0.5879, + "step": 253 + }, + { + "epoch": 0.12279429538312787, + "grad_norm": 0.5213718677505005, + "learning_rate": 9.858800731507575e-06, + "loss": 0.5999, + "step": 254 + }, + { + "epoch": 0.12327773749093546, + "grad_norm": 0.5180845494091726, + "learning_rate": 9.85690820162878e-06, + "loss": 0.586, + "step": 255 + }, + { + "epoch": 0.12376117959874305, + "grad_norm": 0.5698025401421347, + "learning_rate": 9.855003257101177e-06, + "loss": 0.6011, + "step": 
256 + }, + { + "epoch": 0.12424462170655064, + "grad_norm": 0.562343589994959, + "learning_rate": 9.853085902793952e-06, + "loss": 0.5894, + "step": 257 + }, + { + "epoch": 0.12472806381435823, + "grad_norm": 0.5160827286882833, + "learning_rate": 9.851156143608025e-06, + "loss": 0.5897, + "step": 258 + }, + { + "epoch": 0.12521150592216582, + "grad_norm": 0.5407107287832078, + "learning_rate": 9.84921398447601e-06, + "loss": 0.59, + "step": 259 + }, + { + "epoch": 0.1256949480299734, + "grad_norm": 0.4828245059112851, + "learning_rate": 9.847259430362222e-06, + "loss": 0.5642, + "step": 260 + }, + { + "epoch": 0.126178390137781, + "grad_norm": 0.5766667340207283, + "learning_rate": 9.845292486262664e-06, + "loss": 0.6016, + "step": 261 + }, + { + "epoch": 0.12666183224558858, + "grad_norm": 0.5818866932241936, + "learning_rate": 9.843313157204999e-06, + "loss": 0.5807, + "step": 262 + }, + { + "epoch": 0.12714527435339618, + "grad_norm": 0.5140923007570054, + "learning_rate": 9.841321448248552e-06, + "loss": 0.5858, + "step": 263 + }, + { + "epoch": 0.12762871646120377, + "grad_norm": 0.513399510660716, + "learning_rate": 9.839317364484295e-06, + "loss": 0.5847, + "step": 264 + }, + { + "epoch": 0.12811215856901137, + "grad_norm": 0.5227642580781724, + "learning_rate": 9.837300911034824e-06, + "loss": 0.5888, + "step": 265 + }, + { + "epoch": 0.12859560067681894, + "grad_norm": 0.5579358896097371, + "learning_rate": 9.83527209305436e-06, + "loss": 0.5928, + "step": 266 + }, + { + "epoch": 0.12907904278462654, + "grad_norm": 0.5145348442577231, + "learning_rate": 9.83323091572872e-06, + "loss": 0.5872, + "step": 267 + }, + { + "epoch": 0.12956248489243413, + "grad_norm": 0.5112821410236051, + "learning_rate": 9.831177384275323e-06, + "loss": 0.5805, + "step": 268 + }, + { + "epoch": 0.13004592700024173, + "grad_norm": 0.5497912960403669, + "learning_rate": 9.829111503943159e-06, + "loss": 0.5837, + "step": 269 + }, + { + "epoch": 0.1305293691080493, + "grad_norm": 
0.5226743950335115, + "learning_rate": 9.827033280012783e-06, + "loss": 0.5539, + "step": 270 + }, + { + "epoch": 0.1310128112158569, + "grad_norm": 0.5713921241049837, + "learning_rate": 9.824942717796304e-06, + "loss": 0.5881, + "step": 271 + }, + { + "epoch": 0.1314962533236645, + "grad_norm": 0.5241764388189555, + "learning_rate": 9.822839822637369e-06, + "loss": 0.6032, + "step": 272 + }, + { + "epoch": 0.1319796954314721, + "grad_norm": 0.5162440352522167, + "learning_rate": 9.820724599911147e-06, + "loss": 0.5842, + "step": 273 + }, + { + "epoch": 0.13246313753927969, + "grad_norm": 0.5431692492650363, + "learning_rate": 9.818597055024315e-06, + "loss": 0.585, + "step": 274 + }, + { + "epoch": 0.13294657964708725, + "grad_norm": 0.5124783198553914, + "learning_rate": 9.816457193415055e-06, + "loss": 0.5779, + "step": 275 + }, + { + "epoch": 0.13343002175489485, + "grad_norm": 0.5257695390265421, + "learning_rate": 9.81430502055302e-06, + "loss": 0.5798, + "step": 276 + }, + { + "epoch": 0.13391346386270245, + "grad_norm": 0.49781008962990064, + "learning_rate": 9.812140541939338e-06, + "loss": 0.5836, + "step": 277 + }, + { + "epoch": 0.13439690597051004, + "grad_norm": 0.5327804269781539, + "learning_rate": 9.809963763106593e-06, + "loss": 0.5733, + "step": 278 + }, + { + "epoch": 0.1348803480783176, + "grad_norm": 0.5167258655366103, + "learning_rate": 9.807774689618806e-06, + "loss": 0.58, + "step": 279 + }, + { + "epoch": 0.1353637901861252, + "grad_norm": 0.5321175943512093, + "learning_rate": 9.805573327071428e-06, + "loss": 0.5911, + "step": 280 + }, + { + "epoch": 0.1358472322939328, + "grad_norm": 0.49961117510050285, + "learning_rate": 9.803359681091313e-06, + "loss": 0.5737, + "step": 281 + }, + { + "epoch": 0.1363306744017404, + "grad_norm": 0.5314962622355859, + "learning_rate": 9.801133757336726e-06, + "loss": 0.593, + "step": 282 + }, + { + "epoch": 0.13681411650954797, + "grad_norm": 0.48173417582091976, + "learning_rate": 
9.798895561497299e-06, + "loss": 0.5818, + "step": 283 + }, + { + "epoch": 0.13729755861735557, + "grad_norm": 0.5127693228983886, + "learning_rate": 9.796645099294049e-06, + "loss": 0.6024, + "step": 284 + }, + { + "epoch": 0.13778100072516317, + "grad_norm": 0.5128313174228813, + "learning_rate": 9.794382376479334e-06, + "loss": 0.5837, + "step": 285 + }, + { + "epoch": 0.13826444283297076, + "grad_norm": 0.502862882638082, + "learning_rate": 9.792107398836859e-06, + "loss": 0.5781, + "step": 286 + }, + { + "epoch": 0.13874788494077833, + "grad_norm": 0.5169656633134686, + "learning_rate": 9.789820172181648e-06, + "loss": 0.5821, + "step": 287 + }, + { + "epoch": 0.13923132704858593, + "grad_norm": 6.7246508188992, + "learning_rate": 9.787520702360035e-06, + "loss": 1.0972, + "step": 288 + }, + { + "epoch": 0.13971476915639353, + "grad_norm": 0.6005251051430991, + "learning_rate": 9.785208995249655e-06, + "loss": 0.5803, + "step": 289 + }, + { + "epoch": 0.14019821126420112, + "grad_norm": 0.5531574758650235, + "learning_rate": 9.782885056759413e-06, + "loss": 0.563, + "step": 290 + }, + { + "epoch": 0.1406816533720087, + "grad_norm": 0.5273779406180227, + "learning_rate": 9.780548892829486e-06, + "loss": 0.5872, + "step": 291 + }, + { + "epoch": 0.1411650954798163, + "grad_norm": 0.5063770192301159, + "learning_rate": 9.778200509431297e-06, + "loss": 0.5782, + "step": 292 + }, + { + "epoch": 0.14164853758762389, + "grad_norm": 0.5401099132225082, + "learning_rate": 9.775839912567502e-06, + "loss": 0.5804, + "step": 293 + }, + { + "epoch": 0.14213197969543148, + "grad_norm": 0.607784811294971, + "learning_rate": 9.773467108271978e-06, + "loss": 0.5831, + "step": 294 + }, + { + "epoch": 0.14261542180323905, + "grad_norm": 0.5051370116219928, + "learning_rate": 9.771082102609803e-06, + "loss": 0.5597, + "step": 295 + }, + { + "epoch": 0.14309886391104665, + "grad_norm": 0.5723810352863865, + "learning_rate": 9.768684901677245e-06, + "loss": 0.5779, + "step": 296 + 
}, + { + "epoch": 0.14358230601885424, + "grad_norm": 0.529491415132923, + "learning_rate": 9.766275511601742e-06, + "loss": 0.5849, + "step": 297 + }, + { + "epoch": 0.14406574812666184, + "grad_norm": 0.6275998382003428, + "learning_rate": 9.763853938541887e-06, + "loss": 0.5915, + "step": 298 + }, + { + "epoch": 0.1445491902344694, + "grad_norm": 0.5906428033404255, + "learning_rate": 9.76142018868742e-06, + "loss": 0.5816, + "step": 299 + }, + { + "epoch": 0.145032632342277, + "grad_norm": 0.597638837356143, + "learning_rate": 9.7589742682592e-06, + "loss": 0.5578, + "step": 300 + }, + { + "epoch": 0.1455160744500846, + "grad_norm": 0.5365546900890564, + "learning_rate": 9.756516183509198e-06, + "loss": 0.5833, + "step": 301 + }, + { + "epoch": 0.1459995165578922, + "grad_norm": 0.554155920273677, + "learning_rate": 9.754045940720471e-06, + "loss": 0.581, + "step": 302 + }, + { + "epoch": 0.14648295866569977, + "grad_norm": 0.5290449152773149, + "learning_rate": 9.751563546207167e-06, + "loss": 0.5879, + "step": 303 + }, + { + "epoch": 0.14696640077350737, + "grad_norm": 0.5303051981230842, + "learning_rate": 9.749069006314481e-06, + "loss": 0.557, + "step": 304 + }, + { + "epoch": 0.14744984288131496, + "grad_norm": 0.4750712434505446, + "learning_rate": 9.74656232741866e-06, + "loss": 0.5236, + "step": 305 + }, + { + "epoch": 0.14793328498912256, + "grad_norm": 0.515780571537496, + "learning_rate": 9.744043515926975e-06, + "loss": 0.5827, + "step": 306 + }, + { + "epoch": 0.14841672709693013, + "grad_norm": 0.5886066507830542, + "learning_rate": 9.741512578277715e-06, + "loss": 0.5741, + "step": 307 + }, + { + "epoch": 0.14890016920473773, + "grad_norm": 0.5712616310834069, + "learning_rate": 9.738969520940158e-06, + "loss": 0.587, + "step": 308 + }, + { + "epoch": 0.14938361131254532, + "grad_norm": 0.5883909446108012, + "learning_rate": 9.736414350414564e-06, + "loss": 0.5836, + "step": 309 + }, + { + "epoch": 0.14986705342035292, + "grad_norm": 
0.49300111186175044, + "learning_rate": 9.733847073232156e-06, + "loss": 0.583, + "step": 310 + }, + { + "epoch": 0.15035049552816052, + "grad_norm": 0.47057695692490953, + "learning_rate": 9.7312676959551e-06, + "loss": 0.5433, + "step": 311 + }, + { + "epoch": 0.15083393763596809, + "grad_norm": 0.5647156070035382, + "learning_rate": 9.72867622517649e-06, + "loss": 0.5859, + "step": 312 + }, + { + "epoch": 0.15131737974377568, + "grad_norm": 0.5698749374107666, + "learning_rate": 9.726072667520338e-06, + "loss": 0.5759, + "step": 313 + }, + { + "epoch": 0.15180082185158328, + "grad_norm": 0.4935935341959304, + "learning_rate": 9.723457029641547e-06, + "loss": 0.5883, + "step": 314 + }, + { + "epoch": 0.15228426395939088, + "grad_norm": 0.5040267732247843, + "learning_rate": 9.720829318225897e-06, + "loss": 0.5723, + "step": 315 + }, + { + "epoch": 0.15276770606719844, + "grad_norm": 0.5390674583456238, + "learning_rate": 9.718189539990029e-06, + "loss": 0.5748, + "step": 316 + }, + { + "epoch": 0.15325114817500604, + "grad_norm": 0.5449958057788811, + "learning_rate": 9.715537701681431e-06, + "loss": 0.5831, + "step": 317 + }, + { + "epoch": 0.15373459028281364, + "grad_norm": 0.48895966772949706, + "learning_rate": 9.712873810078415e-06, + "loss": 0.5505, + "step": 318 + }, + { + "epoch": 0.15421803239062123, + "grad_norm": 0.5694877152526486, + "learning_rate": 9.710197871990101e-06, + "loss": 0.5789, + "step": 319 + }, + { + "epoch": 0.1547014744984288, + "grad_norm": 0.5390854150150773, + "learning_rate": 9.707509894256406e-06, + "loss": 0.5699, + "step": 320 + }, + { + "epoch": 0.1551849166062364, + "grad_norm": 0.5339825765060972, + "learning_rate": 9.704809883748012e-06, + "loss": 0.5841, + "step": 321 + }, + { + "epoch": 0.155668358714044, + "grad_norm": 0.5691147363910026, + "learning_rate": 9.70209784736637e-06, + "loss": 0.5791, + "step": 322 + }, + { + "epoch": 0.1561518008218516, + "grad_norm": 0.5098897525025804, + "learning_rate": 
9.699373792043658e-06, + "loss": 0.5789, + "step": 323 + }, + { + "epoch": 0.15663524292965916, + "grad_norm": 0.5233093422091403, + "learning_rate": 9.696637724742785e-06, + "loss": 0.5791, + "step": 324 + }, + { + "epoch": 0.15711868503746676, + "grad_norm": 0.4951608627676522, + "learning_rate": 9.693889652457359e-06, + "loss": 0.5664, + "step": 325 + }, + { + "epoch": 0.15760212714527436, + "grad_norm": 0.5085606430384619, + "learning_rate": 9.691129582211671e-06, + "loss": 0.5777, + "step": 326 + }, + { + "epoch": 0.15808556925308195, + "grad_norm": 0.5137102450781047, + "learning_rate": 9.688357521060685e-06, + "loss": 0.5843, + "step": 327 + }, + { + "epoch": 0.15856901136088952, + "grad_norm": 0.4769071854330559, + "learning_rate": 9.685573476090015e-06, + "loss": 0.578, + "step": 328 + }, + { + "epoch": 0.15905245346869712, + "grad_norm": 0.542975418114207, + "learning_rate": 9.6827774544159e-06, + "loss": 0.5859, + "step": 329 + }, + { + "epoch": 0.15953589557650472, + "grad_norm": 0.4926718305346952, + "learning_rate": 9.6799694631852e-06, + "loss": 0.5871, + "step": 330 + }, + { + "epoch": 0.1600193376843123, + "grad_norm": 0.5010989320404932, + "learning_rate": 9.677149509575365e-06, + "loss": 0.5841, + "step": 331 + }, + { + "epoch": 0.16050277979211988, + "grad_norm": 0.5446382005351177, + "learning_rate": 9.674317600794426e-06, + "loss": 0.5762, + "step": 332 + }, + { + "epoch": 0.16098622189992748, + "grad_norm": 0.5406240370145704, + "learning_rate": 9.67147374408097e-06, + "loss": 0.5685, + "step": 333 + }, + { + "epoch": 0.16146966400773508, + "grad_norm": 0.5171074604025283, + "learning_rate": 9.66861794670412e-06, + "loss": 0.5856, + "step": 334 + }, + { + "epoch": 0.16195310611554267, + "grad_norm": 0.5545080974369176, + "learning_rate": 9.665750215963528e-06, + "loss": 0.5789, + "step": 335 + }, + { + "epoch": 0.16243654822335024, + "grad_norm": 0.49939805294647144, + "learning_rate": 9.662870559189344e-06, + "loss": 0.5702, + "step": 336 + 
}, + { + "epoch": 0.16291999033115784, + "grad_norm": 0.49295646596373777, + "learning_rate": 9.6599789837422e-06, + "loss": 0.5742, + "step": 337 + }, + { + "epoch": 0.16340343243896543, + "grad_norm": 0.5522231456414357, + "learning_rate": 9.657075497013202e-06, + "loss": 0.5752, + "step": 338 + }, + { + "epoch": 0.16388687454677303, + "grad_norm": 0.5606395929711875, + "learning_rate": 9.654160106423891e-06, + "loss": 0.5854, + "step": 339 + }, + { + "epoch": 0.1643703166545806, + "grad_norm": 0.5086990809592122, + "learning_rate": 9.651232819426242e-06, + "loss": 0.5764, + "step": 340 + }, + { + "epoch": 0.1648537587623882, + "grad_norm": 0.4984930367771814, + "learning_rate": 9.648293643502636e-06, + "loss": 0.5619, + "step": 341 + }, + { + "epoch": 0.1653372008701958, + "grad_norm": 0.5217470426797576, + "learning_rate": 9.645342586165845e-06, + "loss": 0.5833, + "step": 342 + }, + { + "epoch": 0.1658206429780034, + "grad_norm": 0.546389261380125, + "learning_rate": 9.642379654959006e-06, + "loss": 0.5381, + "step": 343 + }, + { + "epoch": 0.166304085085811, + "grad_norm": 0.5439151860872452, + "learning_rate": 9.639404857455614e-06, + "loss": 0.5674, + "step": 344 + }, + { + "epoch": 0.16678752719361856, + "grad_norm": 0.5469688158149608, + "learning_rate": 9.63641820125949e-06, + "loss": 0.5705, + "step": 345 + }, + { + "epoch": 0.16727096930142615, + "grad_norm": 0.4994352161741759, + "learning_rate": 9.633419694004767e-06, + "loss": 0.555, + "step": 346 + }, + { + "epoch": 0.16775441140923375, + "grad_norm": 0.5270157823994652, + "learning_rate": 9.63040934335587e-06, + "loss": 0.5741, + "step": 347 + }, + { + "epoch": 0.16823785351704135, + "grad_norm": 0.5302701119307424, + "learning_rate": 9.627387157007502e-06, + "loss": 0.5775, + "step": 348 + }, + { + "epoch": 0.16872129562484892, + "grad_norm": 0.5005904286760833, + "learning_rate": 9.624353142684611e-06, + "loss": 0.5724, + "step": 349 + }, + { + "epoch": 0.1692047377326565, + "grad_norm": 
0.5035595085634601, + "learning_rate": 9.621307308142385e-06, + "loss": 0.5794, + "step": 350 + }, + { + "epoch": 0.1696881798404641, + "grad_norm": 0.521381746170865, + "learning_rate": 9.618249661166218e-06, + "loss": 0.5764, + "step": 351 + }, + { + "epoch": 0.1701716219482717, + "grad_norm": 0.48214165657815927, + "learning_rate": 9.615180209571709e-06, + "loss": 0.5804, + "step": 352 + }, + { + "epoch": 0.17065506405607928, + "grad_norm": 0.47552991671065514, + "learning_rate": 9.612098961204617e-06, + "loss": 0.5581, + "step": 353 + }, + { + "epoch": 0.17113850616388687, + "grad_norm": 0.46097880469562935, + "learning_rate": 9.609005923940865e-06, + "loss": 0.5618, + "step": 354 + }, + { + "epoch": 0.17162194827169447, + "grad_norm": 0.5629931104502605, + "learning_rate": 9.605901105686503e-06, + "loss": 0.5694, + "step": 355 + }, + { + "epoch": 0.17210539037950207, + "grad_norm": 0.5179757776717347, + "learning_rate": 9.602784514377701e-06, + "loss": 0.5897, + "step": 356 + }, + { + "epoch": 0.17258883248730963, + "grad_norm": 0.5355839686571028, + "learning_rate": 9.599656157980715e-06, + "loss": 0.5724, + "step": 357 + }, + { + "epoch": 0.17307227459511723, + "grad_norm": 0.6350286695754506, + "learning_rate": 9.596516044491873e-06, + "loss": 0.577, + "step": 358 + }, + { + "epoch": 0.17355571670292483, + "grad_norm": 0.5295601313068036, + "learning_rate": 9.593364181937563e-06, + "loss": 0.5834, + "step": 359 + }, + { + "epoch": 0.17403915881073242, + "grad_norm": 0.5016272467409, + "learning_rate": 9.590200578374198e-06, + "loss": 0.5848, + "step": 360 + }, + { + "epoch": 0.17452260091854, + "grad_norm": 0.4734403734457174, + "learning_rate": 9.587025241888202e-06, + "loss": 0.5629, + "step": 361 + }, + { + "epoch": 0.1750060430263476, + "grad_norm": 0.5345541955737336, + "learning_rate": 9.583838180595993e-06, + "loss": 0.5619, + "step": 362 + }, + { + "epoch": 0.1754894851341552, + "grad_norm": 0.5159159294276754, + "learning_rate": 
9.580639402643957e-06, + "loss": 0.5788, + "step": 363 + }, + { + "epoch": 0.17597292724196278, + "grad_norm": 0.5475730953848408, + "learning_rate": 9.577428916208426e-06, + "loss": 0.5758, + "step": 364 + }, + { + "epoch": 0.17645636934977035, + "grad_norm": 0.5065491502971655, + "learning_rate": 9.574206729495662e-06, + "loss": 0.5739, + "step": 365 + }, + { + "epoch": 0.17693981145757795, + "grad_norm": 0.5385122338140608, + "learning_rate": 9.570972850741839e-06, + "loss": 0.5646, + "step": 366 + }, + { + "epoch": 0.17742325356538555, + "grad_norm": 0.5282114345918013, + "learning_rate": 9.567727288213005e-06, + "loss": 0.5809, + "step": 367 + }, + { + "epoch": 0.17790669567319314, + "grad_norm": 0.5183724179001736, + "learning_rate": 9.564470050205084e-06, + "loss": 0.5745, + "step": 368 + }, + { + "epoch": 0.1783901377810007, + "grad_norm": 0.501228022506401, + "learning_rate": 9.561201145043835e-06, + "loss": 0.5759, + "step": 369 + }, + { + "epoch": 0.1788735798888083, + "grad_norm": 0.5161478035704796, + "learning_rate": 9.557920581084848e-06, + "loss": 0.5716, + "step": 370 + }, + { + "epoch": 0.1793570219966159, + "grad_norm": 0.5508440640900468, + "learning_rate": 9.554628366713506e-06, + "loss": 0.5681, + "step": 371 + }, + { + "epoch": 0.1798404641044235, + "grad_norm": 0.4958022642187558, + "learning_rate": 9.551324510344972e-06, + "loss": 0.5674, + "step": 372 + }, + { + "epoch": 0.18032390621223107, + "grad_norm": 0.5211800045547449, + "learning_rate": 9.548009020424172e-06, + "loss": 0.5759, + "step": 373 + }, + { + "epoch": 0.18080734832003867, + "grad_norm": 0.5234346072417955, + "learning_rate": 9.544681905425767e-06, + "loss": 0.5761, + "step": 374 + }, + { + "epoch": 0.18129079042784627, + "grad_norm": 0.5277623761050696, + "learning_rate": 9.541343173854128e-06, + "loss": 0.5846, + "step": 375 + }, + { + "epoch": 0.18177423253565386, + "grad_norm": 0.5159488960453931, + "learning_rate": 9.537992834243323e-06, + "loss": 0.5655, + "step": 376 
+ }, + { + "epoch": 0.18225767464346146, + "grad_norm": 0.5036893425002033, + "learning_rate": 9.53463089515709e-06, + "loss": 0.578, + "step": 377 + }, + { + "epoch": 0.18274111675126903, + "grad_norm": 0.5500694186101432, + "learning_rate": 9.531257365188818e-06, + "loss": 0.5683, + "step": 378 + }, + { + "epoch": 0.18322455885907662, + "grad_norm": 0.4446123327167339, + "learning_rate": 9.527872252961518e-06, + "loss": 0.5112, + "step": 379 + }, + { + "epoch": 0.18370800096688422, + "grad_norm": 0.49646226307611685, + "learning_rate": 9.524475567127813e-06, + "loss": 0.5799, + "step": 380 + }, + { + "epoch": 0.18419144307469182, + "grad_norm": 0.5455620647014985, + "learning_rate": 9.521067316369903e-06, + "loss": 0.5601, + "step": 381 + }, + { + "epoch": 0.1846748851824994, + "grad_norm": 0.5073331374598753, + "learning_rate": 9.517647509399555e-06, + "loss": 0.5399, + "step": 382 + }, + { + "epoch": 0.18515832729030698, + "grad_norm": 0.5171824333562809, + "learning_rate": 9.514216154958067e-06, + "loss": 0.5754, + "step": 383 + }, + { + "epoch": 0.18564176939811458, + "grad_norm": 0.5085818096253197, + "learning_rate": 9.510773261816261e-06, + "loss": 0.5623, + "step": 384 + }, + { + "epoch": 0.18612521150592218, + "grad_norm": 0.50056273177622, + "learning_rate": 9.507318838774448e-06, + "loss": 0.5774, + "step": 385 + }, + { + "epoch": 0.18660865361372975, + "grad_norm": 0.5493241761943409, + "learning_rate": 9.50385289466241e-06, + "loss": 0.5698, + "step": 386 + }, + { + "epoch": 0.18709209572153734, + "grad_norm": 0.48083872272472233, + "learning_rate": 9.500375438339384e-06, + "loss": 0.5634, + "step": 387 + }, + { + "epoch": 0.18757553782934494, + "grad_norm": 0.48598643847981954, + "learning_rate": 9.496886478694025e-06, + "loss": 0.5642, + "step": 388 + }, + { + "epoch": 0.18805897993715254, + "grad_norm": 0.4945695421669264, + "learning_rate": 9.493386024644396e-06, + "loss": 0.5763, + "step": 389 + }, + { + "epoch": 0.1885424220449601, + 
"grad_norm": 0.4630609260733735, + "learning_rate": 9.48987408513794e-06, + "loss": 0.5667, + "step": 390 + }, + { + "epoch": 0.1890258641527677, + "grad_norm": 0.5178132025025237, + "learning_rate": 9.486350669151455e-06, + "loss": 0.5633, + "step": 391 + }, + { + "epoch": 0.1895093062605753, + "grad_norm": 0.4855261545618926, + "learning_rate": 9.482815785691082e-06, + "loss": 0.5705, + "step": 392 + }, + { + "epoch": 0.1899927483683829, + "grad_norm": 0.48580056178653924, + "learning_rate": 9.47926944379226e-06, + "loss": 0.5703, + "step": 393 + }, + { + "epoch": 0.19047619047619047, + "grad_norm": 0.5308237684959329, + "learning_rate": 9.475711652519732e-06, + "loss": 0.5583, + "step": 394 + }, + { + "epoch": 0.19095963258399806, + "grad_norm": 0.5127712618313278, + "learning_rate": 9.472142420967496e-06, + "loss": 0.5674, + "step": 395 + }, + { + "epoch": 0.19144307469180566, + "grad_norm": 0.4833488281294125, + "learning_rate": 9.468561758258795e-06, + "loss": 0.578, + "step": 396 + }, + { + "epoch": 0.19192651679961326, + "grad_norm": 0.5644191416840888, + "learning_rate": 9.464969673546092e-06, + "loss": 0.582, + "step": 397 + }, + { + "epoch": 0.19240995890742082, + "grad_norm": 0.5561428050479044, + "learning_rate": 9.461366176011047e-06, + "loss": 0.5762, + "step": 398 + }, + { + "epoch": 0.19289340101522842, + "grad_norm": 0.49800634280761286, + "learning_rate": 9.457751274864486e-06, + "loss": 0.5786, + "step": 399 + }, + { + "epoch": 0.19337684312303602, + "grad_norm": 0.464098426014889, + "learning_rate": 9.454124979346392e-06, + "loss": 0.531, + "step": 400 + }, + { + "epoch": 0.19386028523084362, + "grad_norm": 0.5317711530861378, + "learning_rate": 9.450487298725866e-06, + "loss": 0.5735, + "step": 401 + }, + { + "epoch": 0.19434372733865118, + "grad_norm": 0.5700860255634325, + "learning_rate": 9.446838242301113e-06, + "loss": 0.5736, + "step": 402 + }, + { + "epoch": 0.19482716944645878, + "grad_norm": 0.5415575586047788, + "learning_rate": 
9.443177819399416e-06, + "loss": 0.5682, + "step": 403 + }, + { + "epoch": 0.19531061155426638, + "grad_norm": 0.45162964809703743, + "learning_rate": 9.439506039377111e-06, + "loss": 0.5457, + "step": 404 + }, + { + "epoch": 0.19579405366207397, + "grad_norm": 0.48073200361222107, + "learning_rate": 9.435822911619564e-06, + "loss": 0.5452, + "step": 405 + }, + { + "epoch": 0.19627749576988154, + "grad_norm": 0.5218011226870963, + "learning_rate": 9.432128445541147e-06, + "loss": 0.5569, + "step": 406 + }, + { + "epoch": 0.19676093787768914, + "grad_norm": 0.5241766492312198, + "learning_rate": 9.42842265058521e-06, + "loss": 0.5791, + "step": 407 + }, + { + "epoch": 0.19724437998549674, + "grad_norm": 0.4747479232641684, + "learning_rate": 9.424705536224065e-06, + "loss": 0.572, + "step": 408 + }, + { + "epoch": 0.19772782209330433, + "grad_norm": 0.4892195750767198, + "learning_rate": 9.420977111958957e-06, + "loss": 0.577, + "step": 409 + }, + { + "epoch": 0.1982112642011119, + "grad_norm": 0.49625147154018395, + "learning_rate": 9.41723738732004e-06, + "loss": 0.5673, + "step": 410 + }, + { + "epoch": 0.1986947063089195, + "grad_norm": 0.553969116933997, + "learning_rate": 9.41348637186635e-06, + "loss": 0.5805, + "step": 411 + }, + { + "epoch": 0.1991781484167271, + "grad_norm": 0.5271833056864474, + "learning_rate": 9.409724075185782e-06, + "loss": 0.5811, + "step": 412 + }, + { + "epoch": 0.1996615905245347, + "grad_norm": 0.541152410560869, + "learning_rate": 9.405950506895074e-06, + "loss": 0.5539, + "step": 413 + }, + { + "epoch": 0.2001450326323423, + "grad_norm": 0.4827367980584999, + "learning_rate": 9.40216567663977e-06, + "loss": 0.5754, + "step": 414 + }, + { + "epoch": 0.20062847474014986, + "grad_norm": 0.49177545628835745, + "learning_rate": 9.398369594094198e-06, + "loss": 0.508, + "step": 415 + }, + { + "epoch": 0.20111191684795746, + "grad_norm": 0.50467312755319, + "learning_rate": 9.394562268961454e-06, + "loss": 0.5681, + "step": 416 + }, + 
{ + "epoch": 0.20159535895576505, + "grad_norm": 0.4916777572033636, + "learning_rate": 9.390743710973366e-06, + "loss": 0.575, + "step": 417 + }, + { + "epoch": 0.20207880106357265, + "grad_norm": 0.5183550927798377, + "learning_rate": 9.386913929890478e-06, + "loss": 0.57, + "step": 418 + }, + { + "epoch": 0.20256224317138022, + "grad_norm": 0.47362092706218123, + "learning_rate": 9.383072935502018e-06, + "loss": 0.5644, + "step": 419 + }, + { + "epoch": 0.20304568527918782, + "grad_norm": 0.49530019201729136, + "learning_rate": 9.379220737625877e-06, + "loss": 0.564, + "step": 420 + }, + { + "epoch": 0.2035291273869954, + "grad_norm": 0.5108751966700111, + "learning_rate": 9.375357346108583e-06, + "loss": 0.5602, + "step": 421 + }, + { + "epoch": 0.204012569494803, + "grad_norm": 0.5230318233484302, + "learning_rate": 9.371482770825277e-06, + "loss": 0.5695, + "step": 422 + }, + { + "epoch": 0.20449601160261058, + "grad_norm": 0.4802393361720882, + "learning_rate": 9.367597021679686e-06, + "loss": 0.5661, + "step": 423 + }, + { + "epoch": 0.20497945371041817, + "grad_norm": 0.541773712373739, + "learning_rate": 9.363700108604096e-06, + "loss": 0.5582, + "step": 424 + }, + { + "epoch": 0.20546289581822577, + "grad_norm": 0.5110126727655455, + "learning_rate": 9.359792041559334e-06, + "loss": 0.5645, + "step": 425 + }, + { + "epoch": 0.20594633792603337, + "grad_norm": 0.5486480496411716, + "learning_rate": 9.35587283053473e-06, + "loss": 0.5677, + "step": 426 + }, + { + "epoch": 0.20642978003384094, + "grad_norm": 0.5379779057549923, + "learning_rate": 9.351942485548109e-06, + "loss": 0.5435, + "step": 427 + }, + { + "epoch": 0.20691322214164853, + "grad_norm": 0.5341397558862222, + "learning_rate": 9.348001016645744e-06, + "loss": 0.5599, + "step": 428 + }, + { + "epoch": 0.20739666424945613, + "grad_norm": 0.44238086682442823, + "learning_rate": 9.344048433902351e-06, + "loss": 0.541, + "step": 429 + }, + { + "epoch": 0.20788010635726373, + "grad_norm": 
0.5213851954927032, + "learning_rate": 9.340084747421048e-06, + "loss": 0.5366, + "step": 430 + }, + { + "epoch": 0.2083635484650713, + "grad_norm": 0.5349032988779688, + "learning_rate": 9.336109967333337e-06, + "loss": 0.5571, + "step": 431 + }, + { + "epoch": 0.2088469905728789, + "grad_norm": 0.4554230771685569, + "learning_rate": 9.332124103799075e-06, + "loss": 0.5516, + "step": 432 + }, + { + "epoch": 0.2093304326806865, + "grad_norm": 0.5021585721937876, + "learning_rate": 9.328127167006457e-06, + "loss": 0.5679, + "step": 433 + }, + { + "epoch": 0.2098138747884941, + "grad_norm": 0.5025134126056662, + "learning_rate": 9.324119167171967e-06, + "loss": 0.5659, + "step": 434 + }, + { + "epoch": 0.21029731689630166, + "grad_norm": 0.48977518403096176, + "learning_rate": 9.320100114540382e-06, + "loss": 0.5753, + "step": 435 + }, + { + "epoch": 0.21078075900410925, + "grad_norm": 0.4789181842167065, + "learning_rate": 9.316070019384722e-06, + "loss": 0.558, + "step": 436 + }, + { + "epoch": 0.21126420111191685, + "grad_norm": 0.48417362744631853, + "learning_rate": 9.312028892006233e-06, + "loss": 0.5637, + "step": 437 + }, + { + "epoch": 0.21174764321972445, + "grad_norm": 0.5040441298097904, + "learning_rate": 9.307976742734366e-06, + "loss": 0.5603, + "step": 438 + }, + { + "epoch": 0.21223108532753202, + "grad_norm": 0.5003182083782678, + "learning_rate": 9.30391358192674e-06, + "loss": 0.5583, + "step": 439 + }, + { + "epoch": 0.2127145274353396, + "grad_norm": 0.5188458903874932, + "learning_rate": 9.299839419969119e-06, + "loss": 0.5614, + "step": 440 + }, + { + "epoch": 0.2131979695431472, + "grad_norm": 0.4990120996823676, + "learning_rate": 9.295754267275393e-06, + "loss": 0.5732, + "step": 441 + }, + { + "epoch": 0.2136814116509548, + "grad_norm": 0.43407580533296863, + "learning_rate": 9.291658134287537e-06, + "loss": 0.5451, + "step": 442 + }, + { + "epoch": 0.21416485375876237, + "grad_norm": 0.487299832131986, + "learning_rate": 
9.287551031475604e-06, + "loss": 0.5486, + "step": 443 + }, + { + "epoch": 0.21464829586656997, + "grad_norm": 0.4748601209022523, + "learning_rate": 9.283432969337672e-06, + "loss": 0.5568, + "step": 444 + }, + { + "epoch": 0.21513173797437757, + "grad_norm": 0.5116954397180901, + "learning_rate": 9.279303958399846e-06, + "loss": 0.5561, + "step": 445 + }, + { + "epoch": 0.21561518008218516, + "grad_norm": 0.5103832796562369, + "learning_rate": 9.275164009216205e-06, + "loss": 0.5653, + "step": 446 + }, + { + "epoch": 0.21609862218999276, + "grad_norm": 0.453674255766726, + "learning_rate": 9.271013132368799e-06, + "loss": 0.5359, + "step": 447 + }, + { + "epoch": 0.21658206429780033, + "grad_norm": 0.4865827031825044, + "learning_rate": 9.266851338467598e-06, + "loss": 0.5627, + "step": 448 + }, + { + "epoch": 0.21706550640560793, + "grad_norm": 0.4474998958247519, + "learning_rate": 9.262678638150486e-06, + "loss": 0.5372, + "step": 449 + }, + { + "epoch": 0.21754894851341552, + "grad_norm": 0.5312817145455567, + "learning_rate": 9.258495042083222e-06, + "loss": 0.583, + "step": 450 + }, + { + "epoch": 0.21803239062122312, + "grad_norm": 0.5326646088756841, + "learning_rate": 9.254300560959413e-06, + "loss": 0.5641, + "step": 451 + }, + { + "epoch": 0.2185158327290307, + "grad_norm": 0.47741110714076435, + "learning_rate": 9.25009520550049e-06, + "loss": 0.5692, + "step": 452 + }, + { + "epoch": 0.2189992748368383, + "grad_norm": 0.4992778758439529, + "learning_rate": 9.245878986455684e-06, + "loss": 0.5732, + "step": 453 + }, + { + "epoch": 0.21948271694464588, + "grad_norm": 0.5067531688765293, + "learning_rate": 9.241651914601986e-06, + "loss": 0.5684, + "step": 454 + }, + { + "epoch": 0.21996615905245348, + "grad_norm": 0.5259329600281596, + "learning_rate": 9.237414000744134e-06, + "loss": 0.5728, + "step": 455 + }, + { + "epoch": 0.22044960116026105, + "grad_norm": 0.4912112930780334, + "learning_rate": 9.23316525571458e-06, + "loss": 0.5543, + "step": 456 
+ }, + { + "epoch": 0.22093304326806865, + "grad_norm": 0.4325116439857764, + "learning_rate": 9.228905690373456e-06, + "loss": 0.5109, + "step": 457 + }, + { + "epoch": 0.22141648537587624, + "grad_norm": 0.5251969417490432, + "learning_rate": 9.224635315608554e-06, + "loss": 0.5613, + "step": 458 + }, + { + "epoch": 0.22189992748368384, + "grad_norm": 0.5371164613513753, + "learning_rate": 9.2203541423353e-06, + "loss": 0.5758, + "step": 459 + }, + { + "epoch": 0.2223833695914914, + "grad_norm": 0.49879877094748626, + "learning_rate": 9.216062181496712e-06, + "loss": 0.5656, + "step": 460 + }, + { + "epoch": 0.222866811699299, + "grad_norm": 0.4666739038962981, + "learning_rate": 9.211759444063392e-06, + "loss": 0.5643, + "step": 461 + }, + { + "epoch": 0.2233502538071066, + "grad_norm": 0.5019702713381807, + "learning_rate": 9.207445941033483e-06, + "loss": 0.5645, + "step": 462 + }, + { + "epoch": 0.2238336959149142, + "grad_norm": 0.560484985437826, + "learning_rate": 9.203121683432646e-06, + "loss": 0.5622, + "step": 463 + }, + { + "epoch": 0.22431713802272177, + "grad_norm": 0.501701537299382, + "learning_rate": 9.19878668231403e-06, + "loss": 0.5686, + "step": 464 + }, + { + "epoch": 0.22480058013052936, + "grad_norm": 0.48640275847390047, + "learning_rate": 9.19444094875825e-06, + "loss": 0.5617, + "step": 465 + }, + { + "epoch": 0.22528402223833696, + "grad_norm": 0.5066662929437282, + "learning_rate": 9.190084493873353e-06, + "loss": 0.5733, + "step": 466 + }, + { + "epoch": 0.22576746434614456, + "grad_norm": 0.5297511031777309, + "learning_rate": 9.185717328794784e-06, + "loss": 0.5632, + "step": 467 + }, + { + "epoch": 0.22625090645395213, + "grad_norm": 0.5778692323663056, + "learning_rate": 9.18133946468537e-06, + "loss": 0.5684, + "step": 468 + }, + { + "epoch": 0.22673434856175972, + "grad_norm": 0.5148715492097395, + "learning_rate": 9.176950912735287e-06, + "loss": 0.5559, + "step": 469 + }, + { + "epoch": 0.22721779066956732, + "grad_norm": 
0.5157447753884506, + "learning_rate": 9.172551684162025e-06, + "loss": 0.5731, + "step": 470 + }, + { + "epoch": 0.22770123277737492, + "grad_norm": 0.4783319000473412, + "learning_rate": 9.16814179021037e-06, + "loss": 0.5671, + "step": 471 + }, + { + "epoch": 0.2281846748851825, + "grad_norm": 0.5017422895280137, + "learning_rate": 9.163721242152362e-06, + "loss": 0.5661, + "step": 472 + }, + { + "epoch": 0.22866811699299008, + "grad_norm": 0.49272124386072536, + "learning_rate": 9.159290051287282e-06, + "loss": 0.5627, + "step": 473 + }, + { + "epoch": 0.22915155910079768, + "grad_norm": 0.47471736533769476, + "learning_rate": 9.154848228941607e-06, + "loss": 0.5615, + "step": 474 + }, + { + "epoch": 0.22963500120860528, + "grad_norm": 0.5071884927272643, + "learning_rate": 9.150395786468998e-06, + "loss": 0.5645, + "step": 475 + }, + { + "epoch": 0.23011844331641285, + "grad_norm": 0.48690399925776484, + "learning_rate": 9.14593273525025e-06, + "loss": 0.5647, + "step": 476 + }, + { + "epoch": 0.23060188542422044, + "grad_norm": 0.5041235784595942, + "learning_rate": 9.14145908669329e-06, + "loss": 0.5729, + "step": 477 + }, + { + "epoch": 0.23108532753202804, + "grad_norm": 0.5265161224054821, + "learning_rate": 9.136974852233118e-06, + "loss": 0.5587, + "step": 478 + }, + { + "epoch": 0.23156876963983564, + "grad_norm": 0.4778337324840926, + "learning_rate": 9.132480043331801e-06, + "loss": 0.5646, + "step": 479 + }, + { + "epoch": 0.23205221174764323, + "grad_norm": 0.5036800160533508, + "learning_rate": 9.127974671478432e-06, + "loss": 0.5655, + "step": 480 + }, + { + "epoch": 0.2325356538554508, + "grad_norm": 0.4915164507750186, + "learning_rate": 9.123458748189105e-06, + "loss": 0.5608, + "step": 481 + }, + { + "epoch": 0.2330190959632584, + "grad_norm": 0.4447947403953834, + "learning_rate": 9.118932285006886e-06, + "loss": 0.5254, + "step": 482 + }, + { + "epoch": 0.233502538071066, + "grad_norm": 0.4936810479165672, + "learning_rate": 
9.114395293501775e-06, + "loss": 0.5751, + "step": 483 + }, + { + "epoch": 0.2339859801788736, + "grad_norm": 0.4933009245810686, + "learning_rate": 9.10984778527069e-06, + "loss": 0.5603, + "step": 484 + }, + { + "epoch": 0.23446942228668116, + "grad_norm": 0.4720549987110232, + "learning_rate": 9.10528977193743e-06, + "loss": 0.5703, + "step": 485 + }, + { + "epoch": 0.23495286439448876, + "grad_norm": 0.5362136689894559, + "learning_rate": 9.100721265152644e-06, + "loss": 0.5635, + "step": 486 + }, + { + "epoch": 0.23543630650229636, + "grad_norm": 0.47602005538977166, + "learning_rate": 9.096142276593802e-06, + "loss": 0.5721, + "step": 487 + }, + { + "epoch": 0.23591974861010395, + "grad_norm": 0.48887012727323886, + "learning_rate": 9.09155281796517e-06, + "loss": 0.5502, + "step": 488 + }, + { + "epoch": 0.23640319071791152, + "grad_norm": 0.5468866437635687, + "learning_rate": 9.086952900997774e-06, + "loss": 0.5628, + "step": 489 + }, + { + "epoch": 0.23688663282571912, + "grad_norm": 0.468285091758703, + "learning_rate": 9.082342537449369e-06, + "loss": 0.5649, + "step": 490 + }, + { + "epoch": 0.23737007493352671, + "grad_norm": 0.49449575173177474, + "learning_rate": 9.07772173910442e-06, + "loss": 0.5363, + "step": 491 + }, + { + "epoch": 0.2378535170413343, + "grad_norm": 0.5665277859908898, + "learning_rate": 9.073090517774057e-06, + "loss": 0.5679, + "step": 492 + }, + { + "epoch": 0.23833695914914188, + "grad_norm": 0.559218042712036, + "learning_rate": 9.068448885296057e-06, + "loss": 0.5598, + "step": 493 + }, + { + "epoch": 0.23882040125694948, + "grad_norm": 0.5572180299965971, + "learning_rate": 9.063796853534808e-06, + "loss": 0.5606, + "step": 494 + }, + { + "epoch": 0.23930384336475707, + "grad_norm": 0.4852501650353095, + "learning_rate": 9.059134434381274e-06, + "loss": 0.5614, + "step": 495 + }, + { + "epoch": 0.23978728547256467, + "grad_norm": 0.5235782249928449, + "learning_rate": 9.054461639752976e-06, + "loss": 0.5637, + "step": 496 
+ }, + { + "epoch": 0.24027072758037224, + "grad_norm": 0.5028533022976227, + "learning_rate": 9.049778481593954e-06, + "loss": 0.5718, + "step": 497 + }, + { + "epoch": 0.24075416968817984, + "grad_norm": 0.508045864936268, + "learning_rate": 9.045084971874738e-06, + "loss": 0.5651, + "step": 498 + }, + { + "epoch": 0.24123761179598743, + "grad_norm": 0.5575870011120908, + "learning_rate": 9.040381122592317e-06, + "loss": 0.565, + "step": 499 + }, + { + "epoch": 0.24172105390379503, + "grad_norm": 0.5201685839473924, + "learning_rate": 9.035666945770107e-06, + "loss": 0.5593, + "step": 500 + }, + { + "epoch": 0.2422044960116026, + "grad_norm": 0.48179233555943923, + "learning_rate": 9.030942453457928e-06, + "loss": 0.5199, + "step": 501 + }, + { + "epoch": 0.2426879381194102, + "grad_norm": 0.4867208952029737, + "learning_rate": 9.02620765773196e-06, + "loss": 0.5548, + "step": 502 + }, + { + "epoch": 0.2431713802272178, + "grad_norm": 0.5240394440690106, + "learning_rate": 9.02146257069472e-06, + "loss": 0.5611, + "step": 503 + }, + { + "epoch": 0.2436548223350254, + "grad_norm": 0.48307750050965703, + "learning_rate": 9.01670720447504e-06, + "loss": 0.5577, + "step": 504 + }, + { + "epoch": 0.24413826444283296, + "grad_norm": 0.5034030614527921, + "learning_rate": 9.011941571228015e-06, + "loss": 0.5608, + "step": 505 + }, + { + "epoch": 0.24462170655064056, + "grad_norm": 0.46379490536223517, + "learning_rate": 9.007165683134986e-06, + "loss": 0.5315, + "step": 506 + }, + { + "epoch": 0.24510514865844815, + "grad_norm": 0.5103811282689319, + "learning_rate": 9.00237955240351e-06, + "loss": 0.5613, + "step": 507 + }, + { + "epoch": 0.24558859076625575, + "grad_norm": 0.47564392120255755, + "learning_rate": 8.997583191267326e-06, + "loss": 0.5764, + "step": 508 + }, + { + "epoch": 0.24607203287406332, + "grad_norm": 0.4811799201923712, + "learning_rate": 8.992776611986313e-06, + "loss": 0.5704, + "step": 509 + }, + { + "epoch": 0.24655547498187091, + "grad_norm": 
0.4799439081762819, + "learning_rate": 8.987959826846479e-06, + "loss": 0.5573, + "step": 510 + }, + { + "epoch": 0.2470389170896785, + "grad_norm": 0.5219349618857427, + "learning_rate": 8.983132848159916e-06, + "loss": 0.5583, + "step": 511 + }, + { + "epoch": 0.2475223591974861, + "grad_norm": 0.5010818591918965, + "learning_rate": 8.978295688264768e-06, + "loss": 0.5699, + "step": 512 + }, + { + "epoch": 0.2480058013052937, + "grad_norm": 0.5282819201955711, + "learning_rate": 8.973448359525207e-06, + "loss": 0.5641, + "step": 513 + }, + { + "epoch": 0.24848924341310127, + "grad_norm": 0.5025819972323563, + "learning_rate": 8.968590874331395e-06, + "loss": 0.5649, + "step": 514 + }, + { + "epoch": 0.24897268552090887, + "grad_norm": 0.4880024154213522, + "learning_rate": 8.963723245099456e-06, + "loss": 0.5533, + "step": 515 + }, + { + "epoch": 0.24945612762871647, + "grad_norm": 0.4844265343558768, + "learning_rate": 8.958845484271443e-06, + "loss": 0.5571, + "step": 516 + }, + { + "epoch": 0.24993956973652406, + "grad_norm": 0.4918270286134992, + "learning_rate": 8.953957604315306e-06, + "loss": 0.5612, + "step": 517 + }, + { + "epoch": 0.25042301184433163, + "grad_norm": 0.4532098318099568, + "learning_rate": 8.949059617724859e-06, + "loss": 0.5532, + "step": 518 + }, + { + "epoch": 0.25090645395213923, + "grad_norm": 0.4784777680132966, + "learning_rate": 8.944151537019752e-06, + "loss": 0.5314, + "step": 519 + }, + { + "epoch": 0.2513898960599468, + "grad_norm": 0.49834032614411844, + "learning_rate": 8.939233374745432e-06, + "loss": 0.561, + "step": 520 + }, + { + "epoch": 0.2518733381677544, + "grad_norm": 0.43922831313439964, + "learning_rate": 8.934305143473123e-06, + "loss": 0.5229, + "step": 521 + }, + { + "epoch": 0.252356780275562, + "grad_norm": 0.4774051999235377, + "learning_rate": 8.929366855799777e-06, + "loss": 0.5584, + "step": 522 + }, + { + "epoch": 0.2528402223833696, + "grad_norm": 0.4860585540987837, + "learning_rate": 
8.924418524348058e-06, + "loss": 0.5722, + "step": 523 + }, + { + "epoch": 0.25332366449117716, + "grad_norm": 0.476115105724116, + "learning_rate": 8.919460161766299e-06, + "loss": 0.5527, + "step": 524 + }, + { + "epoch": 0.25380710659898476, + "grad_norm": 0.49670836036646415, + "learning_rate": 8.914491780728471e-06, + "loss": 0.565, + "step": 525 + }, + { + "epoch": 0.25429054870679235, + "grad_norm": 0.49705890206049747, + "learning_rate": 8.909513393934162e-06, + "loss": 0.5562, + "step": 526 + }, + { + "epoch": 0.25477399081459995, + "grad_norm": 0.5118474736649574, + "learning_rate": 8.904525014108529e-06, + "loss": 0.5536, + "step": 527 + }, + { + "epoch": 0.25525743292240755, + "grad_norm": 0.5301718242423505, + "learning_rate": 8.899526654002268e-06, + "loss": 0.5612, + "step": 528 + }, + { + "epoch": 0.25574087503021514, + "grad_norm": 0.4796891269551852, + "learning_rate": 8.894518326391595e-06, + "loss": 0.5578, + "step": 529 + }, + { + "epoch": 0.25622431713802274, + "grad_norm": 0.4825310469483714, + "learning_rate": 8.889500044078199e-06, + "loss": 0.5554, + "step": 530 + }, + { + "epoch": 0.25670775924583034, + "grad_norm": 0.474416307358851, + "learning_rate": 8.88447181988921e-06, + "loss": 0.5466, + "step": 531 + }, + { + "epoch": 0.2571912013536379, + "grad_norm": 0.4754427571901456, + "learning_rate": 8.87943366667718e-06, + "loss": 0.5232, + "step": 532 + }, + { + "epoch": 0.2576746434614455, + "grad_norm": 0.5283380707149146, + "learning_rate": 8.87438559732003e-06, + "loss": 0.5575, + "step": 533 + }, + { + "epoch": 0.25815808556925307, + "grad_norm": 0.49022668890084664, + "learning_rate": 8.869327624721033e-06, + "loss": 0.5584, + "step": 534 + }, + { + "epoch": 0.25864152767706067, + "grad_norm": 0.45329648879294543, + "learning_rate": 8.864259761808778e-06, + "loss": 0.5557, + "step": 535 + }, + { + "epoch": 0.25912496978486826, + "grad_norm": 0.5150923796193744, + "learning_rate": 8.859182021537126e-06, + "loss": 0.5672, + "step": 
536 + }, + { + "epoch": 0.25960841189267586, + "grad_norm": 0.49475203737919254, + "learning_rate": 8.854094416885192e-06, + "loss": 0.5513, + "step": 537 + }, + { + "epoch": 0.26009185400048346, + "grad_norm": 0.48640723658571816, + "learning_rate": 8.848996960857308e-06, + "loss": 0.5542, + "step": 538 + }, + { + "epoch": 0.26057529610829105, + "grad_norm": 0.5011403090647114, + "learning_rate": 8.843889666482977e-06, + "loss": 0.5503, + "step": 539 + }, + { + "epoch": 0.2610587382160986, + "grad_norm": 0.45868293065964316, + "learning_rate": 8.838772546816857e-06, + "loss": 0.5245, + "step": 540 + }, + { + "epoch": 0.2615421803239062, + "grad_norm": 0.5028670832415251, + "learning_rate": 8.833645614938716e-06, + "loss": 0.563, + "step": 541 + }, + { + "epoch": 0.2620256224317138, + "grad_norm": 0.4652958998559184, + "learning_rate": 8.82850888395341e-06, + "loss": 0.5214, + "step": 542 + }, + { + "epoch": 0.2625090645395214, + "grad_norm": 0.48814680090193757, + "learning_rate": 8.823362366990833e-06, + "loss": 0.5539, + "step": 543 + }, + { + "epoch": 0.262992506647329, + "grad_norm": 0.4686742850265713, + "learning_rate": 8.818206077205899e-06, + "loss": 0.5432, + "step": 544 + }, + { + "epoch": 0.2634759487551366, + "grad_norm": 0.44177435010013455, + "learning_rate": 8.8130400277785e-06, + "loss": 0.5432, + "step": 545 + }, + { + "epoch": 0.2639593908629442, + "grad_norm": 0.48745119596264225, + "learning_rate": 8.807864231913475e-06, + "loss": 0.5609, + "step": 546 + }, + { + "epoch": 0.2644428329707518, + "grad_norm": 0.5387031701921053, + "learning_rate": 8.802678702840575e-06, + "loss": 0.5608, + "step": 547 + }, + { + "epoch": 0.26492627507855937, + "grad_norm": 0.47706550642594997, + "learning_rate": 8.79748345381443e-06, + "loss": 0.5487, + "step": 548 + }, + { + "epoch": 0.2654097171863669, + "grad_norm": 0.4694250929319588, + "learning_rate": 8.792278498114517e-06, + "loss": 0.549, + "step": 549 + }, + { + "epoch": 0.2658931592941745, + "grad_norm": 
0.4937111232536657, + "learning_rate": 8.78706384904512e-06, + "loss": 0.5564, + "step": 550 + }, + { + "epoch": 0.2663766014019821, + "grad_norm": 0.468357648344204, + "learning_rate": 8.7818395199353e-06, + "loss": 0.5546, + "step": 551 + }, + { + "epoch": 0.2668600435097897, + "grad_norm": 0.4713572915315673, + "learning_rate": 8.77660552413887e-06, + "loss": 0.5512, + "step": 552 + }, + { + "epoch": 0.2673434856175973, + "grad_norm": 0.4740909938486332, + "learning_rate": 8.77136187503434e-06, + "loss": 0.5631, + "step": 553 + }, + { + "epoch": 0.2678269277254049, + "grad_norm": 0.4472174307551216, + "learning_rate": 8.766108586024904e-06, + "loss": 0.5222, + "step": 554 + }, + { + "epoch": 0.2683103698332125, + "grad_norm": 0.4548110236983466, + "learning_rate": 8.760845670538387e-06, + "loss": 0.5485, + "step": 555 + }, + { + "epoch": 0.2687938119410201, + "grad_norm": 0.5173119662805489, + "learning_rate": 8.755573142027228e-06, + "loss": 0.5624, + "step": 556 + }, + { + "epoch": 0.26927725404882763, + "grad_norm": 0.4812632123799694, + "learning_rate": 8.750291013968432e-06, + "loss": 0.5562, + "step": 557 + }, + { + "epoch": 0.2697606961566352, + "grad_norm": 0.472663174890125, + "learning_rate": 8.744999299863549e-06, + "loss": 0.5669, + "step": 558 + }, + { + "epoch": 0.2702441382644428, + "grad_norm": 0.463122081686998, + "learning_rate": 8.739698013238625e-06, + "loss": 0.557, + "step": 559 + }, + { + "epoch": 0.2707275803722504, + "grad_norm": 0.5188284707009508, + "learning_rate": 8.734387167644171e-06, + "loss": 0.5202, + "step": 560 + }, + { + "epoch": 0.271211022480058, + "grad_norm": 0.49659334079030504, + "learning_rate": 8.729066776655144e-06, + "loss": 0.5605, + "step": 561 + }, + { + "epoch": 0.2716944645878656, + "grad_norm": 0.4433765304016552, + "learning_rate": 8.723736853870888e-06, + "loss": 0.5193, + "step": 562 + }, + { + "epoch": 0.2721779066956732, + "grad_norm": 0.46285084832800716, + "learning_rate": 8.718397412915114e-06, + 
"loss": 0.5583, + "step": 563 + }, + { + "epoch": 0.2726613488034808, + "grad_norm": 0.4406166472711255, + "learning_rate": 8.713048467435865e-06, + "loss": 0.5365, + "step": 564 + }, + { + "epoch": 0.27314479091128835, + "grad_norm": 0.49591339367367465, + "learning_rate": 8.707690031105478e-06, + "loss": 0.5638, + "step": 565 + }, + { + "epoch": 0.27362823301909595, + "grad_norm": 0.47717175741546425, + "learning_rate": 8.702322117620547e-06, + "loss": 0.5375, + "step": 566 + }, + { + "epoch": 0.27411167512690354, + "grad_norm": 0.49399204569955096, + "learning_rate": 8.696944740701891e-06, + "loss": 0.5502, + "step": 567 + }, + { + "epoch": 0.27459511723471114, + "grad_norm": 0.462084403002843, + "learning_rate": 8.69155791409452e-06, + "loss": 0.549, + "step": 568 + }, + { + "epoch": 0.27507855934251874, + "grad_norm": 0.4733870628371529, + "learning_rate": 8.686161651567596e-06, + "loss": 0.5479, + "step": 569 + }, + { + "epoch": 0.27556200145032633, + "grad_norm": 0.4586305030542931, + "learning_rate": 8.6807559669144e-06, + "loss": 0.517, + "step": 570 + }, + { + "epoch": 0.27604544355813393, + "grad_norm": 0.4786603573138254, + "learning_rate": 8.6753408739523e-06, + "loss": 0.5449, + "step": 571 + }, + { + "epoch": 0.2765288856659415, + "grad_norm": 0.4923356872720239, + "learning_rate": 8.669916386522708e-06, + "loss": 0.5516, + "step": 572 + }, + { + "epoch": 0.27701232777374907, + "grad_norm": 0.47497918747290174, + "learning_rate": 8.664482518491053e-06, + "loss": 0.5527, + "step": 573 + }, + { + "epoch": 0.27749576988155666, + "grad_norm": 0.5463551243922615, + "learning_rate": 8.659039283746738e-06, + "loss": 0.5528, + "step": 574 + }, + { + "epoch": 0.27797921198936426, + "grad_norm": 0.5125817786426824, + "learning_rate": 8.653586696203111e-06, + "loss": 0.5428, + "step": 575 + }, + { + "epoch": 0.27846265409717186, + "grad_norm": 0.46930359618316736, + "learning_rate": 8.648124769797424e-06, + "loss": 0.5566, + "step": 576 + }, + { + "epoch": 
0.27894609620497945, + "grad_norm": 0.48871061545968875, + "learning_rate": 8.6426535184908e-06, + "loss": 0.5517, + "step": 577 + }, + { + "epoch": 0.27942953831278705, + "grad_norm": 0.49517469382405177, + "learning_rate": 8.637172956268203e-06, + "loss": 0.5537, + "step": 578 + }, + { + "epoch": 0.27991298042059465, + "grad_norm": 0.46885534036424203, + "learning_rate": 8.631683097138386e-06, + "loss": 0.5455, + "step": 579 + }, + { + "epoch": 0.28039642252840224, + "grad_norm": 0.4404595141316285, + "learning_rate": 8.626183955133876e-06, + "loss": 0.5216, + "step": 580 + }, + { + "epoch": 0.2808798646362098, + "grad_norm": 0.45805738086780906, + "learning_rate": 8.620675544310921e-06, + "loss": 0.5483, + "step": 581 + }, + { + "epoch": 0.2813633067440174, + "grad_norm": 0.43283074014496, + "learning_rate": 8.615157878749462e-06, + "loss": 0.546, + "step": 582 + }, + { + "epoch": 0.281846748851825, + "grad_norm": 0.45714306807295113, + "learning_rate": 8.609630972553098e-06, + "loss": 0.5521, + "step": 583 + }, + { + "epoch": 0.2823301909596326, + "grad_norm": 0.47311376331001226, + "learning_rate": 8.604094839849047e-06, + "loss": 0.5586, + "step": 584 + }, + { + "epoch": 0.2828136330674402, + "grad_norm": 0.4424955765808361, + "learning_rate": 8.598549494788111e-06, + "loss": 0.5384, + "step": 585 + }, + { + "epoch": 0.28329707517524777, + "grad_norm": 0.467505480407099, + "learning_rate": 8.592994951544637e-06, + "loss": 0.5368, + "step": 586 + }, + { + "epoch": 0.28378051728305537, + "grad_norm": 0.48553503600686004, + "learning_rate": 8.587431224316488e-06, + "loss": 0.5475, + "step": 587 + }, + { + "epoch": 0.28426395939086296, + "grad_norm": 0.46529868946828945, + "learning_rate": 8.581858327324996e-06, + "loss": 0.5212, + "step": 588 + }, + { + "epoch": 0.28474740149867056, + "grad_norm": 0.4898248932325677, + "learning_rate": 8.576276274814936e-06, + "loss": 0.553, + "step": 589 + }, + { + "epoch": 0.2852308436064781, + "grad_norm": 0.4455201034159363, 
+ "learning_rate": 8.570685081054487e-06, + "loss": 0.5216, + "step": 590 + }, + { + "epoch": 0.2857142857142857, + "grad_norm": 0.46516198660507346, + "learning_rate": 8.565084760335188e-06, + "loss": 0.5505, + "step": 591 + }, + { + "epoch": 0.2861977278220933, + "grad_norm": 0.5006509760317717, + "learning_rate": 8.559475326971907e-06, + "loss": 0.551, + "step": 592 + }, + { + "epoch": 0.2866811699299009, + "grad_norm": 0.4994980078510237, + "learning_rate": 8.553856795302815e-06, + "loss": 0.5421, + "step": 593 + }, + { + "epoch": 0.2871646120377085, + "grad_norm": 0.5029150812228765, + "learning_rate": 8.548229179689325e-06, + "loss": 0.5519, + "step": 594 + }, + { + "epoch": 0.2876480541455161, + "grad_norm": 0.5073270485472724, + "learning_rate": 8.54259249451608e-06, + "loss": 0.5537, + "step": 595 + }, + { + "epoch": 0.2881314962533237, + "grad_norm": 0.45709172284548705, + "learning_rate": 8.536946754190903e-06, + "loss": 0.564, + "step": 596 + }, + { + "epoch": 0.2886149383611313, + "grad_norm": 0.47174309410425874, + "learning_rate": 8.531291973144755e-06, + "loss": 0.5452, + "step": 597 + }, + { + "epoch": 0.2890983804689388, + "grad_norm": 0.46639713589843634, + "learning_rate": 8.52562816583172e-06, + "loss": 0.5509, + "step": 598 + }, + { + "epoch": 0.2895818225767464, + "grad_norm": 0.4508036851803557, + "learning_rate": 8.519955346728939e-06, + "loss": 0.5428, + "step": 599 + }, + { + "epoch": 0.290065264684554, + "grad_norm": 0.44468353218524803, + "learning_rate": 8.5142735303366e-06, + "loss": 0.5205, + "step": 600 + }, + { + "epoch": 0.2905487067923616, + "grad_norm": 0.4850164433619974, + "learning_rate": 8.50858273117788e-06, + "loss": 0.5476, + "step": 601 + }, + { + "epoch": 0.2910321489001692, + "grad_norm": 0.49303870805000655, + "learning_rate": 8.502882963798923e-06, + "loss": 0.545, + "step": 602 + }, + { + "epoch": 0.2915155910079768, + "grad_norm": 0.47572858582093197, + "learning_rate": 8.497174242768792e-06, + "loss": 0.5515, + 
"step": 603 + }, + { + "epoch": 0.2919990331157844, + "grad_norm": 0.5284607359345597, + "learning_rate": 8.49145658267944e-06, + "loss": 0.5453, + "step": 604 + }, + { + "epoch": 0.292482475223592, + "grad_norm": 0.47829654266425203, + "learning_rate": 8.485729998145665e-06, + "loss": 0.5452, + "step": 605 + }, + { + "epoch": 0.29296591733139954, + "grad_norm": 0.4503645291799449, + "learning_rate": 8.479994503805079e-06, + "loss": 0.5536, + "step": 606 + }, + { + "epoch": 0.29344935943920714, + "grad_norm": 0.4693738299713831, + "learning_rate": 8.474250114318066e-06, + "loss": 0.5216, + "step": 607 + }, + { + "epoch": 0.29393280154701473, + "grad_norm": 0.4988674830387375, + "learning_rate": 8.468496844367752e-06, + "loss": 0.5582, + "step": 608 + }, + { + "epoch": 0.29441624365482233, + "grad_norm": 0.47627140431869974, + "learning_rate": 8.462734708659959e-06, + "loss": 0.5511, + "step": 609 + }, + { + "epoch": 0.2948996857626299, + "grad_norm": 0.43233992742433075, + "learning_rate": 8.456963721923166e-06, + "loss": 0.5279, + "step": 610 + }, + { + "epoch": 0.2953831278704375, + "grad_norm": 0.4908070311501362, + "learning_rate": 8.451183898908484e-06, + "loss": 0.5546, + "step": 611 + }, + { + "epoch": 0.2958665699782451, + "grad_norm": 0.4519643584485447, + "learning_rate": 8.445395254389605e-06, + "loss": 0.5221, + "step": 612 + }, + { + "epoch": 0.2963500120860527, + "grad_norm": 0.48396713453490725, + "learning_rate": 8.439597803162773e-06, + "loss": 0.5489, + "step": 613 + }, + { + "epoch": 0.29683345419386026, + "grad_norm": 0.4611763742603572, + "learning_rate": 8.433791560046737e-06, + "loss": 0.5457, + "step": 614 + }, + { + "epoch": 0.29731689630166785, + "grad_norm": 0.472544396347692, + "learning_rate": 8.427976539882725e-06, + "loss": 0.5553, + "step": 615 + }, + { + "epoch": 0.29780033840947545, + "grad_norm": 0.5058827141310254, + "learning_rate": 8.422152757534395e-06, + "loss": 0.5435, + "step": 616 + }, + { + "epoch": 0.29828378051728305, + 
"grad_norm": 0.4766589825937423, + "learning_rate": 8.416320227887805e-06, + "loss": 0.5526, + "step": 617 + }, + { + "epoch": 0.29876722262509064, + "grad_norm": 0.47223702801719897, + "learning_rate": 8.410478965851371e-06, + "loss": 0.5542, + "step": 618 + }, + { + "epoch": 0.29925066473289824, + "grad_norm": 0.4819039683875086, + "learning_rate": 8.404628986355832e-06, + "loss": 0.5546, + "step": 619 + }, + { + "epoch": 0.29973410684070584, + "grad_norm": 0.49462386708237827, + "learning_rate": 8.398770304354203e-06, + "loss": 0.5566, + "step": 620 + }, + { + "epoch": 0.30021754894851344, + "grad_norm": 0.4977376021667819, + "learning_rate": 8.39290293482175e-06, + "loss": 0.5508, + "step": 621 + }, + { + "epoch": 0.30070099105632103, + "grad_norm": 0.506465713525892, + "learning_rate": 8.387026892755942e-06, + "loss": 0.5568, + "step": 622 + }, + { + "epoch": 0.3011844331641286, + "grad_norm": 0.4712688161265267, + "learning_rate": 8.381142193176414e-06, + "loss": 0.5489, + "step": 623 + }, + { + "epoch": 0.30166787527193617, + "grad_norm": 0.4615215470431895, + "learning_rate": 8.375248851124937e-06, + "loss": 0.5554, + "step": 624 + }, + { + "epoch": 0.30215131737974377, + "grad_norm": 0.5110895222198079, + "learning_rate": 8.369346881665364e-06, + "loss": 0.5466, + "step": 625 + }, + { + "epoch": 0.30263475948755136, + "grad_norm": 0.47157470051165545, + "learning_rate": 8.363436299883604e-06, + "loss": 0.5644, + "step": 626 + }, + { + "epoch": 0.30311820159535896, + "grad_norm": 0.4789841431133952, + "learning_rate": 8.357517120887586e-06, + "loss": 0.5493, + "step": 627 + }, + { + "epoch": 0.30360164370316656, + "grad_norm": 0.47629409809645545, + "learning_rate": 8.351589359807204e-06, + "loss": 0.5523, + "step": 628 + }, + { + "epoch": 0.30408508581097415, + "grad_norm": 0.4618925314784255, + "learning_rate": 8.345653031794292e-06, + "loss": 0.5348, + "step": 629 + }, + { + "epoch": 0.30456852791878175, + "grad_norm": 0.500646417496574, + 
"learning_rate": 8.339708152022586e-06, + "loss": 0.554, + "step": 630 + }, + { + "epoch": 0.3050519700265893, + "grad_norm": 0.4553680483630167, + "learning_rate": 8.333754735687677e-06, + "loss": 0.5489, + "step": 631 + }, + { + "epoch": 0.3055354121343969, + "grad_norm": 0.4591000336987377, + "learning_rate": 8.327792798006977e-06, + "loss": 0.5508, + "step": 632 + }, + { + "epoch": 0.3060188542422045, + "grad_norm": 0.48696012413599493, + "learning_rate": 8.321822354219677e-06, + "loss": 0.5505, + "step": 633 + }, + { + "epoch": 0.3065022963500121, + "grad_norm": 0.4952413093498077, + "learning_rate": 8.315843419586717e-06, + "loss": 0.5574, + "step": 634 + }, + { + "epoch": 0.3069857384578197, + "grad_norm": 0.4716786308005616, + "learning_rate": 8.309856009390732e-06, + "loss": 0.5281, + "step": 635 + }, + { + "epoch": 0.3074691805656273, + "grad_norm": 0.5207738583309734, + "learning_rate": 8.303860138936027e-06, + "loss": 0.5607, + "step": 636 + }, + { + "epoch": 0.3079526226734349, + "grad_norm": 0.5226978234399785, + "learning_rate": 8.297855823548528e-06, + "loss": 0.5565, + "step": 637 + }, + { + "epoch": 0.30843606478124247, + "grad_norm": 0.49251100209183046, + "learning_rate": 8.291843078575752e-06, + "loss": 0.5485, + "step": 638 + }, + { + "epoch": 0.30891950688905, + "grad_norm": 0.4769824051475033, + "learning_rate": 8.285821919386758e-06, + "loss": 0.5456, + "step": 639 + }, + { + "epoch": 0.3094029489968576, + "grad_norm": 0.503019530780954, + "learning_rate": 8.279792361372114e-06, + "loss": 0.5602, + "step": 640 + }, + { + "epoch": 0.3098863911046652, + "grad_norm": 0.48405162661408385, + "learning_rate": 8.273754419943856e-06, + "loss": 0.5536, + "step": 641 + }, + { + "epoch": 0.3103698332124728, + "grad_norm": 0.4657304337869963, + "learning_rate": 8.267708110535449e-06, + "loss": 0.5477, + "step": 642 + }, + { + "epoch": 0.3108532753202804, + "grad_norm": 0.5106373655355231, + "learning_rate": 8.26165344860175e-06, + "loss": 0.571, + 
"step": 643 + }, + { + "epoch": 0.311336717428088, + "grad_norm": 0.4854760780132044, + "learning_rate": 8.255590449618958e-06, + "loss": 0.546, + "step": 644 + }, + { + "epoch": 0.3118201595358956, + "grad_norm": 0.4817908473273075, + "learning_rate": 8.24951912908459e-06, + "loss": 0.5446, + "step": 645 + }, + { + "epoch": 0.3123036016437032, + "grad_norm": 0.4792564537130554, + "learning_rate": 8.243439502517432e-06, + "loss": 0.5352, + "step": 646 + }, + { + "epoch": 0.31278704375151073, + "grad_norm": 0.5188490831185355, + "learning_rate": 8.237351585457499e-06, + "loss": 0.5298, + "step": 647 + }, + { + "epoch": 0.3132704858593183, + "grad_norm": 0.5232755336111542, + "learning_rate": 8.231255393465993e-06, + "loss": 0.5387, + "step": 648 + }, + { + "epoch": 0.3137539279671259, + "grad_norm": 0.48933101067554713, + "learning_rate": 8.225150942125278e-06, + "loss": 0.5156, + "step": 649 + }, + { + "epoch": 0.3142373700749335, + "grad_norm": 0.47579138598403903, + "learning_rate": 8.21903824703882e-06, + "loss": 0.552, + "step": 650 + }, + { + "epoch": 0.3147208121827411, + "grad_norm": 0.47742859766681844, + "learning_rate": 8.21291732383116e-06, + "loss": 0.5498, + "step": 651 + }, + { + "epoch": 0.3152042542905487, + "grad_norm": 0.5282098462854927, + "learning_rate": 8.206788188147874e-06, + "loss": 0.5327, + "step": 652 + }, + { + "epoch": 0.3156876963983563, + "grad_norm": 0.4655983753785802, + "learning_rate": 8.200650855655525e-06, + "loss": 0.5523, + "step": 653 + }, + { + "epoch": 0.3161711385061639, + "grad_norm": 0.46598993965098007, + "learning_rate": 8.19450534204163e-06, + "loss": 0.5428, + "step": 654 + }, + { + "epoch": 0.3166545806139715, + "grad_norm": 0.48320040727215685, + "learning_rate": 8.188351663014615e-06, + "loss": 0.5511, + "step": 655 + }, + { + "epoch": 0.31713802272177904, + "grad_norm": 0.4851268795547935, + "learning_rate": 8.182189834303783e-06, + "loss": 0.5515, + "step": 656 + }, + { + "epoch": 0.31762146482958664, + 
"grad_norm": 0.4829311813743368, + "learning_rate": 8.176019871659263e-06, + "loss": 0.5425, + "step": 657 + }, + { + "epoch": 0.31810490693739424, + "grad_norm": 0.4268110510337058, + "learning_rate": 8.169841790851976e-06, + "loss": 0.5192, + "step": 658 + }, + { + "epoch": 0.31858834904520184, + "grad_norm": 0.46970357309915234, + "learning_rate": 8.163655607673594e-06, + "loss": 0.5516, + "step": 659 + }, + { + "epoch": 0.31907179115300943, + "grad_norm": 0.4688205789040297, + "learning_rate": 8.157461337936506e-06, + "loss": 0.5398, + "step": 660 + }, + { + "epoch": 0.31955523326081703, + "grad_norm": 0.49966496418563966, + "learning_rate": 8.151258997473757e-06, + "loss": 0.5501, + "step": 661 + }, + { + "epoch": 0.3200386753686246, + "grad_norm": 0.4715831395525512, + "learning_rate": 8.145048602139031e-06, + "loss": 0.5473, + "step": 662 + }, + { + "epoch": 0.3205221174764322, + "grad_norm": 0.46025918766438206, + "learning_rate": 8.138830167806601e-06, + "loss": 0.5481, + "step": 663 + }, + { + "epoch": 0.32100555958423976, + "grad_norm": 0.49250849769551697, + "learning_rate": 8.132603710371287e-06, + "loss": 0.5563, + "step": 664 + }, + { + "epoch": 0.32148900169204736, + "grad_norm": 0.46277397720994495, + "learning_rate": 8.126369245748413e-06, + "loss": 0.5418, + "step": 665 + }, + { + "epoch": 0.32197244379985496, + "grad_norm": 0.44842320811529324, + "learning_rate": 8.120126789873775e-06, + "loss": 0.549, + "step": 666 + }, + { + "epoch": 0.32245588590766255, + "grad_norm": 0.4487718178782243, + "learning_rate": 8.113876358703593e-06, + "loss": 0.5515, + "step": 667 + }, + { + "epoch": 0.32293932801547015, + "grad_norm": 0.49737040438900676, + "learning_rate": 8.10761796821447e-06, + "loss": 0.5529, + "step": 668 + }, + { + "epoch": 0.32342277012327775, + "grad_norm": 0.5088088437400782, + "learning_rate": 8.10135163440336e-06, + "loss": 0.5507, + "step": 669 + }, + { + "epoch": 0.32390621223108534, + "grad_norm": 0.5221100660415426, + 
"learning_rate": 8.095077373287517e-06, + "loss": 0.5363, + "step": 670 + }, + { + "epoch": 0.32438965433889294, + "grad_norm": 0.5098038198929602, + "learning_rate": 8.088795200904457e-06, + "loss": 0.5443, + "step": 671 + }, + { + "epoch": 0.3248730964467005, + "grad_norm": 0.5299548080054053, + "learning_rate": 8.08250513331192e-06, + "loss": 0.5547, + "step": 672 + }, + { + "epoch": 0.3253565385545081, + "grad_norm": 0.47991648628747413, + "learning_rate": 8.076207186587826e-06, + "loss": 0.552, + "step": 673 + }, + { + "epoch": 0.3258399806623157, + "grad_norm": 0.4928995313967277, + "learning_rate": 8.069901376830232e-06, + "loss": 0.5449, + "step": 674 + }, + { + "epoch": 0.3263234227701233, + "grad_norm": 0.526245201002504, + "learning_rate": 8.063587720157298e-06, + "loss": 0.5544, + "step": 675 + }, + { + "epoch": 0.32680686487793087, + "grad_norm": 0.5169185895561939, + "learning_rate": 8.057266232707239e-06, + "loss": 0.5388, + "step": 676 + }, + { + "epoch": 0.32729030698573847, + "grad_norm": 0.45862190884382065, + "learning_rate": 8.050936930638285e-06, + "loss": 0.5523, + "step": 677 + }, + { + "epoch": 0.32777374909354606, + "grad_norm": 0.4791194354627634, + "learning_rate": 8.044599830128643e-06, + "loss": 0.5498, + "step": 678 + }, + { + "epoch": 0.32825719120135366, + "grad_norm": 0.5040011739287719, + "learning_rate": 8.038254947376454e-06, + "loss": 0.5378, + "step": 679 + }, + { + "epoch": 0.3287406333091612, + "grad_norm": 0.42346684737245893, + "learning_rate": 8.03190229859975e-06, + "loss": 0.5541, + "step": 680 + }, + { + "epoch": 0.3292240754169688, + "grad_norm": 0.48225697444636256, + "learning_rate": 8.02554190003641e-06, + "loss": 0.5505, + "step": 681 + }, + { + "epoch": 0.3297075175247764, + "grad_norm": 0.480200233217211, + "learning_rate": 8.019173767944128e-06, + "loss": 0.5563, + "step": 682 + }, + { + "epoch": 0.330190959632584, + "grad_norm": 0.4406037883552503, + "learning_rate": 8.012797918600363e-06, + "loss": 0.5241, + 
"step": 683 + }, + { + "epoch": 0.3306744017403916, + "grad_norm": 0.4838913486529156, + "learning_rate": 8.006414368302297e-06, + "loss": 0.5251, + "step": 684 + }, + { + "epoch": 0.3311578438481992, + "grad_norm": 0.45454190895682295, + "learning_rate": 8.000023133366804e-06, + "loss": 0.5449, + "step": 685 + }, + { + "epoch": 0.3316412859560068, + "grad_norm": 0.49869890620532237, + "learning_rate": 7.99362423013039e-06, + "loss": 0.5401, + "step": 686 + }, + { + "epoch": 0.3321247280638144, + "grad_norm": 0.4727231220514769, + "learning_rate": 7.98721767494917e-06, + "loss": 0.5381, + "step": 687 + }, + { + "epoch": 0.332608170171622, + "grad_norm": 0.46944667758244535, + "learning_rate": 7.980803484198817e-06, + "loss": 0.5542, + "step": 688 + }, + { + "epoch": 0.3330916122794295, + "grad_norm": 0.4643616722232514, + "learning_rate": 7.974381674274517e-06, + "loss": 0.5394, + "step": 689 + }, + { + "epoch": 0.3335750543872371, + "grad_norm": 0.4529493856728362, + "learning_rate": 7.967952261590936e-06, + "loss": 0.5478, + "step": 690 + }, + { + "epoch": 0.3340584964950447, + "grad_norm": 0.4497900124215144, + "learning_rate": 7.961515262582168e-06, + "loss": 0.5387, + "step": 691 + }, + { + "epoch": 0.3345419386028523, + "grad_norm": 0.4613195703294155, + "learning_rate": 7.955070693701704e-06, + "loss": 0.5488, + "step": 692 + }, + { + "epoch": 0.3350253807106599, + "grad_norm": 0.45208853687907335, + "learning_rate": 7.94861857142238e-06, + "loss": 0.5161, + "step": 693 + }, + { + "epoch": 0.3355088228184675, + "grad_norm": 0.45338462953665065, + "learning_rate": 7.942158912236339e-06, + "loss": 0.5504, + "step": 694 + }, + { + "epoch": 0.3359922649262751, + "grad_norm": 0.45784135957705213, + "learning_rate": 7.935691732654995e-06, + "loss": 0.5525, + "step": 695 + }, + { + "epoch": 0.3364757070340827, + "grad_norm": 0.4745455134248678, + "learning_rate": 7.929217049208977e-06, + "loss": 0.5549, + "step": 696 + }, + { + "epoch": 0.33695914914189024, + 
"grad_norm": 0.46788843343497605, + "learning_rate": 7.922734878448099e-06, + "loss": 0.5543, + "step": 697 + }, + { + "epoch": 0.33744259124969783, + "grad_norm": 0.4894111106267614, + "learning_rate": 7.916245236941311e-06, + "loss": 0.5456, + "step": 698 + }, + { + "epoch": 0.33792603335750543, + "grad_norm": 0.4818527781927651, + "learning_rate": 7.90974814127666e-06, + "loss": 0.5436, + "step": 699 + }, + { + "epoch": 0.338409475465313, + "grad_norm": 0.48230512049955104, + "learning_rate": 7.903243608061246e-06, + "loss": 0.5569, + "step": 700 + }, + { + "epoch": 0.3388929175731206, + "grad_norm": 0.4651013778967097, + "learning_rate": 7.89673165392118e-06, + "loss": 0.5497, + "step": 701 + }, + { + "epoch": 0.3393763596809282, + "grad_norm": 0.5263037891579944, + "learning_rate": 7.890212295501542e-06, + "loss": 0.5489, + "step": 702 + }, + { + "epoch": 0.3398598017887358, + "grad_norm": 0.47525750483933155, + "learning_rate": 7.883685549466337e-06, + "loss": 0.5438, + "step": 703 + }, + { + "epoch": 0.3403432438965434, + "grad_norm": 0.48435256135519467, + "learning_rate": 7.877151432498456e-06, + "loss": 0.5506, + "step": 704 + }, + { + "epoch": 0.34082668600435095, + "grad_norm": 0.49040296450298604, + "learning_rate": 7.870609961299627e-06, + "loss": 0.536, + "step": 705 + }, + { + "epoch": 0.34131012811215855, + "grad_norm": 0.4437135993163076, + "learning_rate": 7.864061152590376e-06, + "loss": 0.5539, + "step": 706 + }, + { + "epoch": 0.34179357021996615, + "grad_norm": 0.48585487486606105, + "learning_rate": 7.857505023109989e-06, + "loss": 0.5461, + "step": 707 + }, + { + "epoch": 0.34227701232777374, + "grad_norm": 0.48202275018795376, + "learning_rate": 7.850941589616458e-06, + "loss": 0.5371, + "step": 708 + }, + { + "epoch": 0.34276045443558134, + "grad_norm": 0.4716406712767161, + "learning_rate": 7.844370868886452e-06, + "loss": 0.5557, + "step": 709 + }, + { + "epoch": 0.34324389654338894, + "grad_norm": 0.49083867550017374, + 
"learning_rate": 7.83779287771526e-06, + "loss": 0.5459, + "step": 710 + }, + { + "epoch": 0.34372733865119653, + "grad_norm": 0.49576681886311147, + "learning_rate": 7.831207632916757e-06, + "loss": 0.5466, + "step": 711 + }, + { + "epoch": 0.34421078075900413, + "grad_norm": 0.4533195955521626, + "learning_rate": 7.824615151323363e-06, + "loss": 0.519, + "step": 712 + }, + { + "epoch": 0.3446942228668117, + "grad_norm": 0.4638295505748454, + "learning_rate": 7.818015449785987e-06, + "loss": 0.5485, + "step": 713 + }, + { + "epoch": 0.34517766497461927, + "grad_norm": 0.4802273717901249, + "learning_rate": 7.811408545174001e-06, + "loss": 0.5453, + "step": 714 + }, + { + "epoch": 0.34566110708242687, + "grad_norm": 0.4633815927205105, + "learning_rate": 7.804794454375189e-06, + "loss": 0.5504, + "step": 715 + }, + { + "epoch": 0.34614454919023446, + "grad_norm": 0.4455507327132057, + "learning_rate": 7.798173194295693e-06, + "loss": 0.5425, + "step": 716 + }, + { + "epoch": 0.34662799129804206, + "grad_norm": 0.4555461305882042, + "learning_rate": 7.791544781859993e-06, + "loss": 0.5402, + "step": 717 + }, + { + "epoch": 0.34711143340584966, + "grad_norm": 0.4459519768062681, + "learning_rate": 7.784909234010843e-06, + "loss": 0.5448, + "step": 718 + }, + { + "epoch": 0.34759487551365725, + "grad_norm": 0.43036102684437805, + "learning_rate": 7.778266567709239e-06, + "loss": 0.5532, + "step": 719 + }, + { + "epoch": 0.34807831762146485, + "grad_norm": 0.4640780423848208, + "learning_rate": 7.771616799934372e-06, + "loss": 0.5403, + "step": 720 + }, + { + "epoch": 0.34856175972927245, + "grad_norm": 0.4785048230046999, + "learning_rate": 7.764959947683581e-06, + "loss": 0.5484, + "step": 721 + }, + { + "epoch": 0.34904520183708, + "grad_norm": 0.5054523460781126, + "learning_rate": 7.758296027972324e-06, + "loss": 0.5367, + "step": 722 + }, + { + "epoch": 0.3495286439448876, + "grad_norm": 0.46292028395178175, + "learning_rate": 7.751625057834107e-06, + "loss": 
0.5221, + "step": 723 + }, + { + "epoch": 0.3500120860526952, + "grad_norm": 0.5080355944834025, + "learning_rate": 7.744947054320475e-06, + "loss": 0.552, + "step": 724 + }, + { + "epoch": 0.3504955281605028, + "grad_norm": 0.4692605361826857, + "learning_rate": 7.73826203450094e-06, + "loss": 0.5516, + "step": 725 + }, + { + "epoch": 0.3509789702683104, + "grad_norm": 0.4452014990295793, + "learning_rate": 7.731570015462953e-06, + "loss": 0.5385, + "step": 726 + }, + { + "epoch": 0.35146241237611797, + "grad_norm": 0.4407238797603078, + "learning_rate": 7.724871014311853e-06, + "loss": 0.5512, + "step": 727 + }, + { + "epoch": 0.35194585448392557, + "grad_norm": 0.4578450461185458, + "learning_rate": 7.718165048170827e-06, + "loss": 0.5436, + "step": 728 + }, + { + "epoch": 0.35242929659173317, + "grad_norm": 0.47134089664050416, + "learning_rate": 7.711452134180865e-06, + "loss": 0.5439, + "step": 729 + }, + { + "epoch": 0.3529127386995407, + "grad_norm": 0.45807802586279717, + "learning_rate": 7.704732289500717e-06, + "loss": 0.535, + "step": 730 + }, + { + "epoch": 0.3533961808073483, + "grad_norm": 0.46923078006226726, + "learning_rate": 7.698005531306844e-06, + "loss": 0.5438, + "step": 731 + }, + { + "epoch": 0.3538796229151559, + "grad_norm": 0.47163216368627525, + "learning_rate": 7.691271876793387e-06, + "loss": 0.5412, + "step": 732 + }, + { + "epoch": 0.3543630650229635, + "grad_norm": 0.43982060964801745, + "learning_rate": 7.684531343172108e-06, + "loss": 0.5326, + "step": 733 + }, + { + "epoch": 0.3548465071307711, + "grad_norm": 0.49898422329655673, + "learning_rate": 7.677783947672352e-06, + "loss": 0.5352, + "step": 734 + }, + { + "epoch": 0.3553299492385787, + "grad_norm": 0.4426035951569431, + "learning_rate": 7.67102970754101e-06, + "loss": 0.5083, + "step": 735 + }, + { + "epoch": 0.3558133913463863, + "grad_norm": 0.473881609856312, + "learning_rate": 7.664268640042459e-06, + "loss": 0.5493, + "step": 736 + }, + { + "epoch": 
0.3562968334541939, + "grad_norm": 0.48477702288906854, + "learning_rate": 7.657500762458536e-06, + "loss": 0.5415, + "step": 737 + }, + { + "epoch": 0.3567802755620014, + "grad_norm": 0.4673684560489235, + "learning_rate": 7.65072609208848e-06, + "loss": 0.5402, + "step": 738 + }, + { + "epoch": 0.357263717669809, + "grad_norm": 0.45922228645390506, + "learning_rate": 7.643944646248898e-06, + "loss": 0.5523, + "step": 739 + }, + { + "epoch": 0.3577471597776166, + "grad_norm": 0.5023203702238386, + "learning_rate": 7.637156442273705e-06, + "loss": 0.5472, + "step": 740 + }, + { + "epoch": 0.3582306018854242, + "grad_norm": 0.5158066743775931, + "learning_rate": 7.630361497514104e-06, + "loss": 0.5409, + "step": 741 + }, + { + "epoch": 0.3587140439932318, + "grad_norm": 0.4279389055361383, + "learning_rate": 7.6235598293385184e-06, + "loss": 0.548, + "step": 742 + }, + { + "epoch": 0.3591974861010394, + "grad_norm": 0.48124896416843527, + "learning_rate": 7.616751455132561e-06, + "loss": 0.5061, + "step": 743 + }, + { + "epoch": 0.359680928208847, + "grad_norm": 0.45130820986839676, + "learning_rate": 7.6099363922989845e-06, + "loss": 0.5408, + "step": 744 + }, + { + "epoch": 0.3601643703166546, + "grad_norm": 0.43968032097493187, + "learning_rate": 7.60311465825764e-06, + "loss": 0.5419, + "step": 745 + }, + { + "epoch": 0.36064781242446214, + "grad_norm": 0.4638052394642039, + "learning_rate": 7.596286270445429e-06, + "loss": 0.5474, + "step": 746 + }, + { + "epoch": 0.36113125453226974, + "grad_norm": 0.47215533812036253, + "learning_rate": 7.5894512463162595e-06, + "loss": 0.5481, + "step": 747 + }, + { + "epoch": 0.36161469664007734, + "grad_norm": 0.4910077404120728, + "learning_rate": 7.5826096033410056e-06, + "loss": 0.5483, + "step": 748 + }, + { + "epoch": 0.36209813874788493, + "grad_norm": 0.49435519282302404, + "learning_rate": 7.575761359007459e-06, + "loss": 0.5375, + "step": 749 + }, + { + "epoch": 0.36258158085569253, + "grad_norm": 
0.4645080520487796, + "learning_rate": 7.568906530820281e-06, + "loss": 0.5406, + "step": 750 + }, + { + "epoch": 0.36306502296350013, + "grad_norm": 0.48056836362105476, + "learning_rate": 7.562045136300969e-06, + "loss": 0.547, + "step": 751 + }, + { + "epoch": 0.3635484650713077, + "grad_norm": 0.4648855493783244, + "learning_rate": 7.555177192987797e-06, + "loss": 0.5372, + "step": 752 + }, + { + "epoch": 0.3640319071791153, + "grad_norm": 0.43851001194612105, + "learning_rate": 7.5483027184357825e-06, + "loss": 0.5484, + "step": 753 + }, + { + "epoch": 0.3645153492869229, + "grad_norm": 0.4234049796935857, + "learning_rate": 7.541421730216638e-06, + "loss": 0.4914, + "step": 754 + }, + { + "epoch": 0.36499879139473046, + "grad_norm": 0.4886945785128111, + "learning_rate": 7.534534245918723e-06, + "loss": 0.5362, + "step": 755 + }, + { + "epoch": 0.36548223350253806, + "grad_norm": 0.47490196043064764, + "learning_rate": 7.527640283147003e-06, + "loss": 0.5387, + "step": 756 + }, + { + "epoch": 0.36596567561034565, + "grad_norm": 0.4422931000418374, + "learning_rate": 7.520739859523001e-06, + "loss": 0.5334, + "step": 757 + }, + { + "epoch": 0.36644911771815325, + "grad_norm": 0.45103949345201827, + "learning_rate": 7.513832992684758e-06, + "loss": 0.5423, + "step": 758 + }, + { + "epoch": 0.36693255982596085, + "grad_norm": 0.5023542886793314, + "learning_rate": 7.50691970028678e-06, + "loss": 0.5371, + "step": 759 + }, + { + "epoch": 0.36741600193376844, + "grad_norm": 0.48961990962706975, + "learning_rate": 7.500000000000001e-06, + "loss": 0.5602, + "step": 760 + }, + { + "epoch": 0.36789944404157604, + "grad_norm": 0.47244479352550756, + "learning_rate": 7.493073909511732e-06, + "loss": 0.5399, + "step": 761 + }, + { + "epoch": 0.36838288614938364, + "grad_norm": 0.47262373426445514, + "learning_rate": 7.486141446525619e-06, + "loss": 0.5465, + "step": 762 + }, + { + "epoch": 0.3688663282571912, + "grad_norm": 0.47837508137309714, + "learning_rate": 
7.479202628761597e-06, + "loss": 0.5412, + "step": 763 + }, + { + "epoch": 0.3693497703649988, + "grad_norm": 0.47771254343171743, + "learning_rate": 7.472257473955841e-06, + "loss": 0.5429, + "step": 764 + }, + { + "epoch": 0.36983321247280637, + "grad_norm": 0.45892920422210776, + "learning_rate": 7.465305999860728e-06, + "loss": 0.5358, + "step": 765 + }, + { + "epoch": 0.37031665458061397, + "grad_norm": 0.4636269912834914, + "learning_rate": 7.4583482242447856e-06, + "loss": 0.528, + "step": 766 + }, + { + "epoch": 0.37080009668842157, + "grad_norm": 0.45196732778688614, + "learning_rate": 7.45138416489265e-06, + "loss": 0.5466, + "step": 767 + }, + { + "epoch": 0.37128353879622916, + "grad_norm": 0.44240214579051484, + "learning_rate": 7.444413839605017e-06, + "loss": 0.5315, + "step": 768 + }, + { + "epoch": 0.37176698090403676, + "grad_norm": 0.45295775865600874, + "learning_rate": 7.437437266198602e-06, + "loss": 0.5443, + "step": 769 + }, + { + "epoch": 0.37225042301184436, + "grad_norm": 0.44966146652009026, + "learning_rate": 7.430454462506085e-06, + "loss": 0.5417, + "step": 770 + }, + { + "epoch": 0.3727338651196519, + "grad_norm": 0.451144144721217, + "learning_rate": 7.423465446376079e-06, + "loss": 0.5389, + "step": 771 + }, + { + "epoch": 0.3732173072274595, + "grad_norm": 0.45620688667035586, + "learning_rate": 7.416470235673069e-06, + "loss": 0.538, + "step": 772 + }, + { + "epoch": 0.3737007493352671, + "grad_norm": 0.44604596469243557, + "learning_rate": 7.40946884827738e-06, + "loss": 0.5293, + "step": 773 + }, + { + "epoch": 0.3741841914430747, + "grad_norm": 0.4476638425696451, + "learning_rate": 7.402461302085121e-06, + "loss": 0.5402, + "step": 774 + }, + { + "epoch": 0.3746676335508823, + "grad_norm": 0.46401364076227924, + "learning_rate": 7.395447615008147e-06, + "loss": 0.5377, + "step": 775 + }, + { + "epoch": 0.3751510756586899, + "grad_norm": 0.46250351228418424, + "learning_rate": 7.388427804974003e-06, + "loss": 0.5455, + "step": 
776 + }, + { + "epoch": 0.3756345177664975, + "grad_norm": 0.448294668881327, + "learning_rate": 7.381401889925894e-06, + "loss": 0.5311, + "step": 777 + }, + { + "epoch": 0.3761179598743051, + "grad_norm": 0.4490599419042619, + "learning_rate": 7.374369887822623e-06, + "loss": 0.5416, + "step": 778 + }, + { + "epoch": 0.3766014019821126, + "grad_norm": 0.44904272890455516, + "learning_rate": 7.367331816638554e-06, + "loss": 0.5464, + "step": 779 + }, + { + "epoch": 0.3770848440899202, + "grad_norm": 0.4731428544902919, + "learning_rate": 7.360287694363566e-06, + "loss": 0.5415, + "step": 780 + }, + { + "epoch": 0.3775682861977278, + "grad_norm": 0.4760085542862297, + "learning_rate": 7.353237539002999e-06, + "loss": 0.5388, + "step": 781 + }, + { + "epoch": 0.3780517283055354, + "grad_norm": 0.4817431722421546, + "learning_rate": 7.346181368577624e-06, + "loss": 0.5513, + "step": 782 + }, + { + "epoch": 0.378535170413343, + "grad_norm": 0.462287277146555, + "learning_rate": 7.3391192011235764e-06, + "loss": 0.5393, + "step": 783 + }, + { + "epoch": 0.3790186125211506, + "grad_norm": 0.44812435609118556, + "learning_rate": 7.3320510546923285e-06, + "loss": 0.5509, + "step": 784 + }, + { + "epoch": 0.3795020546289582, + "grad_norm": 0.48025260306275075, + "learning_rate": 7.324976947350631e-06, + "loss": 0.5387, + "step": 785 + }, + { + "epoch": 0.3799854967367658, + "grad_norm": 0.4639545538957294, + "learning_rate": 7.317896897180472e-06, + "loss": 0.5298, + "step": 786 + }, + { + "epoch": 0.3804689388445734, + "grad_norm": 0.46088677266135386, + "learning_rate": 7.31081092227903e-06, + "loss": 0.5371, + "step": 787 + }, + { + "epoch": 0.38095238095238093, + "grad_norm": 0.4446814202000039, + "learning_rate": 7.303719040758631e-06, + "loss": 0.5368, + "step": 788 + }, + { + "epoch": 0.38143582306018853, + "grad_norm": 0.4628164716114684, + "learning_rate": 7.296621270746691e-06, + "loss": 0.5439, + "step": 789 + }, + { + "epoch": 0.3819192651679961, + "grad_norm": 
0.46916898249294825, + "learning_rate": 7.289517630385687e-06, + "loss": 0.5188, + "step": 790 + }, + { + "epoch": 0.3824027072758037, + "grad_norm": 0.44187034218765375, + "learning_rate": 7.282408137833093e-06, + "loss": 0.5404, + "step": 791 + }, + { + "epoch": 0.3828861493836113, + "grad_norm": 0.47877844839355055, + "learning_rate": 7.275292811261346e-06, + "loss": 0.5377, + "step": 792 + }, + { + "epoch": 0.3833695914914189, + "grad_norm": 0.46034254680546527, + "learning_rate": 7.268171668857794e-06, + "loss": 0.5489, + "step": 793 + }, + { + "epoch": 0.3838530335992265, + "grad_norm": 0.4557903207271804, + "learning_rate": 7.261044728824652e-06, + "loss": 0.5415, + "step": 794 + }, + { + "epoch": 0.3843364757070341, + "grad_norm": 0.477573699663043, + "learning_rate": 7.253912009378953e-06, + "loss": 0.5526, + "step": 795 + }, + { + "epoch": 0.38481991781484165, + "grad_norm": 0.5200587123977845, + "learning_rate": 7.246773528752501e-06, + "loss": 0.5452, + "step": 796 + }, + { + "epoch": 0.38530335992264925, + "grad_norm": 0.4504279257869106, + "learning_rate": 7.239629305191828e-06, + "loss": 0.528, + "step": 797 + }, + { + "epoch": 0.38578680203045684, + "grad_norm": 0.47332344951633437, + "learning_rate": 7.2324793569581474e-06, + "loss": 0.5413, + "step": 798 + }, + { + "epoch": 0.38627024413826444, + "grad_norm": 0.4658360771399747, + "learning_rate": 7.2253237023273e-06, + "loss": 0.5111, + "step": 799 + }, + { + "epoch": 0.38675368624607204, + "grad_norm": 0.4820288472202763, + "learning_rate": 7.21816235958972e-06, + "loss": 0.5472, + "step": 800 + }, + { + "epoch": 0.38723712835387963, + "grad_norm": 0.4713168204495061, + "learning_rate": 7.210995347050372e-06, + "loss": 0.5441, + "step": 801 + }, + { + "epoch": 0.38772057046168723, + "grad_norm": 0.45204543208032005, + "learning_rate": 7.203822683028721e-06, + "loss": 0.5403, + "step": 802 + }, + { + "epoch": 0.3882040125694948, + "grad_norm": 0.4577372851640406, + "learning_rate": 
7.196644385858673e-06, + "loss": 0.5303, + "step": 803 + }, + { + "epoch": 0.38868745467730237, + "grad_norm": 0.4458565390228467, + "learning_rate": 7.189460473888535e-06, + "loss": 0.5453, + "step": 804 + }, + { + "epoch": 0.38917089678510997, + "grad_norm": 0.4735785239686837, + "learning_rate": 7.182270965480963e-06, + "loss": 0.5491, + "step": 805 + }, + { + "epoch": 0.38965433889291756, + "grad_norm": 0.44995709460533084, + "learning_rate": 7.17507587901292e-06, + "loss": 0.5328, + "step": 806 + }, + { + "epoch": 0.39013778100072516, + "grad_norm": 0.45714294309440695, + "learning_rate": 7.167875232875632e-06, + "loss": 0.5401, + "step": 807 + }, + { + "epoch": 0.39062122310853276, + "grad_norm": 0.45625350898261685, + "learning_rate": 7.160669045474524e-06, + "loss": 0.5198, + "step": 808 + }, + { + "epoch": 0.39110466521634035, + "grad_norm": 0.45109568818047574, + "learning_rate": 7.153457335229196e-06, + "loss": 0.5396, + "step": 809 + }, + { + "epoch": 0.39158810732414795, + "grad_norm": 0.4362283889114229, + "learning_rate": 7.146240120573358e-06, + "loss": 0.5421, + "step": 810 + }, + { + "epoch": 0.39207154943195555, + "grad_norm": 0.44454460051068084, + "learning_rate": 7.1390174199547945e-06, + "loss": 0.5495, + "step": 811 + }, + { + "epoch": 0.3925549915397631, + "grad_norm": 0.47028424443890243, + "learning_rate": 7.131789251835309e-06, + "loss": 0.5528, + "step": 812 + }, + { + "epoch": 0.3930384336475707, + "grad_norm": 0.44600311061331627, + "learning_rate": 7.124555634690684e-06, + "loss": 0.546, + "step": 813 + }, + { + "epoch": 0.3935218757553783, + "grad_norm": 0.42127806705345067, + "learning_rate": 7.117316587010625e-06, + "loss": 0.5164, + "step": 814 + }, + { + "epoch": 0.3940053178631859, + "grad_norm": 0.4919804442725305, + "learning_rate": 7.110072127298722e-06, + "loss": 0.5405, + "step": 815 + }, + { + "epoch": 0.3944887599709935, + "grad_norm": 0.433673881020081, + "learning_rate": 7.1028222740724e-06, + "loss": 0.5474, + "step": 
816 + }, + { + "epoch": 0.39497220207880107, + "grad_norm": 0.44154650103792475, + "learning_rate": 7.095567045862867e-06, + "loss": 0.537, + "step": 817 + }, + { + "epoch": 0.39545564418660867, + "grad_norm": 0.4862239172154185, + "learning_rate": 7.0883064612150684e-06, + "loss": 0.5418, + "step": 818 + }, + { + "epoch": 0.39593908629441626, + "grad_norm": 0.44482243313717656, + "learning_rate": 7.081040538687649e-06, + "loss": 0.5421, + "step": 819 + }, + { + "epoch": 0.3964225284022238, + "grad_norm": 0.4674091065145529, + "learning_rate": 7.073769296852888e-06, + "loss": 0.5322, + "step": 820 + }, + { + "epoch": 0.3969059705100314, + "grad_norm": 0.42008927529357837, + "learning_rate": 7.066492754296668e-06, + "loss": 0.5021, + "step": 821 + }, + { + "epoch": 0.397389412617839, + "grad_norm": 0.4604995654095531, + "learning_rate": 7.059210929618416e-06, + "loss": 0.5449, + "step": 822 + }, + { + "epoch": 0.3978728547256466, + "grad_norm": 0.45094698267399413, + "learning_rate": 7.051923841431063e-06, + "loss": 0.5376, + "step": 823 + }, + { + "epoch": 0.3983562968334542, + "grad_norm": 0.46562198473083916, + "learning_rate": 7.044631508360996e-06, + "loss": 0.5449, + "step": 824 + }, + { + "epoch": 0.3988397389412618, + "grad_norm": 0.45370053407350325, + "learning_rate": 7.037333949048005e-06, + "loss": 0.5443, + "step": 825 + }, + { + "epoch": 0.3993231810490694, + "grad_norm": 0.45490375066885613, + "learning_rate": 7.03003118214524e-06, + "loss": 0.4994, + "step": 826 + }, + { + "epoch": 0.399806623156877, + "grad_norm": 0.4689025885486327, + "learning_rate": 7.022723226319159e-06, + "loss": 0.5249, + "step": 827 + }, + { + "epoch": 0.4002900652646846, + "grad_norm": 0.4587224558807211, + "learning_rate": 7.0154101002494914e-06, + "loss": 0.531, + "step": 828 + }, + { + "epoch": 0.4007735073724921, + "grad_norm": 0.43840918685603564, + "learning_rate": 7.008091822629172e-06, + "loss": 0.5331, + "step": 829 + }, + { + "epoch": 0.4012569494802997, + 
"grad_norm": 0.5046588696515812, + "learning_rate": 7.00076841216431e-06, + "loss": 0.5302, + "step": 830 + }, + { + "epoch": 0.4017403915881073, + "grad_norm": 0.485313377425035, + "learning_rate": 6.993439887574133e-06, + "loss": 0.5418, + "step": 831 + }, + { + "epoch": 0.4022238336959149, + "grad_norm": 0.47268076488419086, + "learning_rate": 6.986106267590942e-06, + "loss": 0.535, + "step": 832 + }, + { + "epoch": 0.4027072758037225, + "grad_norm": 0.4528644527398112, + "learning_rate": 6.978767570960057e-06, + "loss": 0.5362, + "step": 833 + }, + { + "epoch": 0.4031907179115301, + "grad_norm": 0.458241227473586, + "learning_rate": 6.971423816439782e-06, + "loss": 0.5346, + "step": 834 + }, + { + "epoch": 0.4036741600193377, + "grad_norm": 0.476721951726036, + "learning_rate": 6.964075022801341e-06, + "loss": 0.541, + "step": 835 + }, + { + "epoch": 0.4041576021271453, + "grad_norm": 0.4634216130492689, + "learning_rate": 6.956721208828847e-06, + "loss": 0.5441, + "step": 836 + }, + { + "epoch": 0.40464104423495284, + "grad_norm": 0.46202808494163927, + "learning_rate": 6.949362393319239e-06, + "loss": 0.5416, + "step": 837 + }, + { + "epoch": 0.40512448634276044, + "grad_norm": 0.44981029617918733, + "learning_rate": 6.941998595082243e-06, + "loss": 0.5438, + "step": 838 + }, + { + "epoch": 0.40560792845056803, + "grad_norm": 0.44835769630421096, + "learning_rate": 6.934629832940322e-06, + "loss": 0.5322, + "step": 839 + }, + { + "epoch": 0.40609137055837563, + "grad_norm": 0.4879526087044361, + "learning_rate": 6.927256125728624e-06, + "loss": 0.544, + "step": 840 + }, + { + "epoch": 0.4065748126661832, + "grad_norm": 0.47319742567660167, + "learning_rate": 6.91987749229494e-06, + "loss": 0.5401, + "step": 841 + }, + { + "epoch": 0.4070582547739908, + "grad_norm": 0.4492955794688954, + "learning_rate": 6.91249395149965e-06, + "loss": 0.5393, + "step": 842 + }, + { + "epoch": 0.4075416968817984, + "grad_norm": 0.4436463500280526, + "learning_rate": 
6.905105522215684e-06, + "loss": 0.5384, + "step": 843 + }, + { + "epoch": 0.408025138989606, + "grad_norm": 0.43897712052796284, + "learning_rate": 6.897712223328457e-06, + "loss": 0.5297, + "step": 844 + }, + { + "epoch": 0.40850858109741356, + "grad_norm": 0.5141765277378779, + "learning_rate": 6.89031407373584e-06, + "loss": 0.5386, + "step": 845 + }, + { + "epoch": 0.40899202320522116, + "grad_norm": 0.48662407879743685, + "learning_rate": 6.8829110923481e-06, + "loss": 0.5429, + "step": 846 + }, + { + "epoch": 0.40947546531302875, + "grad_norm": 0.4671606204232888, + "learning_rate": 6.875503298087853e-06, + "loss": 0.5339, + "step": 847 + }, + { + "epoch": 0.40995890742083635, + "grad_norm": 0.47636170298906977, + "learning_rate": 6.868090709890016e-06, + "loss": 0.5392, + "step": 848 + }, + { + "epoch": 0.41044234952864395, + "grad_norm": 0.4249603079226182, + "learning_rate": 6.8606733467017675e-06, + "loss": 0.5046, + "step": 849 + }, + { + "epoch": 0.41092579163645154, + "grad_norm": 0.4669023850804772, + "learning_rate": 6.85325122748248e-06, + "loss": 0.5331, + "step": 850 + }, + { + "epoch": 0.41140923374425914, + "grad_norm": 0.43795275267260886, + "learning_rate": 6.845824371203691e-06, + "loss": 0.5409, + "step": 851 + }, + { + "epoch": 0.41189267585206674, + "grad_norm": 0.44363617551756607, + "learning_rate": 6.838392796849042e-06, + "loss": 0.5371, + "step": 852 + }, + { + "epoch": 0.4123761179598743, + "grad_norm": 0.4451816120752011, + "learning_rate": 6.830956523414239e-06, + "loss": 0.5304, + "step": 853 + }, + { + "epoch": 0.4128595600676819, + "grad_norm": 0.4684744852800341, + "learning_rate": 6.8235155699069944e-06, + "loss": 0.5316, + "step": 854 + }, + { + "epoch": 0.41334300217548947, + "grad_norm": 0.44092290998190986, + "learning_rate": 6.816069955346986e-06, + "loss": 0.5127, + "step": 855 + }, + { + "epoch": 0.41382644428329707, + "grad_norm": 0.49751628964469147, + "learning_rate": 6.808619698765804e-06, + "loss": 0.5459, + 
"step": 856 + }, + { + "epoch": 0.41430988639110466, + "grad_norm": 0.45953817226939175, + "learning_rate": 6.8011648192069045e-06, + "loss": 0.5316, + "step": 857 + }, + { + "epoch": 0.41479332849891226, + "grad_norm": 0.470694123027967, + "learning_rate": 6.7937053357255585e-06, + "loss": 0.5341, + "step": 858 + }, + { + "epoch": 0.41527677060671986, + "grad_norm": 0.482654546386677, + "learning_rate": 6.786241267388812e-06, + "loss": 0.5392, + "step": 859 + }, + { + "epoch": 0.41576021271452746, + "grad_norm": 0.49418422954918506, + "learning_rate": 6.778772633275421e-06, + "loss": 0.5259, + "step": 860 + }, + { + "epoch": 0.41624365482233505, + "grad_norm": 0.4508128718503279, + "learning_rate": 6.771299452475818e-06, + "loss": 0.5439, + "step": 861 + }, + { + "epoch": 0.4167270969301426, + "grad_norm": 0.483501145929134, + "learning_rate": 6.763821744092054e-06, + "loss": 0.521, + "step": 862 + }, + { + "epoch": 0.4172105390379502, + "grad_norm": 0.45484324344169746, + "learning_rate": 6.756339527237756e-06, + "loss": 0.5282, + "step": 863 + }, + { + "epoch": 0.4176939811457578, + "grad_norm": 0.47800071067084154, + "learning_rate": 6.748852821038075e-06, + "loss": 0.5362, + "step": 864 + }, + { + "epoch": 0.4181774232535654, + "grad_norm": 0.4561384481336352, + "learning_rate": 6.741361644629629e-06, + "loss": 0.5452, + "step": 865 + }, + { + "epoch": 0.418660865361373, + "grad_norm": 0.4974806458018085, + "learning_rate": 6.733866017160475e-06, + "loss": 0.5374, + "step": 866 + }, + { + "epoch": 0.4191443074691806, + "grad_norm": 0.4696802681175673, + "learning_rate": 6.7263659577900375e-06, + "loss": 0.5368, + "step": 867 + }, + { + "epoch": 0.4196277495769882, + "grad_norm": 0.46421987800289705, + "learning_rate": 6.718861485689077e-06, + "loss": 0.5361, + "step": 868 + }, + { + "epoch": 0.42011119168479577, + "grad_norm": 0.42817223127930704, + "learning_rate": 6.711352620039623e-06, + "loss": 0.5132, + "step": 869 + }, + { + "epoch": 0.4205946337926033, 
+ "grad_norm": 0.4191221628297101, + "learning_rate": 6.703839380034945e-06, + "loss": 0.5282, + "step": 870 + }, + { + "epoch": 0.4210780759004109, + "grad_norm": 0.4151086659722137, + "learning_rate": 6.6963217848794895e-06, + "loss": 0.5046, + "step": 871 + }, + { + "epoch": 0.4215615180082185, + "grad_norm": 0.4638906293888461, + "learning_rate": 6.6887998537888354e-06, + "loss": 0.5365, + "step": 872 + }, + { + "epoch": 0.4220449601160261, + "grad_norm": 0.46061668652958593, + "learning_rate": 6.681273605989643e-06, + "loss": 0.5315, + "step": 873 + }, + { + "epoch": 0.4225284022238337, + "grad_norm": 0.5190170389663172, + "learning_rate": 6.673743060719613e-06, + "loss": 0.5328, + "step": 874 + }, + { + "epoch": 0.4230118443316413, + "grad_norm": 0.45460321890251315, + "learning_rate": 6.666208237227421e-06, + "loss": 0.5359, + "step": 875 + }, + { + "epoch": 0.4234952864394489, + "grad_norm": 0.4292973087733905, + "learning_rate": 6.6586691547726855e-06, + "loss": 0.5139, + "step": 876 + }, + { + "epoch": 0.4239787285472565, + "grad_norm": 0.47082103025419264, + "learning_rate": 6.651125832625908e-06, + "loss": 0.5455, + "step": 877 + }, + { + "epoch": 0.42446217065506403, + "grad_norm": 0.5113179606142492, + "learning_rate": 6.6435782900684284e-06, + "loss": 0.5528, + "step": 878 + }, + { + "epoch": 0.4249456127628716, + "grad_norm": 0.48001824284611705, + "learning_rate": 6.636026546392374e-06, + "loss": 0.5391, + "step": 879 + }, + { + "epoch": 0.4254290548706792, + "grad_norm": 0.4190847750909328, + "learning_rate": 6.628470620900611e-06, + "loss": 0.5309, + "step": 880 + }, + { + "epoch": 0.4259124969784868, + "grad_norm": 0.4429086362697282, + "learning_rate": 6.620910532906692e-06, + "loss": 0.5194, + "step": 881 + }, + { + "epoch": 0.4263959390862944, + "grad_norm": 0.4425962079317176, + "learning_rate": 6.613346301734813e-06, + "loss": 0.5132, + "step": 882 + }, + { + "epoch": 0.426879381194102, + "grad_norm": 0.4614396271141222, + "learning_rate": 
6.605777946719757e-06, + "loss": 0.5358, + "step": 883 + }, + { + "epoch": 0.4273628233019096, + "grad_norm": 7.793396607210369, + "learning_rate": 6.59820548720685e-06, + "loss": 0.74, + "step": 884 + }, + { + "epoch": 0.4278462654097172, + "grad_norm": 0.47916638143499957, + "learning_rate": 6.590628942551909e-06, + "loss": 0.5401, + "step": 885 + }, + { + "epoch": 0.42832970751752475, + "grad_norm": 0.5010520173856451, + "learning_rate": 6.583048332121193e-06, + "loss": 0.5384, + "step": 886 + }, + { + "epoch": 0.42881314962533235, + "grad_norm": 0.43637840250511184, + "learning_rate": 6.5754636752913535e-06, + "loss": 0.5018, + "step": 887 + }, + { + "epoch": 0.42929659173313994, + "grad_norm": 0.4347273985958766, + "learning_rate": 6.567874991449383e-06, + "loss": 0.5303, + "step": 888 + }, + { + "epoch": 0.42978003384094754, + "grad_norm": 0.470696307591686, + "learning_rate": 6.560282299992571e-06, + "loss": 0.5454, + "step": 889 + }, + { + "epoch": 0.43026347594875514, + "grad_norm": 0.43949603254219816, + "learning_rate": 6.552685620328447e-06, + "loss": 0.5115, + "step": 890 + }, + { + "epoch": 0.43074691805656273, + "grad_norm": 0.5002179505262337, + "learning_rate": 6.545084971874738e-06, + "loss": 0.5386, + "step": 891 + }, + { + "epoch": 0.43123036016437033, + "grad_norm": 0.48898211514461637, + "learning_rate": 6.537480374059313e-06, + "loss": 0.5464, + "step": 892 + }, + { + "epoch": 0.4317138022721779, + "grad_norm": 0.47111181264654595, + "learning_rate": 6.529871846320138e-06, + "loss": 0.5225, + "step": 893 + }, + { + "epoch": 0.4321972443799855, + "grad_norm": 0.47111234243294525, + "learning_rate": 6.522259408105223e-06, + "loss": 0.5363, + "step": 894 + }, + { + "epoch": 0.43268068648779306, + "grad_norm": 0.4867092311804253, + "learning_rate": 6.514643078872571e-06, + "loss": 0.533, + "step": 895 + }, + { + "epoch": 0.43316412859560066, + "grad_norm": 0.4583905211154658, + "learning_rate": 6.507022878090137e-06, + "loss": 0.5428, + "step": 
896 + }, + { + "epoch": 0.43364757070340826, + "grad_norm": 0.4950332559469317, + "learning_rate": 6.499398825235767e-06, + "loss": 0.5337, + "step": 897 + }, + { + "epoch": 0.43413101281121586, + "grad_norm": 0.46856791974797646, + "learning_rate": 6.491770939797152e-06, + "loss": 0.5323, + "step": 898 + }, + { + "epoch": 0.43461445491902345, + "grad_norm": 0.4646364739679311, + "learning_rate": 6.4841392412717864e-06, + "loss": 0.5407, + "step": 899 + }, + { + "epoch": 0.43509789702683105, + "grad_norm": 0.43558921933796657, + "learning_rate": 6.476503749166903e-06, + "loss": 0.5347, + "step": 900 + }, + { + "epoch": 0.43558133913463865, + "grad_norm": 0.4961569425990492, + "learning_rate": 6.4688644829994385e-06, + "loss": 0.5295, + "step": 901 + }, + { + "epoch": 0.43606478124244624, + "grad_norm": 0.4698493360586227, + "learning_rate": 6.4612214622959705e-06, + "loss": 0.5457, + "step": 902 + }, + { + "epoch": 0.4365482233502538, + "grad_norm": 0.435939008717009, + "learning_rate": 6.453574706592676e-06, + "loss": 0.521, + "step": 903 + }, + { + "epoch": 0.4370316654580614, + "grad_norm": 0.5347621942726917, + "learning_rate": 6.44592423543528e-06, + "loss": 0.5354, + "step": 904 + }, + { + "epoch": 0.437515107565869, + "grad_norm": 0.49607229965381217, + "learning_rate": 6.4382700683790025e-06, + "loss": 0.544, + "step": 905 + }, + { + "epoch": 0.4379985496736766, + "grad_norm": 0.4645864217211478, + "learning_rate": 6.4306122249885105e-06, + "loss": 0.5192, + "step": 906 + }, + { + "epoch": 0.43848199178148417, + "grad_norm": 0.46522668277788565, + "learning_rate": 6.422950724837872e-06, + "loss": 0.526, + "step": 907 + }, + { + "epoch": 0.43896543388929177, + "grad_norm": 0.46658657241667495, + "learning_rate": 6.415285587510495e-06, + "loss": 0.5088, + "step": 908 + }, + { + "epoch": 0.43944887599709936, + "grad_norm": 0.49352648435992785, + "learning_rate": 6.407616832599091e-06, + "loss": 0.5291, + "step": 909 + }, + { + "epoch": 0.43993231810490696, + 
"grad_norm": 0.5022475233173346, + "learning_rate": 6.399944479705615e-06, + "loss": 0.5349, + "step": 910 + }, + { + "epoch": 0.4404157602127145, + "grad_norm": 0.4645078975290342, + "learning_rate": 6.392268548441218e-06, + "loss": 0.5356, + "step": 911 + }, + { + "epoch": 0.4408992023205221, + "grad_norm": 0.4656283146754065, + "learning_rate": 6.384589058426201e-06, + "loss": 0.5297, + "step": 912 + }, + { + "epoch": 0.4413826444283297, + "grad_norm": 0.4778447743673107, + "learning_rate": 6.3769060292899585e-06, + "loss": 0.531, + "step": 913 + }, + { + "epoch": 0.4418660865361373, + "grad_norm": 0.5067362493290994, + "learning_rate": 6.3692194806709326e-06, + "loss": 0.5266, + "step": 914 + }, + { + "epoch": 0.4423495286439449, + "grad_norm": 0.4583050351500669, + "learning_rate": 6.36152943221656e-06, + "loss": 0.5068, + "step": 915 + }, + { + "epoch": 0.4428329707517525, + "grad_norm": 0.44674303828563183, + "learning_rate": 6.353835903583225e-06, + "loss": 0.5135, + "step": 916 + }, + { + "epoch": 0.4433164128595601, + "grad_norm": 0.4845057402223313, + "learning_rate": 6.346138914436207e-06, + "loss": 0.53, + "step": 917 + }, + { + "epoch": 0.4437998549673677, + "grad_norm": 0.46927160898270703, + "learning_rate": 6.338438484449632e-06, + "loss": 0.5282, + "step": 918 + }, + { + "epoch": 0.4442832970751752, + "grad_norm": 0.46844884470128584, + "learning_rate": 6.330734633306415e-06, + "loss": 0.5205, + "step": 919 + }, + { + "epoch": 0.4447667391829828, + "grad_norm": 0.46793830654029384, + "learning_rate": 6.3230273806982254e-06, + "loss": 0.5354, + "step": 920 + }, + { + "epoch": 0.4452501812907904, + "grad_norm": 0.46904655646766624, + "learning_rate": 6.31531674632542e-06, + "loss": 0.5403, + "step": 921 + }, + { + "epoch": 0.445733623398598, + "grad_norm": 0.4797829974325548, + "learning_rate": 6.307602749897001e-06, + "loss": 0.5285, + "step": 922 + }, + { + "epoch": 0.4462170655064056, + "grad_norm": 0.5126549879385887, + "learning_rate": 
6.299885411130566e-06, + "loss": 0.5293, + "step": 923 + }, + { + "epoch": 0.4467005076142132, + "grad_norm": 0.47873452298166375, + "learning_rate": 6.292164749752256e-06, + "loss": 0.5358, + "step": 924 + }, + { + "epoch": 0.4471839497220208, + "grad_norm": 0.4697820734594197, + "learning_rate": 6.284440785496701e-06, + "loss": 0.5303, + "step": 925 + }, + { + "epoch": 0.4476673918298284, + "grad_norm": 0.48049048675766015, + "learning_rate": 6.27671353810698e-06, + "loss": 0.5384, + "step": 926 + }, + { + "epoch": 0.448150833937636, + "grad_norm": 0.44829305517954404, + "learning_rate": 6.268983027334557e-06, + "loss": 0.5349, + "step": 927 + }, + { + "epoch": 0.44863427604544354, + "grad_norm": 0.47046261605559686, + "learning_rate": 6.2612492729392396e-06, + "loss": 0.5445, + "step": 928 + }, + { + "epoch": 0.44911771815325113, + "grad_norm": 0.43943250727373434, + "learning_rate": 6.25351229468913e-06, + "loss": 0.5243, + "step": 929 + }, + { + "epoch": 0.44960116026105873, + "grad_norm": 0.457243800658822, + "learning_rate": 6.245772112360568e-06, + "loss": 0.5335, + "step": 930 + }, + { + "epoch": 0.4500846023688663, + "grad_norm": 0.4558342802943274, + "learning_rate": 6.2380287457380814e-06, + "loss": 0.5295, + "step": 931 + }, + { + "epoch": 0.4505680444766739, + "grad_norm": 0.46490304474994104, + "learning_rate": 6.230282214614342e-06, + "loss": 0.5277, + "step": 932 + }, + { + "epoch": 0.4510514865844815, + "grad_norm": 0.46808167357734604, + "learning_rate": 6.222532538790107e-06, + "loss": 0.5038, + "step": 933 + }, + { + "epoch": 0.4515349286922891, + "grad_norm": 0.44746189473984266, + "learning_rate": 6.214779738074169e-06, + "loss": 0.5482, + "step": 934 + }, + { + "epoch": 0.4520183708000967, + "grad_norm": 0.4749348946440431, + "learning_rate": 6.2070238322833165e-06, + "loss": 0.5408, + "step": 935 + }, + { + "epoch": 0.45250181290790426, + "grad_norm": 0.4105832023778957, + "learning_rate": 6.199264841242267e-06, + "loss": 0.507, + "step": 
936 + }, + { + "epoch": 0.45298525501571185, + "grad_norm": 0.45569820188618954, + "learning_rate": 6.191502784783627e-06, + "loss": 0.5361, + "step": 937 + }, + { + "epoch": 0.45346869712351945, + "grad_norm": 0.4487391558411953, + "learning_rate": 6.183737682747839e-06, + "loss": 0.5404, + "step": 938 + }, + { + "epoch": 0.45395213923132705, + "grad_norm": 0.38508572249224443, + "learning_rate": 6.17596955498313e-06, + "loss": 0.475, + "step": 939 + }, + { + "epoch": 0.45443558133913464, + "grad_norm": 0.47287816273000344, + "learning_rate": 6.16819842134546e-06, + "loss": 0.5293, + "step": 940 + }, + { + "epoch": 0.45491902344694224, + "grad_norm": 0.4650031849319042, + "learning_rate": 6.160424301698472e-06, + "loss": 0.5315, + "step": 941 + }, + { + "epoch": 0.45540246555474984, + "grad_norm": 0.4613271364601288, + "learning_rate": 6.1526472159134454e-06, + "loss": 0.5398, + "step": 942 + }, + { + "epoch": 0.45588590766255743, + "grad_norm": 0.42626048007656836, + "learning_rate": 6.1448671838692365e-06, + "loss": 0.5246, + "step": 943 + }, + { + "epoch": 0.456369349770365, + "grad_norm": 0.45709658015590027, + "learning_rate": 6.1370842254522325e-06, + "loss": 0.5392, + "step": 944 + }, + { + "epoch": 0.45685279187817257, + "grad_norm": 0.4969316742066114, + "learning_rate": 6.129298360556304e-06, + "loss": 0.5216, + "step": 945 + }, + { + "epoch": 0.45733623398598017, + "grad_norm": 0.48874038272824616, + "learning_rate": 6.1215096090827485e-06, + "loss": 0.5341, + "step": 946 + }, + { + "epoch": 0.45781967609378776, + "grad_norm": 0.43416511670164404, + "learning_rate": 6.1137179909402445e-06, + "loss": 0.5357, + "step": 947 + }, + { + "epoch": 0.45830311820159536, + "grad_norm": 0.4398289245515299, + "learning_rate": 6.105923526044794e-06, + "loss": 0.5312, + "step": 948 + }, + { + "epoch": 0.45878656030940296, + "grad_norm": 0.506729531362168, + "learning_rate": 6.098126234319679e-06, + "loss": 0.5164, + "step": 949 + }, + { + "epoch": 
0.45927000241721055, + "grad_norm": 0.49661020376021475, + "learning_rate": 6.0903261356954035e-06, + "loss": 0.5406, + "step": 950 + }, + { + "epoch": 0.45975344452501815, + "grad_norm": 0.49213602324131056, + "learning_rate": 6.08252325010965e-06, + "loss": 0.5369, + "step": 951 + }, + { + "epoch": 0.4602368866328257, + "grad_norm": 0.4522403269153531, + "learning_rate": 6.074717597507223e-06, + "loss": 0.5318, + "step": 952 + }, + { + "epoch": 0.4607203287406333, + "grad_norm": 0.46334490270574613, + "learning_rate": 6.066909197839996e-06, + "loss": 0.5053, + "step": 953 + }, + { + "epoch": 0.4612037708484409, + "grad_norm": 0.4555176211572043, + "learning_rate": 6.059098071066874e-06, + "loss": 0.5313, + "step": 954 + }, + { + "epoch": 0.4616872129562485, + "grad_norm": 0.4697386359927019, + "learning_rate": 6.051284237153723e-06, + "loss": 0.5304, + "step": 955 + }, + { + "epoch": 0.4621706550640561, + "grad_norm": 0.47606054578122203, + "learning_rate": 6.043467716073333e-06, + "loss": 0.5392, + "step": 956 + }, + { + "epoch": 0.4626540971718637, + "grad_norm": 0.4504863880612236, + "learning_rate": 6.035648527805359e-06, + "loss": 0.5333, + "step": 957 + }, + { + "epoch": 0.4631375392796713, + "grad_norm": 0.47075391826579777, + "learning_rate": 6.0278266923362805e-06, + "loss": 0.5331, + "step": 958 + }, + { + "epoch": 0.46362098138747887, + "grad_norm": 0.4403281566352786, + "learning_rate": 6.0200022296593375e-06, + "loss": 0.5432, + "step": 959 + }, + { + "epoch": 0.46410442349528647, + "grad_norm": 0.4572023392519702, + "learning_rate": 6.012175159774488e-06, + "loss": 0.5323, + "step": 960 + }, + { + "epoch": 0.464587865603094, + "grad_norm": 0.44773102967818523, + "learning_rate": 6.004345502688353e-06, + "loss": 0.5299, + "step": 961 + }, + { + "epoch": 0.4650713077109016, + "grad_norm": 0.4371461582728146, + "learning_rate": 5.996513278414166e-06, + "loss": 0.5385, + "step": 962 + }, + { + "epoch": 0.4655547498187092, + "grad_norm": 
0.42182149099377253, + "learning_rate": 5.988678506971726e-06, + "loss": 0.5303, + "step": 963 + }, + { + "epoch": 0.4660381919265168, + "grad_norm": 0.4594942911793403, + "learning_rate": 5.980841208387338e-06, + "loss": 0.503, + "step": 964 + }, + { + "epoch": 0.4665216340343244, + "grad_norm": 0.4501324492723119, + "learning_rate": 5.973001402693769e-06, + "loss": 0.5253, + "step": 965 + }, + { + "epoch": 0.467005076142132, + "grad_norm": 0.4373460975849127, + "learning_rate": 5.965159109930196e-06, + "loss": 0.5386, + "step": 966 + }, + { + "epoch": 0.4674885182499396, + "grad_norm": 0.4418857621109752, + "learning_rate": 5.957314350142149e-06, + "loss": 0.529, + "step": 967 + }, + { + "epoch": 0.4679719603577472, + "grad_norm": 0.485651415733174, + "learning_rate": 5.94946714338147e-06, + "loss": 0.538, + "step": 968 + }, + { + "epoch": 0.4684554024655547, + "grad_norm": 0.44827050412474007, + "learning_rate": 5.941617509706247e-06, + "loss": 0.5333, + "step": 969 + }, + { + "epoch": 0.4689388445733623, + "grad_norm": 0.44674045160443804, + "learning_rate": 5.933765469180779e-06, + "loss": 0.5329, + "step": 970 + }, + { + "epoch": 0.4694222866811699, + "grad_norm": 0.47047904115025807, + "learning_rate": 5.925911041875514e-06, + "loss": 0.5304, + "step": 971 + }, + { + "epoch": 0.4699057287889775, + "grad_norm": 0.47618192657266745, + "learning_rate": 5.9180542478670025e-06, + "loss": 0.5339, + "step": 972 + }, + { + "epoch": 0.4703891708967851, + "grad_norm": 0.4372302817189476, + "learning_rate": 5.910195107237842e-06, + "loss": 0.5311, + "step": 973 + }, + { + "epoch": 0.4708726130045927, + "grad_norm": 0.4599705922186888, + "learning_rate": 5.902333640076627e-06, + "loss": 0.5294, + "step": 974 + }, + { + "epoch": 0.4713560551124003, + "grad_norm": 0.4668491954105507, + "learning_rate": 5.894469866477905e-06, + "loss": 0.5319, + "step": 975 + }, + { + "epoch": 0.4718394972202079, + "grad_norm": 0.4298713287991765, + "learning_rate": 5.886603806542114e-06, 
+ "loss": 0.5308, + "step": 976 + }, + { + "epoch": 0.47232293932801545, + "grad_norm": 0.44120727064086296, + "learning_rate": 5.878735480375537e-06, + "loss": 0.5271, + "step": 977 + }, + { + "epoch": 0.47280638143582304, + "grad_norm": 0.45517085979612587, + "learning_rate": 5.87086490809025e-06, + "loss": 0.5363, + "step": 978 + }, + { + "epoch": 0.47328982354363064, + "grad_norm": 0.41718667843710106, + "learning_rate": 5.862992109804071e-06, + "loss": 0.5208, + "step": 979 + }, + { + "epoch": 0.47377326565143824, + "grad_norm": 0.4698208514148652, + "learning_rate": 5.855117105640503e-06, + "loss": 0.5045, + "step": 980 + }, + { + "epoch": 0.47425670775924583, + "grad_norm": 0.44330430106357593, + "learning_rate": 5.847239915728695e-06, + "loss": 0.5213, + "step": 981 + }, + { + "epoch": 0.47474014986705343, + "grad_norm": 0.46348151704515983, + "learning_rate": 5.839360560203379e-06, + "loss": 0.5416, + "step": 982 + }, + { + "epoch": 0.475223591974861, + "grad_norm": 0.4369134554030048, + "learning_rate": 5.831479059204822e-06, + "loss": 0.5397, + "step": 983 + }, + { + "epoch": 0.4757070340826686, + "grad_norm": 0.434332595795343, + "learning_rate": 5.823595432878775e-06, + "loss": 0.5253, + "step": 984 + }, + { + "epoch": 0.47619047619047616, + "grad_norm": 0.45066877191979876, + "learning_rate": 5.815709701376424e-06, + "loss": 0.501, + "step": 985 + }, + { + "epoch": 0.47667391829828376, + "grad_norm": 0.43680722385313575, + "learning_rate": 5.8078218848543326e-06, + "loss": 0.5425, + "step": 986 + }, + { + "epoch": 0.47715736040609136, + "grad_norm": 0.462367177083934, + "learning_rate": 5.799932003474398e-06, + "loss": 0.5293, + "step": 987 + }, + { + "epoch": 0.47764080251389895, + "grad_norm": 0.46046005650848004, + "learning_rate": 5.7920400774037884e-06, + "loss": 0.5273, + "step": 988 + }, + { + "epoch": 0.47812424462170655, + "grad_norm": 0.4325701614014005, + "learning_rate": 5.784146126814909e-06, + "loss": 0.523, + "step": 989 + }, + { + 
"epoch": 0.47860768672951415, + "grad_norm": 0.42688145845821907, + "learning_rate": 5.776250171885329e-06, + "loss": 0.5289, + "step": 990 + }, + { + "epoch": 0.47909112883732174, + "grad_norm": 0.4553909518433745, + "learning_rate": 5.768352232797748e-06, + "loss": 0.5354, + "step": 991 + }, + { + "epoch": 0.47957457094512934, + "grad_norm": 0.4612709888611227, + "learning_rate": 5.760452329739933e-06, + "loss": 0.5346, + "step": 992 + }, + { + "epoch": 0.48005801305293694, + "grad_norm": 0.4671180697912862, + "learning_rate": 5.752550482904674e-06, + "loss": 0.5381, + "step": 993 + }, + { + "epoch": 0.4805414551607445, + "grad_norm": 0.44306247126150655, + "learning_rate": 5.744646712489729e-06, + "loss": 0.5347, + "step": 994 + }, + { + "epoch": 0.4810248972685521, + "grad_norm": 0.47845590337101257, + "learning_rate": 5.736741038697771e-06, + "loss": 0.546, + "step": 995 + }, + { + "epoch": 0.4815083393763597, + "grad_norm": 0.42271596693767594, + "learning_rate": 5.728833481736339e-06, + "loss": 0.5189, + "step": 996 + }, + { + "epoch": 0.48199178148416727, + "grad_norm": 0.4538365308944498, + "learning_rate": 5.720924061817786e-06, + "loss": 0.5405, + "step": 997 + }, + { + "epoch": 0.48247522359197487, + "grad_norm": 0.4492861025701022, + "learning_rate": 5.71301279915923e-06, + "loss": 0.5317, + "step": 998 + }, + { + "epoch": 0.48295866569978246, + "grad_norm": 0.4745326397538611, + "learning_rate": 5.705099713982491e-06, + "loss": 0.532, + "step": 999 + }, + { + "epoch": 0.48344210780759006, + "grad_norm": 0.44214626511814337, + "learning_rate": 5.697184826514058e-06, + "loss": 0.5305, + "step": 1000 + }, + { + "epoch": 0.48392554991539766, + "grad_norm": 0.46953984488621225, + "learning_rate": 5.689268156985015e-06, + "loss": 0.5385, + "step": 1001 + }, + { + "epoch": 0.4844089920232052, + "grad_norm": 0.421787366978404, + "learning_rate": 5.6813497256310124e-06, + "loss": 0.5468, + "step": 1002 + }, + { + "epoch": 0.4848924341310128, + "grad_norm": 
0.44994209423666665, + "learning_rate": 5.673429552692196e-06, + "loss": 0.5259, + "step": 1003 + }, + { + "epoch": 0.4853758762388204, + "grad_norm": 0.4777649344533823, + "learning_rate": 5.66550765841317e-06, + "loss": 0.5312, + "step": 1004 + }, + { + "epoch": 0.485859318346628, + "grad_norm": 0.4450135880363142, + "learning_rate": 5.6575840630429295e-06, + "loss": 0.5234, + "step": 1005 + }, + { + "epoch": 0.4863427604544356, + "grad_norm": 0.48478431421225965, + "learning_rate": 5.649658786834825e-06, + "loss": 0.5337, + "step": 1006 + }, + { + "epoch": 0.4868262025622432, + "grad_norm": 0.4600376201705413, + "learning_rate": 5.641731850046503e-06, + "loss": 0.5292, + "step": 1007 + }, + { + "epoch": 0.4873096446700508, + "grad_norm": 0.46358685580492737, + "learning_rate": 5.633803272939851e-06, + "loss": 0.5033, + "step": 1008 + }, + { + "epoch": 0.4877930867778584, + "grad_norm": 0.4424655348417247, + "learning_rate": 5.62587307578095e-06, + "loss": 0.5199, + "step": 1009 + }, + { + "epoch": 0.4882765288856659, + "grad_norm": 0.44958190643210255, + "learning_rate": 5.6179412788400255e-06, + "loss": 0.5285, + "step": 1010 + }, + { + "epoch": 0.4887599709934735, + "grad_norm": 0.46154354908053036, + "learning_rate": 5.610007902391387e-06, + "loss": 0.5302, + "step": 1011 + }, + { + "epoch": 0.4892434131012811, + "grad_norm": 0.4787830389554928, + "learning_rate": 5.602072966713389e-06, + "loss": 0.5319, + "step": 1012 + }, + { + "epoch": 0.4897268552090887, + "grad_norm": 0.46247126201641375, + "learning_rate": 5.594136492088363e-06, + "loss": 0.533, + "step": 1013 + }, + { + "epoch": 0.4902102973168963, + "grad_norm": 0.4497969101747785, + "learning_rate": 5.586198498802577e-06, + "loss": 0.5207, + "step": 1014 + }, + { + "epoch": 0.4906937394247039, + "grad_norm": 0.4711750674532031, + "learning_rate": 5.578259007146183e-06, + "loss": 0.5182, + "step": 1015 + }, + { + "epoch": 0.4911771815325115, + "grad_norm": 0.44280948751548693, + "learning_rate": 
5.570318037413162e-06, + "loss": 0.5335, + "step": 1016 + }, + { + "epoch": 0.4916606236403191, + "grad_norm": 0.4140593593091692, + "learning_rate": 5.562375609901273e-06, + "loss": 0.498, + "step": 1017 + }, + { + "epoch": 0.49214406574812664, + "grad_norm": 0.5035482968527856, + "learning_rate": 5.5544317449119975e-06, + "loss": 0.535, + "step": 1018 + }, + { + "epoch": 0.49262750785593423, + "grad_norm": 0.470430802340302, + "learning_rate": 5.546486462750499e-06, + "loss": 0.529, + "step": 1019 + }, + { + "epoch": 0.49311094996374183, + "grad_norm": 0.4614608636774156, + "learning_rate": 5.538539783725556e-06, + "loss": 0.5415, + "step": 1020 + }, + { + "epoch": 0.4935943920715494, + "grad_norm": 0.43439106514800707, + "learning_rate": 5.530591728149522e-06, + "loss": 0.5237, + "step": 1021 + }, + { + "epoch": 0.494077834179357, + "grad_norm": 0.4532581332185661, + "learning_rate": 5.522642316338268e-06, + "loss": 0.5275, + "step": 1022 + }, + { + "epoch": 0.4945612762871646, + "grad_norm": 0.44400011265861933, + "learning_rate": 5.51469156861113e-06, + "loss": 0.5279, + "step": 1023 + }, + { + "epoch": 0.4950447183949722, + "grad_norm": 0.464713276215896, + "learning_rate": 5.50673950529086e-06, + "loss": 0.5261, + "step": 1024 + }, + { + "epoch": 0.4955281605027798, + "grad_norm": 0.46801515065857147, + "learning_rate": 5.498786146703575e-06, + "loss": 0.5392, + "step": 1025 + }, + { + "epoch": 0.4960116026105874, + "grad_norm": 0.43826397020984875, + "learning_rate": 5.490831513178698e-06, + "loss": 0.513, + "step": 1026 + }, + { + "epoch": 0.49649504471839495, + "grad_norm": 0.4545609138302334, + "learning_rate": 5.482875625048916e-06, + "loss": 0.5342, + "step": 1027 + }, + { + "epoch": 0.49697848682620255, + "grad_norm": 0.45528220157554766, + "learning_rate": 5.474918502650116e-06, + "loss": 0.5371, + "step": 1028 + }, + { + "epoch": 0.49746192893401014, + "grad_norm": 0.4918245291291218, + "learning_rate": 5.466960166321348e-06, + "loss": 0.5248, + 
"step": 1029 + }, + { + "epoch": 0.49794537104181774, + "grad_norm": 0.43603238272665606, + "learning_rate": 5.459000636404759e-06, + "loss": 0.5236, + "step": 1030 + }, + { + "epoch": 0.49842881314962534, + "grad_norm": 0.43802907063700564, + "learning_rate": 5.451039933245551e-06, + "loss": 0.5342, + "step": 1031 + }, + { + "epoch": 0.49891225525743294, + "grad_norm": 0.4660487433831574, + "learning_rate": 5.44307807719192e-06, + "loss": 0.5249, + "step": 1032 + }, + { + "epoch": 0.49939569736524053, + "grad_norm": 0.4349479566561577, + "learning_rate": 5.435115088595016e-06, + "loss": 0.4997, + "step": 1033 + }, + { + "epoch": 0.49987913947304813, + "grad_norm": 0.43592525172206065, + "learning_rate": 5.4271509878088755e-06, + "loss": 0.5263, + "step": 1034 + }, + { + "epoch": 0.5003625815808557, + "grad_norm": 0.42380458268711035, + "learning_rate": 5.4191857951903825e-06, + "loss": 0.503, + "step": 1035 + }, + { + "epoch": 0.5008460236886633, + "grad_norm": 0.47347721204466, + "learning_rate": 5.4112195310992144e-06, + "loss": 0.5228, + "step": 1036 + }, + { + "epoch": 0.5013294657964709, + "grad_norm": 0.4523581952116975, + "learning_rate": 5.403252215897781e-06, + "loss": 0.5295, + "step": 1037 + }, + { + "epoch": 0.5018129079042785, + "grad_norm": 0.44718772307460525, + "learning_rate": 5.395283869951184e-06, + "loss": 0.5402, + "step": 1038 + }, + { + "epoch": 0.5022963500120861, + "grad_norm": 0.42574725470561453, + "learning_rate": 5.387314513627156e-06, + "loss": 0.5228, + "step": 1039 + }, + { + "epoch": 0.5027797921198937, + "grad_norm": 0.4816073830916942, + "learning_rate": 5.379344167296017e-06, + "loss": 0.5302, + "step": 1040 + }, + { + "epoch": 0.5032632342277013, + "grad_norm": 0.454535172796951, + "learning_rate": 5.371372851330612e-06, + "loss": 0.5337, + "step": 1041 + }, + { + "epoch": 0.5037466763355088, + "grad_norm": 0.44304402033518747, + "learning_rate": 5.3634005861062675e-06, + "loss": 0.5348, + "step": 1042 + }, + { + "epoch": 
0.5042301184433164, + "grad_norm": 0.4647077624164304, + "learning_rate": 5.355427392000736e-06, + "loss": 0.5367, + "step": 1043 + }, + { + "epoch": 0.504713560551124, + "grad_norm": 0.47204921033701974, + "learning_rate": 5.347453289394146e-06, + "loss": 0.5236, + "step": 1044 + }, + { + "epoch": 0.5051970026589316, + "grad_norm": 0.444269913904137, + "learning_rate": 5.339478298668943e-06, + "loss": 0.5374, + "step": 1045 + }, + { + "epoch": 0.5056804447667392, + "grad_norm": 0.437749383717649, + "learning_rate": 5.331502440209849e-06, + "loss": 0.529, + "step": 1046 + }, + { + "epoch": 0.5061638868745467, + "grad_norm": 0.44850371734571, + "learning_rate": 5.3235257344037996e-06, + "loss": 0.5363, + "step": 1047 + }, + { + "epoch": 0.5066473289823543, + "grad_norm": 0.4687290830806752, + "learning_rate": 5.3155482016398995e-06, + "loss": 0.5335, + "step": 1048 + }, + { + "epoch": 0.5071307710901619, + "grad_norm": 0.43863314490483407, + "learning_rate": 5.307569862309363e-06, + "loss": 0.5269, + "step": 1049 + }, + { + "epoch": 0.5076142131979695, + "grad_norm": 0.44499358455418236, + "learning_rate": 5.29959073680547e-06, + "loss": 0.5269, + "step": 1050 + }, + { + "epoch": 0.5080976553057771, + "grad_norm": 0.4443034689772289, + "learning_rate": 5.2916108455235084e-06, + "loss": 0.5247, + "step": 1051 + }, + { + "epoch": 0.5085810974135847, + "grad_norm": 0.41196428985814043, + "learning_rate": 5.2836302088607235e-06, + "loss": 0.5257, + "step": 1052 + }, + { + "epoch": 0.5090645395213923, + "grad_norm": 0.4196111969098687, + "learning_rate": 5.275648847216263e-06, + "loss": 0.5326, + "step": 1053 + }, + { + "epoch": 0.5095479816291999, + "grad_norm": 0.4396027925039716, + "learning_rate": 5.267666780991135e-06, + "loss": 0.5384, + "step": 1054 + }, + { + "epoch": 0.5100314237370075, + "grad_norm": 0.4918850008899013, + "learning_rate": 5.259684030588141e-06, + "loss": 0.5217, + "step": 1055 + }, + { + "epoch": 0.5105148658448151, + "grad_norm": 
0.4636011028874602, + "learning_rate": 5.251700616411836e-06, + "loss": 0.5292, + "step": 1056 + }, + { + "epoch": 0.5109983079526227, + "grad_norm": 0.45080015588791633, + "learning_rate": 5.243716558868469e-06, + "loss": 0.5335, + "step": 1057 + }, + { + "epoch": 0.5114817500604303, + "grad_norm": 0.4430606358386943, + "learning_rate": 5.235731878365935e-06, + "loss": 0.5366, + "step": 1058 + }, + { + "epoch": 0.5119651921682379, + "grad_norm": 0.4765633831252005, + "learning_rate": 5.22774659531372e-06, + "loss": 0.5343, + "step": 1059 + }, + { + "epoch": 0.5124486342760455, + "grad_norm": 0.47759199812886083, + "learning_rate": 5.219760730122854e-06, + "loss": 0.5318, + "step": 1060 + }, + { + "epoch": 0.5129320763838531, + "grad_norm": 0.4277450357502153, + "learning_rate": 5.211774303205849e-06, + "loss": 0.5055, + "step": 1061 + }, + { + "epoch": 0.5134155184916607, + "grad_norm": 0.42824551582926373, + "learning_rate": 5.203787334976655e-06, + "loss": 0.5015, + "step": 1062 + }, + { + "epoch": 0.5138989605994683, + "grad_norm": 0.5041405711353805, + "learning_rate": 5.195799845850611e-06, + "loss": 0.525, + "step": 1063 + }, + { + "epoch": 0.5143824027072758, + "grad_norm": 0.4306413852615145, + "learning_rate": 5.18781185624438e-06, + "loss": 0.5265, + "step": 1064 + }, + { + "epoch": 0.5148658448150834, + "grad_norm": 0.4361432944536921, + "learning_rate": 5.179823386575908e-06, + "loss": 0.5311, + "step": 1065 + }, + { + "epoch": 0.515349286922891, + "grad_norm": 0.4240690760700838, + "learning_rate": 5.171834457264364e-06, + "loss": 0.5286, + "step": 1066 + }, + { + "epoch": 0.5158327290306985, + "grad_norm": 0.47256746355593565, + "learning_rate": 5.1638450887301006e-06, + "loss": 0.5282, + "step": 1067 + }, + { + "epoch": 0.5163161711385061, + "grad_norm": 0.4445988854998036, + "learning_rate": 5.155855301394585e-06, + "loss": 0.527, + "step": 1068 + }, + { + "epoch": 0.5167996132463137, + "grad_norm": 0.46405127876443125, + "learning_rate": 
5.147865115680357e-06, + "loss": 0.5289, + "step": 1069 + }, + { + "epoch": 0.5172830553541213, + "grad_norm": 0.45801111107179227, + "learning_rate": 5.139874552010975e-06, + "loss": 0.531, + "step": 1070 + }, + { + "epoch": 0.5177664974619289, + "grad_norm": 0.454345233725776, + "learning_rate": 5.131883630810966e-06, + "loss": 0.5428, + "step": 1071 + }, + { + "epoch": 0.5182499395697365, + "grad_norm": 0.4649418568564353, + "learning_rate": 5.123892372505768e-06, + "loss": 0.524, + "step": 1072 + }, + { + "epoch": 0.5187333816775441, + "grad_norm": 0.4609921972037312, + "learning_rate": 5.11590079752168e-06, + "loss": 0.5337, + "step": 1073 + }, + { + "epoch": 0.5192168237853517, + "grad_norm": 0.4257268056499296, + "learning_rate": 5.107908926285813e-06, + "loss": 0.5247, + "step": 1074 + }, + { + "epoch": 0.5197002658931593, + "grad_norm": 0.4277925159892485, + "learning_rate": 5.099916779226032e-06, + "loss": 0.5314, + "step": 1075 + }, + { + "epoch": 0.5201837080009669, + "grad_norm": 0.46374173796570095, + "learning_rate": 5.091924376770912e-06, + "loss": 0.5267, + "step": 1076 + }, + { + "epoch": 0.5206671501087745, + "grad_norm": 0.46413399868377414, + "learning_rate": 5.083931739349675e-06, + "loss": 0.5227, + "step": 1077 + }, + { + "epoch": 0.5211505922165821, + "grad_norm": 0.42805047816232233, + "learning_rate": 5.075938887392149e-06, + "loss": 0.5148, + "step": 1078 + }, + { + "epoch": 0.5216340343243897, + "grad_norm": 0.42348168283938675, + "learning_rate": 5.0679458413287055e-06, + "loss": 0.5168, + "step": 1079 + }, + { + "epoch": 0.5221174764321972, + "grad_norm": 0.48349127354280697, + "learning_rate": 5.059952621590216e-06, + "loss": 0.5274, + "step": 1080 + }, + { + "epoch": 0.5226009185400048, + "grad_norm": 0.447828940551305, + "learning_rate": 5.051959248607993e-06, + "loss": 0.5251, + "step": 1081 + }, + { + "epoch": 0.5230843606478124, + "grad_norm": 0.45995219297674483, + "learning_rate": 5.043965742813744e-06, + "loss": 0.5246, + 
"step": 1082 + }, + { + "epoch": 0.52356780275562, + "grad_norm": 0.4744443087812869, + "learning_rate": 5.035972124639511e-06, + "loss": 0.5299, + "step": 1083 + }, + { + "epoch": 0.5240512448634276, + "grad_norm": 0.4140590259366713, + "learning_rate": 5.02797841451763e-06, + "loss": 0.5273, + "step": 1084 + }, + { + "epoch": 0.5245346869712352, + "grad_norm": 0.4421687910150297, + "learning_rate": 5.019984632880665e-06, + "loss": 0.5342, + "step": 1085 + }, + { + "epoch": 0.5250181290790428, + "grad_norm": 0.4665180180836343, + "learning_rate": 5.011990800161369e-06, + "loss": 0.5314, + "step": 1086 + }, + { + "epoch": 0.5255015711868504, + "grad_norm": 0.45569707259235365, + "learning_rate": 5.00399693679262e-06, + "loss": 0.5291, + "step": 1087 + }, + { + "epoch": 0.525985013294658, + "grad_norm": 0.3966652463954235, + "learning_rate": 4.9960030632073815e-06, + "loss": 0.4852, + "step": 1088 + }, + { + "epoch": 0.5264684554024656, + "grad_norm": 0.4156779611396039, + "learning_rate": 4.988009199838632e-06, + "loss": 0.5266, + "step": 1089 + }, + { + "epoch": 0.5269518975102732, + "grad_norm": 0.4459927015276638, + "learning_rate": 4.980015367119336e-06, + "loss": 0.5128, + "step": 1090 + }, + { + "epoch": 0.5274353396180808, + "grad_norm": 0.4470940459251613, + "learning_rate": 4.9720215854823716e-06, + "loss": 0.5215, + "step": 1091 + }, + { + "epoch": 0.5279187817258884, + "grad_norm": 0.4379040861596386, + "learning_rate": 4.96402787536049e-06, + "loss": 0.529, + "step": 1092 + }, + { + "epoch": 0.528402223833696, + "grad_norm": 0.42846117019918506, + "learning_rate": 4.956034257186258e-06, + "loss": 0.5196, + "step": 1093 + }, + { + "epoch": 0.5288856659415035, + "grad_norm": 0.45571668646782787, + "learning_rate": 4.9480407513920086e-06, + "loss": 0.527, + "step": 1094 + }, + { + "epoch": 0.5293691080493111, + "grad_norm": 0.4680208745726276, + "learning_rate": 4.940047378409786e-06, + "loss": 0.523, + "step": 1095 + }, + { + "epoch": 0.5298525501571187, 
+ "grad_norm": 0.44029959009092084, + "learning_rate": 4.932054158671295e-06, + "loss": 0.5244, + "step": 1096 + }, + { + "epoch": 0.5303359922649262, + "grad_norm": 0.45471058687459337, + "learning_rate": 4.924061112607853e-06, + "loss": 0.532, + "step": 1097 + }, + { + "epoch": 0.5308194343727338, + "grad_norm": 0.43297324457453135, + "learning_rate": 4.9160682606503255e-06, + "loss": 0.5226, + "step": 1098 + }, + { + "epoch": 0.5313028764805414, + "grad_norm": 0.4269816974887771, + "learning_rate": 4.908075623229089e-06, + "loss": 0.5242, + "step": 1099 + }, + { + "epoch": 0.531786318588349, + "grad_norm": 0.4280455143246443, + "learning_rate": 4.900083220773968e-06, + "loss": 0.5082, + "step": 1100 + }, + { + "epoch": 0.5322697606961566, + "grad_norm": 0.4422972364197889, + "learning_rate": 4.892091073714189e-06, + "loss": 0.5162, + "step": 1101 + }, + { + "epoch": 0.5327532028039642, + "grad_norm": 0.44177794728378245, + "learning_rate": 4.88409920247832e-06, + "loss": 0.5287, + "step": 1102 + }, + { + "epoch": 0.5332366449117718, + "grad_norm": 0.4384466281073893, + "learning_rate": 4.876107627494234e-06, + "loss": 0.5224, + "step": 1103 + }, + { + "epoch": 0.5337200870195794, + "grad_norm": 0.42791066381831555, + "learning_rate": 4.868116369189033e-06, + "loss": 0.519, + "step": 1104 + }, + { + "epoch": 0.534203529127387, + "grad_norm": 0.43287603203122865, + "learning_rate": 4.860125447989026e-06, + "loss": 0.5315, + "step": 1105 + }, + { + "epoch": 0.5346869712351946, + "grad_norm": 0.40835340581900775, + "learning_rate": 4.852134884319646e-06, + "loss": 0.5013, + "step": 1106 + }, + { + "epoch": 0.5351704133430022, + "grad_norm": 0.4564877621647975, + "learning_rate": 4.844144698605418e-06, + "loss": 0.5163, + "step": 1107 + }, + { + "epoch": 0.5356538554508098, + "grad_norm": 0.4350849446506223, + "learning_rate": 4.836154911269902e-06, + "loss": 0.5216, + "step": 1108 + }, + { + "epoch": 0.5361372975586174, + "grad_norm": 0.45777989132001196, + 
"learning_rate": 4.8281655427356375e-06, + "loss": 0.5248, + "step": 1109 + }, + { + "epoch": 0.536620739666425, + "grad_norm": 0.463821530722432, + "learning_rate": 4.820176613424095e-06, + "loss": 0.5302, + "step": 1110 + }, + { + "epoch": 0.5371041817742326, + "grad_norm": 0.44517887708797427, + "learning_rate": 4.812188143755621e-06, + "loss": 0.5214, + "step": 1111 + }, + { + "epoch": 0.5375876238820402, + "grad_norm": 0.5017058167871686, + "learning_rate": 4.80420015414939e-06, + "loss": 0.5305, + "step": 1112 + }, + { + "epoch": 0.5380710659898477, + "grad_norm": 0.4672768776486219, + "learning_rate": 4.796212665023345e-06, + "loss": 0.5237, + "step": 1113 + }, + { + "epoch": 0.5385545080976553, + "grad_norm": 0.4419995316595335, + "learning_rate": 4.788225696794153e-06, + "loss": 0.5277, + "step": 1114 + }, + { + "epoch": 0.5390379502054629, + "grad_norm": 0.44730511943868706, + "learning_rate": 4.780239269877147e-06, + "loss": 0.5313, + "step": 1115 + }, + { + "epoch": 0.5395213923132705, + "grad_norm": 0.45204850110236905, + "learning_rate": 4.7722534046862805e-06, + "loss": 0.5231, + "step": 1116 + }, + { + "epoch": 0.540004834421078, + "grad_norm": 0.45779960123303604, + "learning_rate": 4.764268121634066e-06, + "loss": 0.507, + "step": 1117 + }, + { + "epoch": 0.5404882765288856, + "grad_norm": 0.41695878051285573, + "learning_rate": 4.7562834411315324e-06, + "loss": 0.5042, + "step": 1118 + }, + { + "epoch": 0.5409717186366932, + "grad_norm": 0.4513235919318933, + "learning_rate": 4.748299383588167e-06, + "loss": 0.5258, + "step": 1119 + }, + { + "epoch": 0.5414551607445008, + "grad_norm": 0.467590473541893, + "learning_rate": 4.74031596941186e-06, + "loss": 0.5375, + "step": 1120 + }, + { + "epoch": 0.5419386028523084, + "grad_norm": 0.4524946558422428, + "learning_rate": 4.7323332190088675e-06, + "loss": 0.5199, + "step": 1121 + }, + { + "epoch": 0.542422044960116, + "grad_norm": 0.42113263739650647, + "learning_rate": 4.7243511527837374e-06, + 
"loss": 0.5251, + "step": 1122 + }, + { + "epoch": 0.5429054870679236, + "grad_norm": 0.4636225245244688, + "learning_rate": 4.716369791139279e-06, + "loss": 0.5308, + "step": 1123 + }, + { + "epoch": 0.5433889291757312, + "grad_norm": 0.4398566927754983, + "learning_rate": 4.708389154476492e-06, + "loss": 0.5201, + "step": 1124 + }, + { + "epoch": 0.5438723712835388, + "grad_norm": 0.43330211663967066, + "learning_rate": 4.7004092631945315e-06, + "loss": 0.5258, + "step": 1125 + }, + { + "epoch": 0.5443558133913464, + "grad_norm": 0.44482177202458967, + "learning_rate": 4.692430137690638e-06, + "loss": 0.5222, + "step": 1126 + }, + { + "epoch": 0.544839255499154, + "grad_norm": 0.4318024796342877, + "learning_rate": 4.684451798360102e-06, + "loss": 0.5204, + "step": 1127 + }, + { + "epoch": 0.5453226976069616, + "grad_norm": 0.45444414518744425, + "learning_rate": 4.6764742655962e-06, + "loss": 0.5255, + "step": 1128 + }, + { + "epoch": 0.5458061397147692, + "grad_norm": 0.4372839038666406, + "learning_rate": 4.6684975597901526e-06, + "loss": 0.5275, + "step": 1129 + }, + { + "epoch": 0.5462895818225767, + "grad_norm": 0.43767287064021165, + "learning_rate": 4.660521701331058e-06, + "loss": 0.5046, + "step": 1130 + }, + { + "epoch": 0.5467730239303843, + "grad_norm": 0.42146624184063447, + "learning_rate": 4.652546710605857e-06, + "loss": 0.5284, + "step": 1131 + }, + { + "epoch": 0.5472564660381919, + "grad_norm": 0.4528806888549099, + "learning_rate": 4.644572607999267e-06, + "loss": 0.5234, + "step": 1132 + }, + { + "epoch": 0.5477399081459995, + "grad_norm": 0.41615032339392954, + "learning_rate": 4.636599413893734e-06, + "loss": 0.5149, + "step": 1133 + }, + { + "epoch": 0.5482233502538071, + "grad_norm": 0.4080780400252472, + "learning_rate": 4.628627148669391e-06, + "loss": 0.5069, + "step": 1134 + }, + { + "epoch": 0.5487067923616147, + "grad_norm": 0.4376373218589361, + "learning_rate": 4.620655832703984e-06, + "loss": 0.5232, + "step": 1135 + }, + { + 
"epoch": 0.5491902344694223, + "grad_norm": 0.44817086720050736, + "learning_rate": 4.612685486372846e-06, + "loss": 0.5284, + "step": 1136 + }, + { + "epoch": 0.5496736765772299, + "grad_norm": 0.4269717422470433, + "learning_rate": 4.604716130048818e-06, + "loss": 0.5292, + "step": 1137 + }, + { + "epoch": 0.5501571186850375, + "grad_norm": 0.4463889311004707, + "learning_rate": 4.596747784102221e-06, + "loss": 0.5296, + "step": 1138 + }, + { + "epoch": 0.5506405607928451, + "grad_norm": 0.4544805899306494, + "learning_rate": 4.588780468900787e-06, + "loss": 0.5342, + "step": 1139 + }, + { + "epoch": 0.5511240029006527, + "grad_norm": 0.43223974279832084, + "learning_rate": 4.580814204809618e-06, + "loss": 0.5278, + "step": 1140 + }, + { + "epoch": 0.5516074450084603, + "grad_norm": 0.4526101073084036, + "learning_rate": 4.572849012191126e-06, + "loss": 0.5274, + "step": 1141 + }, + { + "epoch": 0.5520908871162679, + "grad_norm": 0.46135315991278786, + "learning_rate": 4.564884911404986e-06, + "loss": 0.5308, + "step": 1142 + }, + { + "epoch": 0.5525743292240755, + "grad_norm": 0.451744932774668, + "learning_rate": 4.5569219228080805e-06, + "loss": 0.5228, + "step": 1143 + }, + { + "epoch": 0.553057771331883, + "grad_norm": 0.4431407162306295, + "learning_rate": 4.54896006675445e-06, + "loss": 0.5001, + "step": 1144 + }, + { + "epoch": 0.5535412134396906, + "grad_norm": 0.4115173802208087, + "learning_rate": 4.540999363595242e-06, + "loss": 0.4963, + "step": 1145 + }, + { + "epoch": 0.5540246555474981, + "grad_norm": 0.418962800898634, + "learning_rate": 4.5330398336786526e-06, + "loss": 0.5277, + "step": 1146 + }, + { + "epoch": 0.5545080976553057, + "grad_norm": 0.46372386433812574, + "learning_rate": 4.525081497349887e-06, + "loss": 0.5427, + "step": 1147 + }, + { + "epoch": 0.5549915397631133, + "grad_norm": 0.4530031866173555, + "learning_rate": 4.517124374951086e-06, + "loss": 0.5178, + "step": 1148 + }, + { + "epoch": 0.5554749818709209, + "grad_norm": 
0.440822304311767, + "learning_rate": 4.509168486821304e-06, + "loss": 0.5225, + "step": 1149 + }, + { + "epoch": 0.5559584239787285, + "grad_norm": 0.4127961848352273, + "learning_rate": 4.501213853296425e-06, + "loss": 0.523, + "step": 1150 + }, + { + "epoch": 0.5564418660865361, + "grad_norm": 0.461288293810827, + "learning_rate": 4.493260494709141e-06, + "loss": 0.5251, + "step": 1151 + }, + { + "epoch": 0.5569253081943437, + "grad_norm": 0.43458067348060225, + "learning_rate": 4.48530843138887e-06, + "loss": 0.5419, + "step": 1152 + }, + { + "epoch": 0.5574087503021513, + "grad_norm": 0.42472609884541546, + "learning_rate": 4.477357683661734e-06, + "loss": 0.5318, + "step": 1153 + }, + { + "epoch": 0.5578921924099589, + "grad_norm": 0.4256107384490566, + "learning_rate": 4.469408271850479e-06, + "loss": 0.506, + "step": 1154 + }, + { + "epoch": 0.5583756345177665, + "grad_norm": 0.4098807197744025, + "learning_rate": 4.4614602162744455e-06, + "loss": 0.5152, + "step": 1155 + }, + { + "epoch": 0.5588590766255741, + "grad_norm": 0.4390148331821114, + "learning_rate": 4.453513537249503e-06, + "loss": 0.527, + "step": 1156 + }, + { + "epoch": 0.5593425187333817, + "grad_norm": 0.4711204869094555, + "learning_rate": 4.445568255088003e-06, + "loss": 0.5247, + "step": 1157 + }, + { + "epoch": 0.5598259608411893, + "grad_norm": 0.41653734784347574, + "learning_rate": 4.4376243900987296e-06, + "loss": 0.5232, + "step": 1158 + }, + { + "epoch": 0.5603094029489969, + "grad_norm": 0.44411194102788987, + "learning_rate": 4.429681962586839e-06, + "loss": 0.5365, + "step": 1159 + }, + { + "epoch": 0.5607928450568045, + "grad_norm": 0.44003715852304276, + "learning_rate": 4.421740992853818e-06, + "loss": 0.5311, + "step": 1160 + }, + { + "epoch": 0.5612762871646121, + "grad_norm": 0.43445702749764165, + "learning_rate": 4.413801501197424e-06, + "loss": 0.5192, + "step": 1161 + }, + { + "epoch": 0.5617597292724196, + "grad_norm": 0.4315566299087621, + "learning_rate": 
4.405863507911638e-06, + "loss": 0.5007, + "step": 1162 + }, + { + "epoch": 0.5622431713802272, + "grad_norm": 0.3786991552102392, + "learning_rate": 4.3979270332866105e-06, + "loss": 0.4867, + "step": 1163 + }, + { + "epoch": 0.5627266134880348, + "grad_norm": 0.4244074214223604, + "learning_rate": 4.389992097608613e-06, + "loss": 0.5271, + "step": 1164 + }, + { + "epoch": 0.5632100555958424, + "grad_norm": 0.430673286759424, + "learning_rate": 4.3820587211599745e-06, + "loss": 0.5292, + "step": 1165 + }, + { + "epoch": 0.56369349770365, + "grad_norm": 0.46109079230422667, + "learning_rate": 4.374126924219052e-06, + "loss": 0.5197, + "step": 1166 + }, + { + "epoch": 0.5641769398114576, + "grad_norm": 0.4275407630254517, + "learning_rate": 4.366196727060152e-06, + "loss": 0.5196, + "step": 1167 + }, + { + "epoch": 0.5646603819192652, + "grad_norm": 0.4407289737300367, + "learning_rate": 4.3582681499535e-06, + "loss": 0.5206, + "step": 1168 + }, + { + "epoch": 0.5651438240270727, + "grad_norm": 0.4119248969357382, + "learning_rate": 4.3503412131651765e-06, + "loss": 0.5341, + "step": 1169 + }, + { + "epoch": 0.5656272661348803, + "grad_norm": 0.4621359921349015, + "learning_rate": 4.342415936957073e-06, + "loss": 0.5225, + "step": 1170 + }, + { + "epoch": 0.5661107082426879, + "grad_norm": 0.45885127710831636, + "learning_rate": 4.334492341586833e-06, + "loss": 0.5328, + "step": 1171 + }, + { + "epoch": 0.5665941503504955, + "grad_norm": 0.4217251248929531, + "learning_rate": 4.326570447307804e-06, + "loss": 0.5024, + "step": 1172 + }, + { + "epoch": 0.5670775924583031, + "grad_norm": 0.43349297844561585, + "learning_rate": 4.318650274368989e-06, + "loss": 0.5302, + "step": 1173 + }, + { + "epoch": 0.5675610345661107, + "grad_norm": 0.44173213520395654, + "learning_rate": 4.310731843014985e-06, + "loss": 0.518, + "step": 1174 + }, + { + "epoch": 0.5680444766739183, + "grad_norm": 0.44550331806578247, + "learning_rate": 4.302815173485944e-06, + "loss": 0.5262, + 
"step": 1175 + }, + { + "epoch": 0.5685279187817259, + "grad_norm": 0.41790214183276025, + "learning_rate": 4.294900286017509e-06, + "loss": 0.5249, + "step": 1176 + }, + { + "epoch": 0.5690113608895335, + "grad_norm": 0.4406462454965783, + "learning_rate": 4.286987200840772e-06, + "loss": 0.5399, + "step": 1177 + }, + { + "epoch": 0.5694948029973411, + "grad_norm": 0.42899916006281247, + "learning_rate": 4.279075938182214e-06, + "loss": 0.522, + "step": 1178 + }, + { + "epoch": 0.5699782451051486, + "grad_norm": 0.39425395091405524, + "learning_rate": 4.271166518263662e-06, + "loss": 0.4916, + "step": 1179 + }, + { + "epoch": 0.5704616872129562, + "grad_norm": 0.4325306883401716, + "learning_rate": 4.263258961302232e-06, + "loss": 0.5297, + "step": 1180 + }, + { + "epoch": 0.5709451293207638, + "grad_norm": 0.42524338171666753, + "learning_rate": 4.255353287510272e-06, + "loss": 0.524, + "step": 1181 + }, + { + "epoch": 0.5714285714285714, + "grad_norm": 0.44279478264935357, + "learning_rate": 4.247449517095329e-06, + "loss": 0.5215, + "step": 1182 + }, + { + "epoch": 0.571912013536379, + "grad_norm": 0.4197931530174695, + "learning_rate": 4.239547670260069e-06, + "loss": 0.5099, + "step": 1183 + }, + { + "epoch": 0.5723954556441866, + "grad_norm": 0.4310563818067479, + "learning_rate": 4.231647767202254e-06, + "loss": 0.5191, + "step": 1184 + }, + { + "epoch": 0.5728788977519942, + "grad_norm": 0.42356166524796646, + "learning_rate": 4.223749828114672e-06, + "loss": 0.528, + "step": 1185 + }, + { + "epoch": 0.5733623398598018, + "grad_norm": 0.4095445643557658, + "learning_rate": 4.215853873185093e-06, + "loss": 0.4862, + "step": 1186 + }, + { + "epoch": 0.5738457819676094, + "grad_norm": 0.47015761976525233, + "learning_rate": 4.2079599225962115e-06, + "loss": 0.5183, + "step": 1187 + }, + { + "epoch": 0.574329224075417, + "grad_norm": 0.4175313339147143, + "learning_rate": 4.2000679965256045e-06, + "loss": 0.5045, + "step": 1188 + }, + { + "epoch": 
0.5748126661832246, + "grad_norm": 0.43952093100735207, + "learning_rate": 4.192178115145668e-06, + "loss": 0.4942, + "step": 1189 + }, + { + "epoch": 0.5752961082910322, + "grad_norm": 0.41782905847478197, + "learning_rate": 4.184290298623578e-06, + "loss": 0.4962, + "step": 1190 + }, + { + "epoch": 0.5757795503988398, + "grad_norm": 0.4321674737064156, + "learning_rate": 4.176404567121225e-06, + "loss": 0.5397, + "step": 1191 + }, + { + "epoch": 0.5762629925066474, + "grad_norm": 0.4802929727845466, + "learning_rate": 4.16852094079518e-06, + "loss": 0.5249, + "step": 1192 + }, + { + "epoch": 0.576746434614455, + "grad_norm": 0.4829071925445664, + "learning_rate": 4.160639439796624e-06, + "loss": 0.5192, + "step": 1193 + }, + { + "epoch": 0.5772298767222626, + "grad_norm": 0.41462388567526964, + "learning_rate": 4.152760084271305e-06, + "loss": 0.5224, + "step": 1194 + }, + { + "epoch": 0.57771331883007, + "grad_norm": 0.41336785715866026, + "learning_rate": 4.1448828943595e-06, + "loss": 0.531, + "step": 1195 + }, + { + "epoch": 0.5781967609378776, + "grad_norm": 0.4424735299028098, + "learning_rate": 4.1370078901959306e-06, + "loss": 0.5309, + "step": 1196 + }, + { + "epoch": 0.5786802030456852, + "grad_norm": 0.43755728178912967, + "learning_rate": 4.129135091909752e-06, + "loss": 0.5314, + "step": 1197 + }, + { + "epoch": 0.5791636451534928, + "grad_norm": 0.4234445834897067, + "learning_rate": 4.121264519624463e-06, + "loss": 0.4971, + "step": 1198 + }, + { + "epoch": 0.5796470872613004, + "grad_norm": 0.4465261505400547, + "learning_rate": 4.113396193457887e-06, + "loss": 0.5421, + "step": 1199 + }, + { + "epoch": 0.580130529369108, + "grad_norm": 0.43864096822609294, + "learning_rate": 4.105530133522096e-06, + "loss": 0.5286, + "step": 1200 + }, + { + "epoch": 0.5806139714769156, + "grad_norm": 0.44812939972631666, + "learning_rate": 4.0976663599233745e-06, + "loss": 0.5283, + "step": 1201 + }, + { + "epoch": 0.5810974135847232, + "grad_norm": 
0.41015688502886277, + "learning_rate": 4.08980489276216e-06, + "loss": 0.5031, + "step": 1202 + }, + { + "epoch": 0.5815808556925308, + "grad_norm": 0.4371592707730246, + "learning_rate": 4.081945752133e-06, + "loss": 0.5247, + "step": 1203 + }, + { + "epoch": 0.5820642978003384, + "grad_norm": 0.4545452075633195, + "learning_rate": 4.074088958124488e-06, + "loss": 0.5233, + "step": 1204 + }, + { + "epoch": 0.582547739908146, + "grad_norm": 0.4034341944937049, + "learning_rate": 4.066234530819222e-06, + "loss": 0.4997, + "step": 1205 + }, + { + "epoch": 0.5830311820159536, + "grad_norm": 0.44158682076629885, + "learning_rate": 4.058382490293755e-06, + "loss": 0.5327, + "step": 1206 + }, + { + "epoch": 0.5835146241237612, + "grad_norm": 0.44126859021633347, + "learning_rate": 4.050532856618532e-06, + "loss": 0.5172, + "step": 1207 + }, + { + "epoch": 0.5839980662315688, + "grad_norm": 0.44983862546632325, + "learning_rate": 4.0426856498578515e-06, + "loss": 0.5321, + "step": 1208 + }, + { + "epoch": 0.5844815083393764, + "grad_norm": 0.4306823960936312, + "learning_rate": 4.034840890069805e-06, + "loss": 0.5286, + "step": 1209 + }, + { + "epoch": 0.584964950447184, + "grad_norm": 0.46647403495000134, + "learning_rate": 4.0269985973062325e-06, + "loss": 0.5205, + "step": 1210 + }, + { + "epoch": 0.5854483925549916, + "grad_norm": 0.43512362259260234, + "learning_rate": 4.019158791612662e-06, + "loss": 0.5186, + "step": 1211 + }, + { + "epoch": 0.5859318346627991, + "grad_norm": 0.42474616162914414, + "learning_rate": 4.0113214930282765e-06, + "loss": 0.5019, + "step": 1212 + }, + { + "epoch": 0.5864152767706067, + "grad_norm": 0.4496255266024326, + "learning_rate": 4.003486721585834e-06, + "loss": 0.5292, + "step": 1213 + }, + { + "epoch": 0.5868987188784143, + "grad_norm": 0.4692064186214037, + "learning_rate": 3.995654497311649e-06, + "loss": 0.524, + "step": 1214 + }, + { + "epoch": 0.5873821609862219, + "grad_norm": 0.47446797627391324, + "learning_rate": 
3.987824840225512e-06, + "loss": 0.5296, + "step": 1215 + }, + { + "epoch": 0.5878656030940295, + "grad_norm": 0.45351628397562704, + "learning_rate": 3.979997770340664e-06, + "loss": 0.5191, + "step": 1216 + }, + { + "epoch": 0.5883490452018371, + "grad_norm": 0.4469658212338775, + "learning_rate": 3.972173307663721e-06, + "loss": 0.5259, + "step": 1217 + }, + { + "epoch": 0.5888324873096447, + "grad_norm": 0.42106841746243373, + "learning_rate": 3.964351472194642e-06, + "loss": 0.5169, + "step": 1218 + }, + { + "epoch": 0.5893159294174523, + "grad_norm": 0.46081869250650287, + "learning_rate": 3.95653228392667e-06, + "loss": 0.5324, + "step": 1219 + }, + { + "epoch": 0.5897993715252599, + "grad_norm": 0.43787433991965447, + "learning_rate": 3.9487157628462784e-06, + "loss": 0.5253, + "step": 1220 + }, + { + "epoch": 0.5902828136330674, + "grad_norm": 0.4446710684339124, + "learning_rate": 3.940901928933127e-06, + "loss": 0.5207, + "step": 1221 + }, + { + "epoch": 0.590766255740875, + "grad_norm": 0.4878921355151572, + "learning_rate": 3.933090802160004e-06, + "loss": 0.5216, + "step": 1222 + }, + { + "epoch": 0.5912496978486826, + "grad_norm": 0.43014793362728476, + "learning_rate": 3.925282402492779e-06, + "loss": 0.5158, + "step": 1223 + }, + { + "epoch": 0.5917331399564902, + "grad_norm": 0.4785437280134671, + "learning_rate": 3.917476749890351e-06, + "loss": 0.5337, + "step": 1224 + }, + { + "epoch": 0.5922165820642978, + "grad_norm": 0.4374190045344671, + "learning_rate": 3.909673864304597e-06, + "loss": 0.528, + "step": 1225 + }, + { + "epoch": 0.5927000241721054, + "grad_norm": 0.4547685840805692, + "learning_rate": 3.901873765680322e-06, + "loss": 0.5366, + "step": 1226 + }, + { + "epoch": 0.593183466279913, + "grad_norm": 0.4594395711536057, + "learning_rate": 3.894076473955207e-06, + "loss": 0.5202, + "step": 1227 + }, + { + "epoch": 0.5936669083877205, + "grad_norm": 0.5197830445285274, + "learning_rate": 3.886282009059757e-06, + "loss": 0.5293, + 
"step": 1228 + }, + { + "epoch": 0.5941503504955281, + "grad_norm": 0.438870970435199, + "learning_rate": 3.878490390917253e-06, + "loss": 0.521, + "step": 1229 + }, + { + "epoch": 0.5946337926033357, + "grad_norm": 0.43955571970066076, + "learning_rate": 3.8707016394436985e-06, + "loss": 0.5219, + "step": 1230 + }, + { + "epoch": 0.5951172347111433, + "grad_norm": 0.4319400896121196, + "learning_rate": 3.86291577454777e-06, + "loss": 0.5274, + "step": 1231 + }, + { + "epoch": 0.5956006768189509, + "grad_norm": 0.42866945423374303, + "learning_rate": 3.855132816130767e-06, + "loss": 0.5287, + "step": 1232 + }, + { + "epoch": 0.5960841189267585, + "grad_norm": 0.4905566924544953, + "learning_rate": 3.847352784086556e-06, + "loss": 0.5214, + "step": 1233 + }, + { + "epoch": 0.5965675610345661, + "grad_norm": 0.4815238783873125, + "learning_rate": 3.839575698301529e-06, + "loss": 0.5348, + "step": 1234 + }, + { + "epoch": 0.5970510031423737, + "grad_norm": 0.4611545386580029, + "learning_rate": 3.831801578654541e-06, + "loss": 0.521, + "step": 1235 + }, + { + "epoch": 0.5975344452501813, + "grad_norm": 0.44138236799554953, + "learning_rate": 3.8240304450168716e-06, + "loss": 0.5012, + "step": 1236 + }, + { + "epoch": 0.5980178873579889, + "grad_norm": 0.4729557809805851, + "learning_rate": 3.8162623172521615e-06, + "loss": 0.5239, + "step": 1237 + }, + { + "epoch": 0.5985013294657965, + "grad_norm": 0.47751713396917145, + "learning_rate": 3.808497215216374e-06, + "loss": 0.518, + "step": 1238 + }, + { + "epoch": 0.5989847715736041, + "grad_norm": 0.4632727328684821, + "learning_rate": 3.8007351587577342e-06, + "loss": 0.5212, + "step": 1239 + }, + { + "epoch": 0.5994682136814117, + "grad_norm": 0.43452448731632676, + "learning_rate": 3.7929761677166847e-06, + "loss": 0.5256, + "step": 1240 + }, + { + "epoch": 0.5999516557892193, + "grad_norm": 0.41707858133064807, + "learning_rate": 3.7852202619258327e-06, + "loss": 0.5258, + "step": 1241 + }, + { + "epoch": 
0.6004350978970269, + "grad_norm": 0.42484738876263983, + "learning_rate": 3.777467461209895e-06, + "loss": 0.5226, + "step": 1242 + }, + { + "epoch": 0.6009185400048345, + "grad_norm": 0.46278022630826876, + "learning_rate": 3.76971778538566e-06, + "loss": 0.5265, + "step": 1243 + }, + { + "epoch": 0.6014019821126421, + "grad_norm": 0.4136178806145892, + "learning_rate": 3.76197125426192e-06, + "loss": 0.521, + "step": 1244 + }, + { + "epoch": 0.6018854242204495, + "grad_norm": 0.42100500706131366, + "learning_rate": 3.754227887639434e-06, + "loss": 0.5119, + "step": 1245 + }, + { + "epoch": 0.6023688663282571, + "grad_norm": 0.4123940096542578, + "learning_rate": 3.7464877053108706e-06, + "loss": 0.5258, + "step": 1246 + }, + { + "epoch": 0.6028523084360647, + "grad_norm": 0.4195138029502561, + "learning_rate": 3.7387507270607617e-06, + "loss": 0.529, + "step": 1247 + }, + { + "epoch": 0.6033357505438723, + "grad_norm": 0.45482061749805036, + "learning_rate": 3.7310169726654444e-06, + "loss": 0.528, + "step": 1248 + }, + { + "epoch": 0.6038191926516799, + "grad_norm": 0.4371974850319641, + "learning_rate": 3.7232864618930217e-06, + "loss": 0.5182, + "step": 1249 + }, + { + "epoch": 0.6043026347594875, + "grad_norm": 0.43099991632770085, + "learning_rate": 3.715559214503298e-06, + "loss": 0.5133, + "step": 1250 + }, + { + "epoch": 0.6047860768672951, + "grad_norm": 0.4048318396541149, + "learning_rate": 3.707835250247745e-06, + "loss": 0.4877, + "step": 1251 + }, + { + "epoch": 0.6052695189751027, + "grad_norm": 0.43040118158566426, + "learning_rate": 3.7001145888694335e-06, + "loss": 0.5256, + "step": 1252 + }, + { + "epoch": 0.6057529610829103, + "grad_norm": 0.43151561170744174, + "learning_rate": 3.6923972501029996e-06, + "loss": 0.5028, + "step": 1253 + }, + { + "epoch": 0.6062364031907179, + "grad_norm": 0.4168861988285146, + "learning_rate": 3.684683253674583e-06, + "loss": 0.5249, + "step": 1254 + }, + { + "epoch": 0.6067198452985255, + "grad_norm": 
0.4341349343804055, + "learning_rate": 3.676972619301776e-06, + "loss": 0.5119, + "step": 1255 + }, + { + "epoch": 0.6072032874063331, + "grad_norm": 0.45716952276328937, + "learning_rate": 3.6692653666935875e-06, + "loss": 0.5262, + "step": 1256 + }, + { + "epoch": 0.6076867295141407, + "grad_norm": 0.4304219021067875, + "learning_rate": 3.6615615155503703e-06, + "loss": 0.5168, + "step": 1257 + }, + { + "epoch": 0.6081701716219483, + "grad_norm": 0.4098861167543977, + "learning_rate": 3.6538610855637953e-06, + "loss": 0.5193, + "step": 1258 + }, + { + "epoch": 0.6086536137297559, + "grad_norm": 0.45344600794993284, + "learning_rate": 3.6461640964167755e-06, + "loss": 0.5213, + "step": 1259 + }, + { + "epoch": 0.6091370558375635, + "grad_norm": 0.408785994340013, + "learning_rate": 3.638470567783442e-06, + "loss": 0.4982, + "step": 1260 + }, + { + "epoch": 0.609620497945371, + "grad_norm": 0.4481815917202834, + "learning_rate": 3.630780519329069e-06, + "loss": 0.5329, + "step": 1261 + }, + { + "epoch": 0.6101039400531786, + "grad_norm": 0.4271346926738626, + "learning_rate": 3.623093970710043e-06, + "loss": 0.5278, + "step": 1262 + }, + { + "epoch": 0.6105873821609862, + "grad_norm": 0.4443519643294819, + "learning_rate": 3.615410941573799e-06, + "loss": 0.5358, + "step": 1263 + }, + { + "epoch": 0.6110708242687938, + "grad_norm": 0.41093410035963396, + "learning_rate": 3.607731451558783e-06, + "loss": 0.4973, + "step": 1264 + }, + { + "epoch": 0.6115542663766014, + "grad_norm": 0.4526917508007144, + "learning_rate": 3.6000555202943872e-06, + "loss": 0.5223, + "step": 1265 + }, + { + "epoch": 0.612037708484409, + "grad_norm": 0.458600770079082, + "learning_rate": 3.59238316740091e-06, + "loss": 0.5206, + "step": 1266 + }, + { + "epoch": 0.6125211505922166, + "grad_norm": 0.4390522720994981, + "learning_rate": 3.584714412489506e-06, + "loss": 0.5306, + "step": 1267 + }, + { + "epoch": 0.6130045927000242, + "grad_norm": 0.41799452227515504, + "learning_rate": 
3.5770492751621292e-06, + "loss": 0.5029, + "step": 1268 + }, + { + "epoch": 0.6134880348078318, + "grad_norm": 0.43636835439611227, + "learning_rate": 3.5693877750114903e-06, + "loss": 0.5167, + "step": 1269 + }, + { + "epoch": 0.6139714769156394, + "grad_norm": 0.43211619164840076, + "learning_rate": 3.5617299316209984e-06, + "loss": 0.5049, + "step": 1270 + }, + { + "epoch": 0.614454919023447, + "grad_norm": 0.42547953690955836, + "learning_rate": 3.5540757645647217e-06, + "loss": 0.4939, + "step": 1271 + }, + { + "epoch": 0.6149383611312546, + "grad_norm": 0.4170182860561763, + "learning_rate": 3.546425293407324e-06, + "loss": 0.5199, + "step": 1272 + }, + { + "epoch": 0.6154218032390621, + "grad_norm": 0.4222424350681242, + "learning_rate": 3.5387785377040316e-06, + "loss": 0.5132, + "step": 1273 + }, + { + "epoch": 0.6159052453468697, + "grad_norm": 0.4552864030500758, + "learning_rate": 3.531135517000561e-06, + "loss": 0.5269, + "step": 1274 + }, + { + "epoch": 0.6163886874546773, + "grad_norm": 0.43855551812482985, + "learning_rate": 3.523496250833098e-06, + "loss": 0.5122, + "step": 1275 + }, + { + "epoch": 0.6168721295624849, + "grad_norm": 0.46449768544610603, + "learning_rate": 3.515860758728214e-06, + "loss": 0.5234, + "step": 1276 + }, + { + "epoch": 0.6173555716702925, + "grad_norm": 0.46363731713711515, + "learning_rate": 3.5082290602028492e-06, + "loss": 0.5269, + "step": 1277 + }, + { + "epoch": 0.6178390137781, + "grad_norm": 0.42830833200680596, + "learning_rate": 3.5006011747642366e-06, + "loss": 0.5177, + "step": 1278 + }, + { + "epoch": 0.6183224558859076, + "grad_norm": 0.45403297568672957, + "learning_rate": 3.492977121909865e-06, + "loss": 0.5329, + "step": 1279 + }, + { + "epoch": 0.6188058979937152, + "grad_norm": 0.4707193517755616, + "learning_rate": 3.4853569211274306e-06, + "loss": 0.5275, + "step": 1280 + }, + { + "epoch": 0.6192893401015228, + "grad_norm": 0.4382585499371175, + "learning_rate": 3.4777405918947795e-06, + "loss": 
0.5117, + "step": 1281 + }, + { + "epoch": 0.6197727822093304, + "grad_norm": 0.43247243116721396, + "learning_rate": 3.4701281536798638e-06, + "loss": 0.5274, + "step": 1282 + }, + { + "epoch": 0.620256224317138, + "grad_norm": 0.4457194202455219, + "learning_rate": 3.462519625940688e-06, + "loss": 0.5282, + "step": 1283 + }, + { + "epoch": 0.6207396664249456, + "grad_norm": 0.45208181214242377, + "learning_rate": 3.4549150281252635e-06, + "loss": 0.5224, + "step": 1284 + }, + { + "epoch": 0.6212231085327532, + "grad_norm": 0.43419084686544124, + "learning_rate": 3.4473143796715537e-06, + "loss": 0.5221, + "step": 1285 + }, + { + "epoch": 0.6217065506405608, + "grad_norm": 0.45834613229050314, + "learning_rate": 3.4397177000074307e-06, + "loss": 0.5286, + "step": 1286 + }, + { + "epoch": 0.6221899927483684, + "grad_norm": 0.4551713196802731, + "learning_rate": 3.4321250085506174e-06, + "loss": 0.519, + "step": 1287 + }, + { + "epoch": 0.622673434856176, + "grad_norm": 0.45716705644935435, + "learning_rate": 3.4245363247086477e-06, + "loss": 0.5291, + "step": 1288 + }, + { + "epoch": 0.6231568769639836, + "grad_norm": 0.4214582611290155, + "learning_rate": 3.4169516678788096e-06, + "loss": 0.5084, + "step": 1289 + }, + { + "epoch": 0.6236403190717912, + "grad_norm": 0.43304150590207136, + "learning_rate": 3.4093710574480926e-06, + "loss": 0.5181, + "step": 1290 + }, + { + "epoch": 0.6241237611795988, + "grad_norm": 0.4201723521255349, + "learning_rate": 3.4017945127931517e-06, + "loss": 0.5215, + "step": 1291 + }, + { + "epoch": 0.6246072032874064, + "grad_norm": 0.43513341637312203, + "learning_rate": 3.394222053280245e-06, + "loss": 0.5219, + "step": 1292 + }, + { + "epoch": 0.625090645395214, + "grad_norm": 0.41778354176218346, + "learning_rate": 3.386653698265189e-06, + "loss": 0.5295, + "step": 1293 + }, + { + "epoch": 0.6255740875030215, + "grad_norm": 0.4289158469228602, + "learning_rate": 3.3790894670933096e-06, + "loss": 0.4993, + "step": 1294 + }, + { + 
"epoch": 0.626057529610829, + "grad_norm": 0.4362452363722638, + "learning_rate": 3.3715293790993906e-06, + "loss": 0.5212, + "step": 1295 + }, + { + "epoch": 0.6265409717186367, + "grad_norm": 0.4287020140556269, + "learning_rate": 3.3639734536076263e-06, + "loss": 0.5145, + "step": 1296 + }, + { + "epoch": 0.6270244138264442, + "grad_norm": 0.4371619386426176, + "learning_rate": 3.356421709931573e-06, + "loss": 0.5263, + "step": 1297 + }, + { + "epoch": 0.6275078559342518, + "grad_norm": 0.4558196176944881, + "learning_rate": 3.348874167374093e-06, + "loss": 0.5193, + "step": 1298 + }, + { + "epoch": 0.6279912980420594, + "grad_norm": 0.433400379634257, + "learning_rate": 3.341330845227316e-06, + "loss": 0.5342, + "step": 1299 + }, + { + "epoch": 0.628474740149867, + "grad_norm": 0.44874534866829735, + "learning_rate": 3.33379176277258e-06, + "loss": 0.5192, + "step": 1300 + }, + { + "epoch": 0.6289581822576746, + "grad_norm": 0.42814892447345076, + "learning_rate": 3.326256939280389e-06, + "loss": 0.5222, + "step": 1301 + }, + { + "epoch": 0.6294416243654822, + "grad_norm": 0.4175511142456795, + "learning_rate": 3.3187263940103587e-06, + "loss": 0.5008, + "step": 1302 + }, + { + "epoch": 0.6299250664732898, + "grad_norm": 0.43012465279292367, + "learning_rate": 3.3112001462111666e-06, + "loss": 0.5252, + "step": 1303 + }, + { + "epoch": 0.6304085085810974, + "grad_norm": 0.4343943982087317, + "learning_rate": 3.3036782151205134e-06, + "loss": 0.5269, + "step": 1304 + }, + { + "epoch": 0.630891950688905, + "grad_norm": 0.4399879585469454, + "learning_rate": 3.296160619965056e-06, + "loss": 0.5348, + "step": 1305 + }, + { + "epoch": 0.6313753927967126, + "grad_norm": 0.41469773355380823, + "learning_rate": 3.2886473799603793e-06, + "loss": 0.5315, + "step": 1306 + }, + { + "epoch": 0.6318588349045202, + "grad_norm": 0.4118723735223055, + "learning_rate": 3.2811385143109254e-06, + "loss": 0.5199, + "step": 1307 + }, + { + "epoch": 0.6323422770123278, + "grad_norm": 
0.42911493217824326, + "learning_rate": 3.2736340422099633e-06, + "loss": 0.5287, + "step": 1308 + }, + { + "epoch": 0.6328257191201354, + "grad_norm": 0.42229266051589764, + "learning_rate": 3.2661339828395263e-06, + "loss": 0.5034, + "step": 1309 + }, + { + "epoch": 0.633309161227943, + "grad_norm": 0.4171629681078508, + "learning_rate": 3.2586383553703723e-06, + "loss": 0.5169, + "step": 1310 + }, + { + "epoch": 0.6337926033357505, + "grad_norm": 0.4335972937122555, + "learning_rate": 3.2511471789619274e-06, + "loss": 0.5084, + "step": 1311 + }, + { + "epoch": 0.6342760454435581, + "grad_norm": 0.42465524204681926, + "learning_rate": 3.2436604727622447e-06, + "loss": 0.5126, + "step": 1312 + }, + { + "epoch": 0.6347594875513657, + "grad_norm": 0.4075010185296818, + "learning_rate": 3.2361782559079465e-06, + "loss": 0.5158, + "step": 1313 + }, + { + "epoch": 0.6352429296591733, + "grad_norm": 0.4308541178278502, + "learning_rate": 3.228700547524184e-06, + "loss": 0.5145, + "step": 1314 + }, + { + "epoch": 0.6357263717669809, + "grad_norm": 0.428226473042128, + "learning_rate": 3.221227366724581e-06, + "loss": 0.5146, + "step": 1315 + }, + { + "epoch": 0.6362098138747885, + "grad_norm": 0.4299253923140349, + "learning_rate": 3.2137587326111896e-06, + "loss": 0.5207, + "step": 1316 + }, + { + "epoch": 0.6366932559825961, + "grad_norm": 0.4181473728080887, + "learning_rate": 3.206294664274443e-06, + "loss": 0.5268, + "step": 1317 + }, + { + "epoch": 0.6371766980904037, + "grad_norm": 0.4474913351660176, + "learning_rate": 3.198835180793097e-06, + "loss": 0.5277, + "step": 1318 + }, + { + "epoch": 0.6376601401982113, + "grad_norm": 0.432676302059824, + "learning_rate": 3.1913803012341987e-06, + "loss": 0.5195, + "step": 1319 + }, + { + "epoch": 0.6381435823060189, + "grad_norm": 0.44131963361999216, + "learning_rate": 3.183930044653014e-06, + "loss": 0.5157, + "step": 1320 + }, + { + "epoch": 0.6386270244138265, + "grad_norm": 0.4300971230394046, + "learning_rate": 
3.176484430093007e-06, + "loss": 0.5312, + "step": 1321 + }, + { + "epoch": 0.6391104665216341, + "grad_norm": 0.44998544162450493, + "learning_rate": 3.1690434765857604e-06, + "loss": 0.523, + "step": 1322 + }, + { + "epoch": 0.6395939086294417, + "grad_norm": 0.4472778564601584, + "learning_rate": 3.1616072031509594e-06, + "loss": 0.5155, + "step": 1323 + }, + { + "epoch": 0.6400773507372493, + "grad_norm": 0.4278886331070072, + "learning_rate": 3.154175628796311e-06, + "loss": 0.5214, + "step": 1324 + }, + { + "epoch": 0.6405607928450568, + "grad_norm": 0.4354238172365612, + "learning_rate": 3.146748772517523e-06, + "loss": 0.5202, + "step": 1325 + }, + { + "epoch": 0.6410442349528644, + "grad_norm": 0.48506314375368076, + "learning_rate": 3.139326653298236e-06, + "loss": 0.5226, + "step": 1326 + }, + { + "epoch": 0.6415276770606719, + "grad_norm": 0.42689588677759766, + "learning_rate": 3.1319092901099847e-06, + "loss": 0.5192, + "step": 1327 + }, + { + "epoch": 0.6420111191684795, + "grad_norm": 0.44087925863432936, + "learning_rate": 3.1244967019121496e-06, + "loss": 0.5127, + "step": 1328 + }, + { + "epoch": 0.6424945612762871, + "grad_norm": 0.42605880529211515, + "learning_rate": 3.117088907651902e-06, + "loss": 0.5176, + "step": 1329 + }, + { + "epoch": 0.6429780033840947, + "grad_norm": 0.42151112378627853, + "learning_rate": 3.109685926264161e-06, + "loss": 0.5165, + "step": 1330 + }, + { + "epoch": 0.6434614454919023, + "grad_norm": 0.4268979096606091, + "learning_rate": 3.102287776671544e-06, + "loss": 0.5339, + "step": 1331 + }, + { + "epoch": 0.6439448875997099, + "grad_norm": 0.4392831430463705, + "learning_rate": 3.094894477784318e-06, + "loss": 0.5166, + "step": 1332 + }, + { + "epoch": 0.6444283297075175, + "grad_norm": 0.4146076328411298, + "learning_rate": 3.0875060485003496e-06, + "loss": 0.5274, + "step": 1333 + }, + { + "epoch": 0.6449117718153251, + "grad_norm": 0.42157017978455763, + "learning_rate": 3.080122507705062e-06, + "loss": 
0.5243, + "step": 1334 + }, + { + "epoch": 0.6453952139231327, + "grad_norm": 0.41448886842899935, + "learning_rate": 3.0727438742713766e-06, + "loss": 0.4982, + "step": 1335 + }, + { + "epoch": 0.6458786560309403, + "grad_norm": 0.4311221980804021, + "learning_rate": 3.0653701670596805e-06, + "loss": 0.5222, + "step": 1336 + }, + { + "epoch": 0.6463620981387479, + "grad_norm": 0.42860321112689415, + "learning_rate": 3.0580014049177566e-06, + "loss": 0.5203, + "step": 1337 + }, + { + "epoch": 0.6468455402465555, + "grad_norm": 0.42363404336164073, + "learning_rate": 3.0506376066807632e-06, + "loss": 0.5131, + "step": 1338 + }, + { + "epoch": 0.6473289823543631, + "grad_norm": 0.4352925336415172, + "learning_rate": 3.0432787911711553e-06, + "loss": 0.5244, + "step": 1339 + }, + { + "epoch": 0.6478124244621707, + "grad_norm": 0.4213888881156135, + "learning_rate": 3.0359249771986605e-06, + "loss": 0.5114, + "step": 1340 + }, + { + "epoch": 0.6482958665699783, + "grad_norm": 0.44310700039649703, + "learning_rate": 3.028576183560221e-06, + "loss": 0.5345, + "step": 1341 + }, + { + "epoch": 0.6487793086777859, + "grad_norm": 0.46384351518694394, + "learning_rate": 3.021232429039944e-06, + "loss": 0.5103, + "step": 1342 + }, + { + "epoch": 0.6492627507855935, + "grad_norm": 0.420074285156663, + "learning_rate": 3.01389373240906e-06, + "loss": 0.5307, + "step": 1343 + }, + { + "epoch": 0.649746192893401, + "grad_norm": 0.46717936599561755, + "learning_rate": 3.006560112425867e-06, + "loss": 0.5146, + "step": 1344 + }, + { + "epoch": 0.6502296350012086, + "grad_norm": 0.4411181920960548, + "learning_rate": 2.999231587835691e-06, + "loss": 0.5113, + "step": 1345 + }, + { + "epoch": 0.6507130771090162, + "grad_norm": 0.4353633086273451, + "learning_rate": 2.9919081773708293e-06, + "loss": 0.5195, + "step": 1346 + }, + { + "epoch": 0.6511965192168238, + "grad_norm": 0.4555577360086876, + "learning_rate": 2.9845898997505102e-06, + "loss": 0.5201, + "step": 1347 + }, + { + 
"epoch": 0.6516799613246314, + "grad_norm": 0.43234112941806857, + "learning_rate": 2.9772767736808406e-06, + "loss": 0.5194, + "step": 1348 + }, + { + "epoch": 0.652163403432439, + "grad_norm": 0.43499867909496204, + "learning_rate": 2.9699688178547615e-06, + "loss": 0.5252, + "step": 1349 + }, + { + "epoch": 0.6526468455402465, + "grad_norm": 0.4243771527145887, + "learning_rate": 2.962666050951997e-06, + "loss": 0.5122, + "step": 1350 + }, + { + "epoch": 0.6531302876480541, + "grad_norm": 0.4158873835022681, + "learning_rate": 2.9553684916390053e-06, + "loss": 0.5092, + "step": 1351 + }, + { + "epoch": 0.6536137297558617, + "grad_norm": 0.4494940146550669, + "learning_rate": 2.948076158568939e-06, + "loss": 0.5256, + "step": 1352 + }, + { + "epoch": 0.6540971718636693, + "grad_norm": 0.39860657725442444, + "learning_rate": 2.940789070381587e-06, + "loss": 0.5001, + "step": 1353 + }, + { + "epoch": 0.6545806139714769, + "grad_norm": 0.41241371956595596, + "learning_rate": 2.933507245703335e-06, + "loss": 0.5174, + "step": 1354 + }, + { + "epoch": 0.6550640560792845, + "grad_norm": 0.41370768103275585, + "learning_rate": 2.9262307031471132e-06, + "loss": 0.5142, + "step": 1355 + }, + { + "epoch": 0.6555474981870921, + "grad_norm": 0.4457235721009106, + "learning_rate": 2.918959461312353e-06, + "loss": 0.5212, + "step": 1356 + }, + { + "epoch": 0.6560309402948997, + "grad_norm": 0.44189033261031596, + "learning_rate": 2.911693538784931e-06, + "loss": 0.5315, + "step": 1357 + }, + { + "epoch": 0.6565143824027073, + "grad_norm": 0.417441027459776, + "learning_rate": 2.904432954137136e-06, + "loss": 0.5197, + "step": 1358 + }, + { + "epoch": 0.6569978245105149, + "grad_norm": 0.41446617019595194, + "learning_rate": 2.897177725927599e-06, + "loss": 0.4977, + "step": 1359 + }, + { + "epoch": 0.6574812666183224, + "grad_norm": 0.424230216094722, + "learning_rate": 2.889927872701278e-06, + "loss": 0.5319, + "step": 1360 + }, + { + "epoch": 0.65796470872613, + "grad_norm": 
0.42985260891150956, + "learning_rate": 2.8826834129893755e-06, + "loss": 0.5166, + "step": 1361 + }, + { + "epoch": 0.6584481508339376, + "grad_norm": 0.38447115699767576, + "learning_rate": 2.8754443653093186e-06, + "loss": 0.4786, + "step": 1362 + }, + { + "epoch": 0.6589315929417452, + "grad_norm": 0.4044201835297723, + "learning_rate": 2.8682107481646915e-06, + "loss": 0.5216, + "step": 1363 + }, + { + "epoch": 0.6594150350495528, + "grad_norm": 0.4022645350617216, + "learning_rate": 2.8609825800452063e-06, + "loss": 0.4988, + "step": 1364 + }, + { + "epoch": 0.6598984771573604, + "grad_norm": 0.4401229027571195, + "learning_rate": 2.853759879426644e-06, + "loss": 0.5181, + "step": 1365 + }, + { + "epoch": 0.660381919265168, + "grad_norm": 0.42349548120906483, + "learning_rate": 2.8465426647708067e-06, + "loss": 0.5163, + "step": 1366 + }, + { + "epoch": 0.6608653613729756, + "grad_norm": 0.40013997451662586, + "learning_rate": 2.8393309545254776e-06, + "loss": 0.5214, + "step": 1367 + }, + { + "epoch": 0.6613488034807832, + "grad_norm": 0.4359244125864156, + "learning_rate": 2.8321247671243695e-06, + "loss": 0.5179, + "step": 1368 + }, + { + "epoch": 0.6618322455885908, + "grad_norm": 0.41425895295471055, + "learning_rate": 2.82492412098708e-06, + "loss": 0.5081, + "step": 1369 + }, + { + "epoch": 0.6623156876963984, + "grad_norm": 0.4210065663342879, + "learning_rate": 2.8177290345190387e-06, + "loss": 0.5194, + "step": 1370 + }, + { + "epoch": 0.662799129804206, + "grad_norm": 0.4028980901393777, + "learning_rate": 2.8105395261114666e-06, + "loss": 0.5234, + "step": 1371 + }, + { + "epoch": 0.6632825719120136, + "grad_norm": 0.4325922757476261, + "learning_rate": 2.803355614141327e-06, + "loss": 0.5188, + "step": 1372 + }, + { + "epoch": 0.6637660140198212, + "grad_norm": 0.4308186918740408, + "learning_rate": 2.7961773169712803e-06, + "loss": 0.5125, + "step": 1373 + }, + { + "epoch": 0.6642494561276288, + "grad_norm": 0.4211885259856405, + 
"learning_rate": 2.7890046529496284e-06, + "loss": 0.5233, + "step": 1374 + }, + { + "epoch": 0.6647328982354364, + "grad_norm": 0.4304676159038956, + "learning_rate": 2.7818376404102832e-06, + "loss": 0.5188, + "step": 1375 + }, + { + "epoch": 0.665216340343244, + "grad_norm": 0.4137521174014562, + "learning_rate": 2.774676297672701e-06, + "loss": 0.5248, + "step": 1376 + }, + { + "epoch": 0.6656997824510514, + "grad_norm": 0.4389331875357886, + "learning_rate": 2.7675206430418542e-06, + "loss": 0.5265, + "step": 1377 + }, + { + "epoch": 0.666183224558859, + "grad_norm": 0.46429330512304384, + "learning_rate": 2.7603706948081745e-06, + "loss": 0.5211, + "step": 1378 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.4260734411731187, + "learning_rate": 2.753226471247501e-06, + "loss": 0.517, + "step": 1379 + }, + { + "epoch": 0.6671501087744742, + "grad_norm": 0.4189810127916622, + "learning_rate": 2.7460879906210485e-06, + "loss": 0.5107, + "step": 1380 + }, + { + "epoch": 0.6676335508822818, + "grad_norm": 0.41204910620329505, + "learning_rate": 2.7389552711753477e-06, + "loss": 0.5191, + "step": 1381 + }, + { + "epoch": 0.6681169929900894, + "grad_norm": 0.4267680612975131, + "learning_rate": 2.731828331142207e-06, + "loss": 0.5128, + "step": 1382 + }, + { + "epoch": 0.668600435097897, + "grad_norm": 0.42901984315752384, + "learning_rate": 2.7247071887386544e-06, + "loss": 0.5257, + "step": 1383 + }, + { + "epoch": 0.6690838772057046, + "grad_norm": 0.4146728225846163, + "learning_rate": 2.7175918621669074e-06, + "loss": 0.5184, + "step": 1384 + }, + { + "epoch": 0.6695673193135122, + "grad_norm": 0.4782489091382579, + "learning_rate": 2.7104823696143136e-06, + "loss": 0.5298, + "step": 1385 + }, + { + "epoch": 0.6700507614213198, + "grad_norm": 0.40703765978893935, + "learning_rate": 2.70337872925331e-06, + "loss": 0.5111, + "step": 1386 + }, + { + "epoch": 0.6705342035291274, + "grad_norm": 0.4109547447766556, + "learning_rate": 2.6962809592413726e-06, 
+ "loss": 0.5002, + "step": 1387 + }, + { + "epoch": 0.671017645636935, + "grad_norm": 0.4498968198632276, + "learning_rate": 2.6891890777209696e-06, + "loss": 0.5256, + "step": 1388 + }, + { + "epoch": 0.6715010877447426, + "grad_norm": 0.42208190857564254, + "learning_rate": 2.68210310281953e-06, + "loss": 0.5193, + "step": 1389 + }, + { + "epoch": 0.6719845298525502, + "grad_norm": 0.41822528698390377, + "learning_rate": 2.67502305264937e-06, + "loss": 0.5163, + "step": 1390 + }, + { + "epoch": 0.6724679719603578, + "grad_norm": 0.4218034674050614, + "learning_rate": 2.667948945307674e-06, + "loss": 0.5174, + "step": 1391 + }, + { + "epoch": 0.6729514140681654, + "grad_norm": 0.4206471334382422, + "learning_rate": 2.6608807988764252e-06, + "loss": 0.4936, + "step": 1392 + }, + { + "epoch": 0.6734348561759729, + "grad_norm": 0.42181885072694014, + "learning_rate": 2.653818631422378e-06, + "loss": 0.5138, + "step": 1393 + }, + { + "epoch": 0.6739182982837805, + "grad_norm": 0.4261589725068296, + "learning_rate": 2.6467624609970005e-06, + "loss": 0.5145, + "step": 1394 + }, + { + "epoch": 0.6744017403915881, + "grad_norm": 0.40519700853309554, + "learning_rate": 2.6397123056364364e-06, + "loss": 0.5013, + "step": 1395 + }, + { + "epoch": 0.6748851824993957, + "grad_norm": 0.4510436140721377, + "learning_rate": 2.6326681833614464e-06, + "loss": 0.5184, + "step": 1396 + }, + { + "epoch": 0.6753686246072033, + "grad_norm": 0.45003681113297744, + "learning_rate": 2.6256301121773775e-06, + "loss": 0.5149, + "step": 1397 + }, + { + "epoch": 0.6758520667150109, + "grad_norm": 0.4263810181960221, + "learning_rate": 2.618598110074105e-06, + "loss": 0.5115, + "step": 1398 + }, + { + "epoch": 0.6763355088228185, + "grad_norm": 0.4263005297393967, + "learning_rate": 2.6115721950259977e-06, + "loss": 0.5243, + "step": 1399 + }, + { + "epoch": 0.676818950930626, + "grad_norm": 0.4563967376255983, + "learning_rate": 2.6045523849918553e-06, + "loss": 0.5314, + "step": 1400 + }, + 
{ + "epoch": 0.6773023930384336, + "grad_norm": 0.4478593986013541, + "learning_rate": 2.5975386979148792e-06, + "loss": 0.5179, + "step": 1401 + }, + { + "epoch": 0.6777858351462412, + "grad_norm": 0.40707261007936574, + "learning_rate": 2.590531151722622e-06, + "loss": 0.5165, + "step": 1402 + }, + { + "epoch": 0.6782692772540488, + "grad_norm": 0.45689999209163507, + "learning_rate": 2.5835297643269326e-06, + "loss": 0.5212, + "step": 1403 + }, + { + "epoch": 0.6787527193618564, + "grad_norm": 0.41521250872284, + "learning_rate": 2.576534553623925e-06, + "loss": 0.5197, + "step": 1404 + }, + { + "epoch": 0.679236161469664, + "grad_norm": 0.3969654860159799, + "learning_rate": 2.5695455374939147e-06, + "loss": 0.4939, + "step": 1405 + }, + { + "epoch": 0.6797196035774716, + "grad_norm": 0.4115250925249713, + "learning_rate": 2.5625627338014004e-06, + "loss": 0.5242, + "step": 1406 + }, + { + "epoch": 0.6802030456852792, + "grad_norm": 0.4253454941567133, + "learning_rate": 2.5555861603949832e-06, + "loss": 0.513, + "step": 1407 + }, + { + "epoch": 0.6806864877930868, + "grad_norm": 0.4448844424181978, + "learning_rate": 2.548615835107352e-06, + "loss": 0.5047, + "step": 1408 + }, + { + "epoch": 0.6811699299008944, + "grad_norm": 0.41222858577096244, + "learning_rate": 2.5416517757552157e-06, + "loss": 0.5286, + "step": 1409 + }, + { + "epoch": 0.6816533720087019, + "grad_norm": 0.42285086542458045, + "learning_rate": 2.534694000139273e-06, + "loss": 0.5169, + "step": 1410 + }, + { + "epoch": 0.6821368141165095, + "grad_norm": 0.4122433378845125, + "learning_rate": 2.5277425260441616e-06, + "loss": 0.515, + "step": 1411 + }, + { + "epoch": 0.6826202562243171, + "grad_norm": 0.4362061175188878, + "learning_rate": 2.520797371238406e-06, + "loss": 0.5225, + "step": 1412 + }, + { + "epoch": 0.6831036983321247, + "grad_norm": 0.4411789430289944, + "learning_rate": 2.513858553474382e-06, + "loss": 0.5191, + "step": 1413 + }, + { + "epoch": 0.6835871404399323, + 
"grad_norm": 0.4415744443134195, + "learning_rate": 2.506926090488269e-06, + "loss": 0.5306, + "step": 1414 + }, + { + "epoch": 0.6840705825477399, + "grad_norm": 0.4477316137829116, + "learning_rate": 2.5000000000000015e-06, + "loss": 0.5248, + "step": 1415 + }, + { + "epoch": 0.6845540246555475, + "grad_norm": 0.41099572818531255, + "learning_rate": 2.4930802997132213e-06, + "loss": 0.5218, + "step": 1416 + }, + { + "epoch": 0.6850374667633551, + "grad_norm": 0.4009913145578469, + "learning_rate": 2.486167007315243e-06, + "loss": 0.5189, + "step": 1417 + }, + { + "epoch": 0.6855209088711627, + "grad_norm": 0.43880257019064667, + "learning_rate": 2.479260140476999e-06, + "loss": 0.5114, + "step": 1418 + }, + { + "epoch": 0.6860043509789703, + "grad_norm": 0.4322007294880164, + "learning_rate": 2.4723597168529984e-06, + "loss": 0.5066, + "step": 1419 + }, + { + "epoch": 0.6864877930867779, + "grad_norm": 0.4106120224272021, + "learning_rate": 2.465465754081277e-06, + "loss": 0.4888, + "step": 1420 + }, + { + "epoch": 0.6869712351945855, + "grad_norm": 0.422067985874925, + "learning_rate": 2.458578269783364e-06, + "loss": 0.5155, + "step": 1421 + }, + { + "epoch": 0.6874546773023931, + "grad_norm": 0.4136266956566046, + "learning_rate": 2.4516972815642166e-06, + "loss": 0.5143, + "step": 1422 + }, + { + "epoch": 0.6879381194102007, + "grad_norm": 0.4335536983962682, + "learning_rate": 2.444822807012204e-06, + "loss": 0.5196, + "step": 1423 + }, + { + "epoch": 0.6884215615180083, + "grad_norm": 0.42723749184962806, + "learning_rate": 2.4379548636990343e-06, + "loss": 0.5136, + "step": 1424 + }, + { + "epoch": 0.6889050036258159, + "grad_norm": 0.4307011628135296, + "learning_rate": 2.4310934691797207e-06, + "loss": 0.5305, + "step": 1425 + }, + { + "epoch": 0.6893884457336233, + "grad_norm": 0.45161428649005025, + "learning_rate": 2.4242386409925435e-06, + "loss": 0.5048, + "step": 1426 + }, + { + "epoch": 0.6898718878414309, + "grad_norm": 0.4351186095813856, + 
"learning_rate": 2.4173903966589957e-06, + "loss": 0.5216, + "step": 1427 + }, + { + "epoch": 0.6903553299492385, + "grad_norm": 0.4128958039987362, + "learning_rate": 2.410548753683743e-06, + "loss": 0.5206, + "step": 1428 + }, + { + "epoch": 0.6908387720570461, + "grad_norm": 0.4185374425485222, + "learning_rate": 2.4037137295545737e-06, + "loss": 0.5205, + "step": 1429 + }, + { + "epoch": 0.6913222141648537, + "grad_norm": 0.4256083734187945, + "learning_rate": 2.396885341742361e-06, + "loss": 0.4804, + "step": 1430 + }, + { + "epoch": 0.6918056562726613, + "grad_norm": 0.411514639053229, + "learning_rate": 2.390063607701016e-06, + "loss": 0.5194, + "step": 1431 + }, + { + "epoch": 0.6922890983804689, + "grad_norm": 0.43241514860902464, + "learning_rate": 2.3832485448674407e-06, + "loss": 0.53, + "step": 1432 + }, + { + "epoch": 0.6927725404882765, + "grad_norm": 0.4291596725507727, + "learning_rate": 2.3764401706614832e-06, + "loss": 0.5144, + "step": 1433 + }, + { + "epoch": 0.6932559825960841, + "grad_norm": 0.42041788788695633, + "learning_rate": 2.369638502485897e-06, + "loss": 0.5148, + "step": 1434 + }, + { + "epoch": 0.6937394247038917, + "grad_norm": 0.4482987713314786, + "learning_rate": 2.3628435577262947e-06, + "loss": 0.5191, + "step": 1435 + }, + { + "epoch": 0.6942228668116993, + "grad_norm": 0.42573448798758273, + "learning_rate": 2.3560553537511043e-06, + "loss": 0.5021, + "step": 1436 + }, + { + "epoch": 0.6947063089195069, + "grad_norm": 0.41739963072931596, + "learning_rate": 2.3492739079115214e-06, + "loss": 0.5061, + "step": 1437 + }, + { + "epoch": 0.6951897510273145, + "grad_norm": 0.4366261411331466, + "learning_rate": 2.3424992375414655e-06, + "loss": 0.5133, + "step": 1438 + }, + { + "epoch": 0.6956731931351221, + "grad_norm": 0.42225675860612266, + "learning_rate": 2.3357313599575422e-06, + "loss": 0.5254, + "step": 1439 + }, + { + "epoch": 0.6961566352429297, + "grad_norm": 0.4347650420428982, + "learning_rate": 
2.3289702924589914e-06, + "loss": 0.5143, + "step": 1440 + }, + { + "epoch": 0.6966400773507373, + "grad_norm": 0.4220266027824235, + "learning_rate": 2.3222160523276486e-06, + "loss": 0.5194, + "step": 1441 + }, + { + "epoch": 0.6971235194585449, + "grad_norm": 0.400495176856287, + "learning_rate": 2.3154686568278933e-06, + "loss": 0.5315, + "step": 1442 + }, + { + "epoch": 0.6976069615663524, + "grad_norm": 0.4149083634198192, + "learning_rate": 2.3087281232066134e-06, + "loss": 0.5109, + "step": 1443 + }, + { + "epoch": 0.69809040367416, + "grad_norm": 0.43831779922906355, + "learning_rate": 2.3019944686931554e-06, + "loss": 0.5256, + "step": 1444 + }, + { + "epoch": 0.6985738457819676, + "grad_norm": 0.4379300687242213, + "learning_rate": 2.2952677104992855e-06, + "loss": 0.5287, + "step": 1445 + }, + { + "epoch": 0.6990572878897752, + "grad_norm": 0.43973213205463885, + "learning_rate": 2.2885478658191364e-06, + "loss": 0.5192, + "step": 1446 + }, + { + "epoch": 0.6995407299975828, + "grad_norm": 0.42002084857343974, + "learning_rate": 2.281834951829174e-06, + "loss": 0.521, + "step": 1447 + }, + { + "epoch": 0.7000241721053904, + "grad_norm": 0.38595076036167364, + "learning_rate": 2.2751289856881487e-06, + "loss": 0.4869, + "step": 1448 + }, + { + "epoch": 0.700507614213198, + "grad_norm": 0.436647846778714, + "learning_rate": 2.268429984537048e-06, + "loss": 0.5216, + "step": 1449 + }, + { + "epoch": 0.7009910563210056, + "grad_norm": 0.4140253730185284, + "learning_rate": 2.2617379654990623e-06, + "loss": 0.5165, + "step": 1450 + }, + { + "epoch": 0.7014744984288132, + "grad_norm": 0.4644944125638521, + "learning_rate": 2.255052945679525e-06, + "loss": 0.5183, + "step": 1451 + }, + { + "epoch": 0.7019579405366208, + "grad_norm": 0.41536119938345195, + "learning_rate": 2.248374942165894e-06, + "loss": 0.5231, + "step": 1452 + }, + { + "epoch": 0.7024413826444283, + "grad_norm": 0.4012349549582878, + "learning_rate": 2.241703972027679e-06, + "loss": 0.5168, 
+ "step": 1453 + }, + { + "epoch": 0.7029248247522359, + "grad_norm": 0.4521292215779327, + "learning_rate": 2.23504005231642e-06, + "loss": 0.5158, + "step": 1454 + }, + { + "epoch": 0.7034082668600435, + "grad_norm": 0.4172271643387044, + "learning_rate": 2.2283832000656304e-06, + "loss": 0.4941, + "step": 1455 + }, + { + "epoch": 0.7038917089678511, + "grad_norm": 0.421958406666486, + "learning_rate": 2.221733432290762e-06, + "loss": 0.5209, + "step": 1456 + }, + { + "epoch": 0.7043751510756587, + "grad_norm": 0.42224698163781604, + "learning_rate": 2.2150907659891566e-06, + "loss": 0.5173, + "step": 1457 + }, + { + "epoch": 0.7048585931834663, + "grad_norm": 0.43523243642666853, + "learning_rate": 2.2084552181400087e-06, + "loss": 0.5186, + "step": 1458 + }, + { + "epoch": 0.7053420352912738, + "grad_norm": 0.4437233504227722, + "learning_rate": 2.201826805704308e-06, + "loss": 0.5125, + "step": 1459 + }, + { + "epoch": 0.7058254773990814, + "grad_norm": 0.42532048824174346, + "learning_rate": 2.195205545624813e-06, + "loss": 0.5243, + "step": 1460 + }, + { + "epoch": 0.706308919506889, + "grad_norm": 0.4322950043512432, + "learning_rate": 2.188591454826e-06, + "loss": 0.5135, + "step": 1461 + }, + { + "epoch": 0.7067923616146966, + "grad_norm": 0.4272575345234204, + "learning_rate": 2.181984550214015e-06, + "loss": 0.5116, + "step": 1462 + }, + { + "epoch": 0.7072758037225042, + "grad_norm": 0.41921770884395154, + "learning_rate": 2.175384848676639e-06, + "loss": 0.5165, + "step": 1463 + }, + { + "epoch": 0.7077592458303118, + "grad_norm": 0.43176187181049736, + "learning_rate": 2.168792367083243e-06, + "loss": 0.5138, + "step": 1464 + }, + { + "epoch": 0.7082426879381194, + "grad_norm": 0.41695232513283254, + "learning_rate": 2.162207122284742e-06, + "loss": 0.5091, + "step": 1465 + }, + { + "epoch": 0.708726130045927, + "grad_norm": 0.41339935320490057, + "learning_rate": 2.155629131113549e-06, + "loss": 0.5158, + "step": 1466 + }, + { + "epoch": 
0.7092095721537346, + "grad_norm": 0.40689486411834114, + "learning_rate": 2.1490584103835433e-06, + "loss": 0.4847, + "step": 1467 + }, + { + "epoch": 0.7096930142615422, + "grad_norm": 0.417060588337446, + "learning_rate": 2.142494976890011e-06, + "loss": 0.5241, + "step": 1468 + }, + { + "epoch": 0.7101764563693498, + "grad_norm": 0.4289677663647557, + "learning_rate": 2.135938847409625e-06, + "loss": 0.5206, + "step": 1469 + }, + { + "epoch": 0.7106598984771574, + "grad_norm": 0.43410470718447147, + "learning_rate": 2.1293900387003742e-06, + "loss": 0.4931, + "step": 1470 + }, + { + "epoch": 0.711143340584965, + "grad_norm": 0.42958196993128944, + "learning_rate": 2.1228485675015455e-06, + "loss": 0.5204, + "step": 1471 + }, + { + "epoch": 0.7116267826927726, + "grad_norm": 0.4311771692424152, + "learning_rate": 2.1163144505336634e-06, + "loss": 0.5219, + "step": 1472 + }, + { + "epoch": 0.7121102248005802, + "grad_norm": 0.4150104118521869, + "learning_rate": 2.109787704498459e-06, + "loss": 0.519, + "step": 1473 + }, + { + "epoch": 0.7125936669083878, + "grad_norm": 0.43013467795196153, + "learning_rate": 2.1032683460788223e-06, + "loss": 0.4979, + "step": 1474 + }, + { + "epoch": 0.7130771090161954, + "grad_norm": 0.4303795815833922, + "learning_rate": 2.0967563919387563e-06, + "loss": 0.5256, + "step": 1475 + }, + { + "epoch": 0.7135605511240029, + "grad_norm": 0.4386538663824397, + "learning_rate": 2.0902518587233418e-06, + "loss": 0.5195, + "step": 1476 + }, + { + "epoch": 0.7140439932318104, + "grad_norm": 0.41141211228553354, + "learning_rate": 2.08375476305869e-06, + "loss": 0.5238, + "step": 1477 + }, + { + "epoch": 0.714527435339618, + "grad_norm": 0.3832973623968104, + "learning_rate": 2.077265121551903e-06, + "loss": 0.4914, + "step": 1478 + }, + { + "epoch": 0.7150108774474256, + "grad_norm": 0.4396380345403612, + "learning_rate": 2.0707829507910237e-06, + "loss": 0.5224, + "step": 1479 + }, + { + "epoch": 0.7154943195552332, + "grad_norm": 
0.4084969868928133, + "learning_rate": 2.0643082673450053e-06, + "loss": 0.5214, + "step": 1480 + }, + { + "epoch": 0.7159777616630408, + "grad_norm": 0.41940449704789057, + "learning_rate": 2.05784108776366e-06, + "loss": 0.5098, + "step": 1481 + }, + { + "epoch": 0.7164612037708484, + "grad_norm": 0.4368606150106444, + "learning_rate": 2.051381428577622e-06, + "loss": 0.5213, + "step": 1482 + }, + { + "epoch": 0.716944645878656, + "grad_norm": 0.4475169176125263, + "learning_rate": 2.044929306298298e-06, + "loss": 0.5169, + "step": 1483 + }, + { + "epoch": 0.7174280879864636, + "grad_norm": 0.4192404761939798, + "learning_rate": 2.0384847374178346e-06, + "loss": 0.5214, + "step": 1484 + }, + { + "epoch": 0.7179115300942712, + "grad_norm": 0.4000794067095613, + "learning_rate": 2.0320477384090665e-06, + "loss": 0.5002, + "step": 1485 + }, + { + "epoch": 0.7183949722020788, + "grad_norm": 0.4083964682274076, + "learning_rate": 2.0256183257254837e-06, + "loss": 0.5057, + "step": 1486 + }, + { + "epoch": 0.7188784143098864, + "grad_norm": 0.4286205023949667, + "learning_rate": 2.0191965158011854e-06, + "loss": 0.4815, + "step": 1487 + }, + { + "epoch": 0.719361856417694, + "grad_norm": 0.40907099979637535, + "learning_rate": 2.012782325050831e-06, + "loss": 0.5283, + "step": 1488 + }, + { + "epoch": 0.7198452985255016, + "grad_norm": 0.41946463733283473, + "learning_rate": 2.006375769869611e-06, + "loss": 0.522, + "step": 1489 + }, + { + "epoch": 0.7203287406333092, + "grad_norm": 0.4222854300641897, + "learning_rate": 1.9999768666331974e-06, + "loss": 0.5132, + "step": 1490 + }, + { + "epoch": 0.7208121827411168, + "grad_norm": 0.3830302288103666, + "learning_rate": 1.9935856316977044e-06, + "loss": 0.4938, + "step": 1491 + }, + { + "epoch": 0.7212956248489243, + "grad_norm": 0.47757660690611003, + "learning_rate": 1.987202081399639e-06, + "loss": 0.5251, + "step": 1492 + }, + { + "epoch": 0.7217790669567319, + "grad_norm": 0.3992903621119011, + "learning_rate": 
1.9808262320558724e-06, + "loss": 0.506, + "step": 1493 + }, + { + "epoch": 0.7222625090645395, + "grad_norm": 0.41142424465140587, + "learning_rate": 1.9744580999635902e-06, + "loss": 0.5143, + "step": 1494 + }, + { + "epoch": 0.7227459511723471, + "grad_norm": 0.4124129943865437, + "learning_rate": 1.968097701400252e-06, + "loss": 0.5245, + "step": 1495 + }, + { + "epoch": 0.7232293932801547, + "grad_norm": 0.4312737875038871, + "learning_rate": 1.9617450526235464e-06, + "loss": 0.5178, + "step": 1496 + }, + { + "epoch": 0.7237128353879623, + "grad_norm": 0.43509903197162936, + "learning_rate": 1.9554001698713572e-06, + "loss": 0.5131, + "step": 1497 + }, + { + "epoch": 0.7241962774957699, + "grad_norm": 0.4260008705271214, + "learning_rate": 1.949063069361717e-06, + "loss": 0.5136, + "step": 1498 + }, + { + "epoch": 0.7246797196035775, + "grad_norm": 0.42356802738060345, + "learning_rate": 1.9427337672927632e-06, + "loss": 0.5146, + "step": 1499 + }, + { + "epoch": 0.7251631617113851, + "grad_norm": 0.4027997963462275, + "learning_rate": 1.936412279842705e-06, + "loss": 0.4913, + "step": 1500 + }, + { + "epoch": 0.7256466038191927, + "grad_norm": 0.4124397793510055, + "learning_rate": 1.9300986231697705e-06, + "loss": 0.5175, + "step": 1501 + }, + { + "epoch": 0.7261300459270003, + "grad_norm": 0.4442811918906246, + "learning_rate": 1.9237928134121757e-06, + "loss": 0.516, + "step": 1502 + }, + { + "epoch": 0.7266134880348079, + "grad_norm": 0.4393627100062481, + "learning_rate": 1.9174948666880805e-06, + "loss": 0.5155, + "step": 1503 + }, + { + "epoch": 0.7270969301426154, + "grad_norm": 0.43133527501756386, + "learning_rate": 1.9112047990955446e-06, + "loss": 0.5136, + "step": 1504 + }, + { + "epoch": 0.727580372250423, + "grad_norm": 0.45322135855021595, + "learning_rate": 1.9049226267124844e-06, + "loss": 0.5172, + "step": 1505 + }, + { + "epoch": 0.7280638143582306, + "grad_norm": 0.41078461158260915, + "learning_rate": 1.8986483655966408e-06, + "loss": 
0.5179, + "step": 1506 + }, + { + "epoch": 0.7285472564660382, + "grad_norm": 0.4178604053793329, + "learning_rate": 1.8923820317855307e-06, + "loss": 0.5076, + "step": 1507 + }, + { + "epoch": 0.7290306985738458, + "grad_norm": 0.42623268157040256, + "learning_rate": 1.8861236412964106e-06, + "loss": 0.5172, + "step": 1508 + }, + { + "epoch": 0.7295141406816533, + "grad_norm": 0.42835046843347674, + "learning_rate": 1.879873210126229e-06, + "loss": 0.5259, + "step": 1509 + }, + { + "epoch": 0.7299975827894609, + "grad_norm": 0.4196504177616674, + "learning_rate": 1.873630754251588e-06, + "loss": 0.5177, + "step": 1510 + }, + { + "epoch": 0.7304810248972685, + "grad_norm": 0.4079644120305993, + "learning_rate": 1.8673962896287152e-06, + "loss": 0.5201, + "step": 1511 + }, + { + "epoch": 0.7309644670050761, + "grad_norm": 0.43892341061011425, + "learning_rate": 1.8611698321933991e-06, + "loss": 0.5186, + "step": 1512 + }, + { + "epoch": 0.7314479091128837, + "grad_norm": 0.42683430911112086, + "learning_rate": 1.8549513978609707e-06, + "loss": 0.5111, + "step": 1513 + }, + { + "epoch": 0.7319313512206913, + "grad_norm": 0.41062878136002484, + "learning_rate": 1.8487410025262436e-06, + "loss": 0.5103, + "step": 1514 + }, + { + "epoch": 0.7324147933284989, + "grad_norm": 0.4256013874707191, + "learning_rate": 1.8425386620634961e-06, + "loss": 0.5167, + "step": 1515 + }, + { + "epoch": 0.7328982354363065, + "grad_norm": 0.4388797350675763, + "learning_rate": 1.8363443923264046e-06, + "loss": 0.5125, + "step": 1516 + }, + { + "epoch": 0.7333816775441141, + "grad_norm": 0.4394233254146738, + "learning_rate": 1.8301582091480264e-06, + "loss": 0.5217, + "step": 1517 + }, + { + "epoch": 0.7338651196519217, + "grad_norm": 0.41564422037394944, + "learning_rate": 1.8239801283407393e-06, + "loss": 0.5164, + "step": 1518 + }, + { + "epoch": 0.7343485617597293, + "grad_norm": 0.4173422643681329, + "learning_rate": 1.8178101656962188e-06, + "loss": 0.5205, + "step": 1519 + }, + { 
+ "epoch": 0.7348320038675369, + "grad_norm": 0.39698118648442665, + "learning_rate": 1.8116483369853853e-06, + "loss": 0.4835, + "step": 1520 + }, + { + "epoch": 0.7353154459753445, + "grad_norm": 0.42300362992419904, + "learning_rate": 1.8054946579583732e-06, + "loss": 0.5143, + "step": 1521 + }, + { + "epoch": 0.7357988880831521, + "grad_norm": 0.42464469919772974, + "learning_rate": 1.7993491443444771e-06, + "loss": 0.5129, + "step": 1522 + }, + { + "epoch": 0.7362823301909597, + "grad_norm": 0.4501988280108448, + "learning_rate": 1.7932118118521274e-06, + "loss": 0.5131, + "step": 1523 + }, + { + "epoch": 0.7367657722987673, + "grad_norm": 0.41493548901611477, + "learning_rate": 1.787082676168842e-06, + "loss": 0.5268, + "step": 1524 + }, + { + "epoch": 0.7372492144065748, + "grad_norm": 0.4436917707906808, + "learning_rate": 1.7809617529611828e-06, + "loss": 0.5126, + "step": 1525 + }, + { + "epoch": 0.7377326565143824, + "grad_norm": 0.39767655781448813, + "learning_rate": 1.7748490578747257e-06, + "loss": 0.4945, + "step": 1526 + }, + { + "epoch": 0.73821609862219, + "grad_norm": 0.4281607415979641, + "learning_rate": 1.7687446065340074e-06, + "loss": 0.5189, + "step": 1527 + }, + { + "epoch": 0.7386995407299976, + "grad_norm": 0.4123906023331037, + "learning_rate": 1.7626484145425038e-06, + "loss": 0.5117, + "step": 1528 + }, + { + "epoch": 0.7391829828378051, + "grad_norm": 0.39861909677156787, + "learning_rate": 1.7565604974825678e-06, + "loss": 0.4917, + "step": 1529 + }, + { + "epoch": 0.7396664249456127, + "grad_norm": 0.4164290248459804, + "learning_rate": 1.7504808709154104e-06, + "loss": 0.5187, + "step": 1530 + }, + { + "epoch": 0.7401498670534203, + "grad_norm": 0.4149617264710624, + "learning_rate": 1.744409550381041e-06, + "loss": 0.529, + "step": 1531 + }, + { + "epoch": 0.7406333091612279, + "grad_norm": 0.402995768205116, + "learning_rate": 1.7383465513982517e-06, + "loss": 0.4906, + "step": 1532 + }, + { + "epoch": 0.7411167512690355, + 
"grad_norm": 0.4357911248878148, + "learning_rate": 1.7322918894645525e-06, + "loss": 0.5209, + "step": 1533 + }, + { + "epoch": 0.7416001933768431, + "grad_norm": 0.4310636351470309, + "learning_rate": 1.7262455800561456e-06, + "loss": 0.529, + "step": 1534 + }, + { + "epoch": 0.7420836354846507, + "grad_norm": 0.40110062198063573, + "learning_rate": 1.7202076386278876e-06, + "loss": 0.5218, + "step": 1535 + }, + { + "epoch": 0.7425670775924583, + "grad_norm": 0.4044655145984996, + "learning_rate": 1.7141780806132429e-06, + "loss": 0.5038, + "step": 1536 + }, + { + "epoch": 0.7430505197002659, + "grad_norm": 0.4169687562172726, + "learning_rate": 1.70815692142425e-06, + "loss": 0.5094, + "step": 1537 + }, + { + "epoch": 0.7435339618080735, + "grad_norm": 0.3892005945860465, + "learning_rate": 1.702144176451473e-06, + "loss": 0.4909, + "step": 1538 + }, + { + "epoch": 0.7440174039158811, + "grad_norm": 0.4059894671987348, + "learning_rate": 1.696139861063974e-06, + "loss": 0.5231, + "step": 1539 + }, + { + "epoch": 0.7445008460236887, + "grad_norm": 0.4235285224343199, + "learning_rate": 1.690143990609268e-06, + "loss": 0.5116, + "step": 1540 + }, + { + "epoch": 0.7449842881314963, + "grad_norm": 0.4066059462995061, + "learning_rate": 1.6841565804132843e-06, + "loss": 0.5159, + "step": 1541 + }, + { + "epoch": 0.7454677302393038, + "grad_norm": 0.41374792014057904, + "learning_rate": 1.6781776457803227e-06, + "loss": 0.5146, + "step": 1542 + }, + { + "epoch": 0.7459511723471114, + "grad_norm": 0.41330516594974576, + "learning_rate": 1.6722072019930242e-06, + "loss": 0.4841, + "step": 1543 + }, + { + "epoch": 0.746434614454919, + "grad_norm": 0.4342078760633199, + "learning_rate": 1.6662452643123234e-06, + "loss": 0.5181, + "step": 1544 + }, + { + "epoch": 0.7469180565627266, + "grad_norm": 0.4366803318877013, + "learning_rate": 1.660291847977415e-06, + "loss": 0.5056, + "step": 1545 + }, + { + "epoch": 0.7474014986705342, + "grad_norm": 0.4107968782550443, + 
"learning_rate": 1.6543469682057105e-06, + "loss": 0.5102, + "step": 1546 + }, + { + "epoch": 0.7478849407783418, + "grad_norm": 0.43703346533243426, + "learning_rate": 1.6484106401927991e-06, + "loss": 0.517, + "step": 1547 + }, + { + "epoch": 0.7483683828861494, + "grad_norm": 0.4185149815126949, + "learning_rate": 1.6424828791124159e-06, + "loss": 0.5162, + "step": 1548 + }, + { + "epoch": 0.748851824993957, + "grad_norm": 0.3941815905233016, + "learning_rate": 1.6365637001163958e-06, + "loss": 0.4694, + "step": 1549 + }, + { + "epoch": 0.7493352671017646, + "grad_norm": 0.4069386532862478, + "learning_rate": 1.6306531183346387e-06, + "loss": 0.5172, + "step": 1550 + }, + { + "epoch": 0.7498187092095722, + "grad_norm": 0.44449597102378385, + "learning_rate": 1.624751148875065e-06, + "loss": 0.5227, + "step": 1551 + }, + { + "epoch": 0.7503021513173798, + "grad_norm": 0.4200070436877298, + "learning_rate": 1.6188578068235855e-06, + "loss": 0.5227, + "step": 1552 + }, + { + "epoch": 0.7507855934251874, + "grad_norm": 0.4134676341568954, + "learning_rate": 1.6129731072440586e-06, + "loss": 0.5197, + "step": 1553 + }, + { + "epoch": 0.751269035532995, + "grad_norm": 0.4342416540931307, + "learning_rate": 1.6070970651782514e-06, + "loss": 0.5234, + "step": 1554 + }, + { + "epoch": 0.7517524776408026, + "grad_norm": 0.4621699665968105, + "learning_rate": 1.6012296956457972e-06, + "loss": 0.5224, + "step": 1555 + }, + { + "epoch": 0.7522359197486101, + "grad_norm": 0.39794619123328484, + "learning_rate": 1.5953710136441685e-06, + "loss": 0.5222, + "step": 1556 + }, + { + "epoch": 0.7527193618564177, + "grad_norm": 0.39795969856270086, + "learning_rate": 1.5895210341486279e-06, + "loss": 0.4697, + "step": 1557 + }, + { + "epoch": 0.7532028039642252, + "grad_norm": 0.4348573897259895, + "learning_rate": 1.583679772112196e-06, + "loss": 0.5256, + "step": 1558 + }, + { + "epoch": 0.7536862460720328, + "grad_norm": 0.4108494121358044, + "learning_rate": 
1.5778472424656083e-06, + "loss": 0.5185, + "step": 1559 + }, + { + "epoch": 0.7541696881798404, + "grad_norm": 0.41224584403564757, + "learning_rate": 1.5720234601172767e-06, + "loss": 0.5203, + "step": 1560 + }, + { + "epoch": 0.754653130287648, + "grad_norm": 0.4348874788487397, + "learning_rate": 1.566208439953265e-06, + "loss": 0.5189, + "step": 1561 + }, + { + "epoch": 0.7551365723954556, + "grad_norm": 0.42842919833727694, + "learning_rate": 1.5604021968372286e-06, + "loss": 0.5111, + "step": 1562 + }, + { + "epoch": 0.7556200145032632, + "grad_norm": 0.43772492324957596, + "learning_rate": 1.5546047456103964e-06, + "loss": 0.5147, + "step": 1563 + }, + { + "epoch": 0.7561034566110708, + "grad_norm": 0.41431446343362865, + "learning_rate": 1.548816101091517e-06, + "loss": 0.5149, + "step": 1564 + }, + { + "epoch": 0.7565868987188784, + "grad_norm": 0.40777837421338714, + "learning_rate": 1.5430362780768343e-06, + "loss": 0.5117, + "step": 1565 + }, + { + "epoch": 0.757070340826686, + "grad_norm": 0.4454487846070906, + "learning_rate": 1.537265291340042e-06, + "loss": 0.5074, + "step": 1566 + }, + { + "epoch": 0.7575537829344936, + "grad_norm": 0.46396843002779686, + "learning_rate": 1.531503155632249e-06, + "loss": 0.5223, + "step": 1567 + }, + { + "epoch": 0.7580372250423012, + "grad_norm": 0.41741600165011983, + "learning_rate": 1.5257498856819353e-06, + "loss": 0.5158, + "step": 1568 + }, + { + "epoch": 0.7585206671501088, + "grad_norm": 0.4059061868499258, + "learning_rate": 1.5200054961949233e-06, + "loss": 0.5049, + "step": 1569 + }, + { + "epoch": 0.7590041092579164, + "grad_norm": 0.41330390270516437, + "learning_rate": 1.5142700018543382e-06, + "loss": 0.5305, + "step": 1570 + }, + { + "epoch": 0.759487551365724, + "grad_norm": 0.43099056056318497, + "learning_rate": 1.508543417320562e-06, + "loss": 0.5212, + "step": 1571 + }, + { + "epoch": 0.7599709934735316, + "grad_norm": 0.39882553101049034, + "learning_rate": 1.5028257572312105e-06, + "loss": 
0.4883, + "step": 1572 + }, + { + "epoch": 0.7604544355813392, + "grad_norm": 0.4581685557000849, + "learning_rate": 1.4971170362010774e-06, + "loss": 0.5225, + "step": 1573 + }, + { + "epoch": 0.7609378776891468, + "grad_norm": 0.4428964310587446, + "learning_rate": 1.4914172688221213e-06, + "loss": 0.5195, + "step": 1574 + }, + { + "epoch": 0.7614213197969543, + "grad_norm": 0.4170791170307987, + "learning_rate": 1.485726469663401e-06, + "loss": 0.5294, + "step": 1575 + }, + { + "epoch": 0.7619047619047619, + "grad_norm": 0.4212168944035229, + "learning_rate": 1.4800446532710627e-06, + "loss": 0.5143, + "step": 1576 + }, + { + "epoch": 0.7623882040125695, + "grad_norm": 0.4317778496296824, + "learning_rate": 1.4743718341682806e-06, + "loss": 0.5242, + "step": 1577 + }, + { + "epoch": 0.7628716461203771, + "grad_norm": 0.3887549768642727, + "learning_rate": 1.468708026855245e-06, + "loss": 0.4927, + "step": 1578 + }, + { + "epoch": 0.7633550882281847, + "grad_norm": 0.41991973562573803, + "learning_rate": 1.463053245809099e-06, + "loss": 0.5248, + "step": 1579 + }, + { + "epoch": 0.7638385303359922, + "grad_norm": 0.41267795471721197, + "learning_rate": 1.457407505483921e-06, + "loss": 0.5187, + "step": 1580 + }, + { + "epoch": 0.7643219724437998, + "grad_norm": 0.44716407911896383, + "learning_rate": 1.4517708203106763e-06, + "loss": 0.523, + "step": 1581 + }, + { + "epoch": 0.7648054145516074, + "grad_norm": 0.4254440302923612, + "learning_rate": 1.446143204697187e-06, + "loss": 0.5233, + "step": 1582 + }, + { + "epoch": 0.765288856659415, + "grad_norm": 0.39996785018921494, + "learning_rate": 1.4405246730280946e-06, + "loss": 0.5172, + "step": 1583 + }, + { + "epoch": 0.7657722987672226, + "grad_norm": 0.443369622770567, + "learning_rate": 1.4349152396648153e-06, + "loss": 0.5183, + "step": 1584 + }, + { + "epoch": 0.7662557408750302, + "grad_norm": 0.40505843584897416, + "learning_rate": 1.4293149189455146e-06, + "loss": 0.5161, + "step": 1585 + }, + { + 
"epoch": 0.7667391829828378, + "grad_norm": 0.4077704595280849, + "learning_rate": 1.4237237251850634e-06, + "loss": 0.5107, + "step": 1586 + }, + { + "epoch": 0.7672226250906454, + "grad_norm": 0.40791039312028615, + "learning_rate": 1.4181416726750052e-06, + "loss": 0.5146, + "step": 1587 + }, + { + "epoch": 0.767706067198453, + "grad_norm": 0.41705043398231784, + "learning_rate": 1.4125687756835132e-06, + "loss": 0.4812, + "step": 1588 + }, + { + "epoch": 0.7681895093062606, + "grad_norm": 0.4235182346193989, + "learning_rate": 1.4070050484553644e-06, + "loss": 0.5129, + "step": 1589 + }, + { + "epoch": 0.7686729514140682, + "grad_norm": 0.414137655909364, + "learning_rate": 1.4014505052118893e-06, + "loss": 0.5236, + "step": 1590 + }, + { + "epoch": 0.7691563935218757, + "grad_norm": 0.43611300077847176, + "learning_rate": 1.3959051601509537e-06, + "loss": 0.5345, + "step": 1591 + }, + { + "epoch": 0.7696398356296833, + "grad_norm": 0.410845648388898, + "learning_rate": 1.3903690274469029e-06, + "loss": 0.5115, + "step": 1592 + }, + { + "epoch": 0.7701232777374909, + "grad_norm": 0.3961083948871449, + "learning_rate": 1.3848421212505404e-06, + "loss": 0.5168, + "step": 1593 + }, + { + "epoch": 0.7706067198452985, + "grad_norm": 0.42179325369386034, + "learning_rate": 1.37932445568908e-06, + "loss": 0.5125, + "step": 1594 + }, + { + "epoch": 0.7710901619531061, + "grad_norm": 0.4213217250215216, + "learning_rate": 1.3738160448661253e-06, + "loss": 0.5267, + "step": 1595 + }, + { + "epoch": 0.7715736040609137, + "grad_norm": 0.4143253090473424, + "learning_rate": 1.3683169028616155e-06, + "loss": 0.5178, + "step": 1596 + }, + { + "epoch": 0.7720570461687213, + "grad_norm": 0.4171850827541685, + "learning_rate": 1.3628270437317993e-06, + "loss": 0.5211, + "step": 1597 + }, + { + "epoch": 0.7725404882765289, + "grad_norm": 0.39565458081679644, + "learning_rate": 1.3573464815092003e-06, + "loss": 0.5055, + "step": 1598 + }, + { + "epoch": 0.7730239303843365, + 
"grad_norm": 0.4271922188091497, + "learning_rate": 1.3518752302025773e-06, + "loss": 0.5279, + "step": 1599 + }, + { + "epoch": 0.7735073724921441, + "grad_norm": 0.4151739224827406, + "learning_rate": 1.3464133037968914e-06, + "loss": 0.5239, + "step": 1600 + }, + { + "epoch": 0.7739908145999517, + "grad_norm": 0.3960683162461613, + "learning_rate": 1.3409607162532628e-06, + "loss": 0.4987, + "step": 1601 + }, + { + "epoch": 0.7744742567077593, + "grad_norm": 0.43044333694614223, + "learning_rate": 1.3355174815089477e-06, + "loss": 0.5273, + "step": 1602 + }, + { + "epoch": 0.7749576988155669, + "grad_norm": 0.4121649380386113, + "learning_rate": 1.3300836134772916e-06, + "loss": 0.5162, + "step": 1603 + }, + { + "epoch": 0.7754411409233745, + "grad_norm": 0.4005354058641754, + "learning_rate": 1.3246591260477015e-06, + "loss": 0.5167, + "step": 1604 + }, + { + "epoch": 0.7759245830311821, + "grad_norm": 0.3951020817933521, + "learning_rate": 1.3192440330856005e-06, + "loss": 0.5251, + "step": 1605 + }, + { + "epoch": 0.7764080251389897, + "grad_norm": 0.42611917105831465, + "learning_rate": 1.3138383484324063e-06, + "loss": 0.5252, + "step": 1606 + }, + { + "epoch": 0.7768914672467973, + "grad_norm": 0.40098636118444037, + "learning_rate": 1.308442085905482e-06, + "loss": 0.5101, + "step": 1607 + }, + { + "epoch": 0.7773749093546047, + "grad_norm": 0.4404415072756006, + "learning_rate": 1.30305525929811e-06, + "loss": 0.5224, + "step": 1608 + }, + { + "epoch": 0.7778583514624123, + "grad_norm": 0.40390400609014704, + "learning_rate": 1.297677882379455e-06, + "loss": 0.5191, + "step": 1609 + }, + { + "epoch": 0.7783417935702199, + "grad_norm": 0.43645719023114843, + "learning_rate": 1.2923099688945234e-06, + "loss": 0.5096, + "step": 1610 + }, + { + "epoch": 0.7788252356780275, + "grad_norm": 0.401799031041578, + "learning_rate": 1.2869515325641357e-06, + "loss": 0.4812, + "step": 1611 + }, + { + "epoch": 0.7793086777858351, + "grad_norm": 0.40544675897829047, + 
"learning_rate": 1.281602587084887e-06, + "loss": 0.5211, + "step": 1612 + }, + { + "epoch": 0.7797921198936427, + "grad_norm": 0.4166351291750946, + "learning_rate": 1.2762631461291148e-06, + "loss": 0.5294, + "step": 1613 + }, + { + "epoch": 0.7802755620014503, + "grad_norm": 0.4334981607396633, + "learning_rate": 1.2709332233448573e-06, + "loss": 0.5096, + "step": 1614 + }, + { + "epoch": 0.7807590041092579, + "grad_norm": 0.437984950036233, + "learning_rate": 1.2656128323558286e-06, + "loss": 0.5135, + "step": 1615 + }, + { + "epoch": 0.7812424462170655, + "grad_norm": 0.41467240914944964, + "learning_rate": 1.2603019867613764e-06, + "loss": 0.5162, + "step": 1616 + }, + { + "epoch": 0.7817258883248731, + "grad_norm": 0.40797210573439474, + "learning_rate": 1.2550007001364518e-06, + "loss": 0.5064, + "step": 1617 + }, + { + "epoch": 0.7822093304326807, + "grad_norm": 0.40625079236189654, + "learning_rate": 1.2497089860315675e-06, + "loss": 0.5057, + "step": 1618 + }, + { + "epoch": 0.7826927725404883, + "grad_norm": 0.3973135238618207, + "learning_rate": 1.244426857972773e-06, + "loss": 0.5125, + "step": 1619 + }, + { + "epoch": 0.7831762146482959, + "grad_norm": 0.41758654400468537, + "learning_rate": 1.239154329461615e-06, + "loss": 0.5146, + "step": 1620 + }, + { + "epoch": 0.7836596567561035, + "grad_norm": 0.4546571879884002, + "learning_rate": 1.233891413975098e-06, + "loss": 0.5138, + "step": 1621 + }, + { + "epoch": 0.7841430988639111, + "grad_norm": 0.4501304501527847, + "learning_rate": 1.228638124965661e-06, + "loss": 0.5111, + "step": 1622 + }, + { + "epoch": 0.7846265409717187, + "grad_norm": 0.40173574952002505, + "learning_rate": 1.223394475861131e-06, + "loss": 0.5134, + "step": 1623 + }, + { + "epoch": 0.7851099830795262, + "grad_norm": 0.4105768174048188, + "learning_rate": 1.2181604800646996e-06, + "loss": 0.5092, + "step": 1624 + }, + { + "epoch": 0.7855934251873338, + "grad_norm": 0.39390517153871624, + "learning_rate": 
1.212936150954882e-06, + "loss": 0.498, + "step": 1625 + }, + { + "epoch": 0.7860768672951414, + "grad_norm": 0.41453725871465896, + "learning_rate": 1.207721501885486e-06, + "loss": 0.5063, + "step": 1626 + }, + { + "epoch": 0.786560309402949, + "grad_norm": 0.44249465126635484, + "learning_rate": 1.2025165461855714e-06, + "loss": 0.5212, + "step": 1627 + }, + { + "epoch": 0.7870437515107566, + "grad_norm": 0.4079816768267276, + "learning_rate": 1.1973212971594262e-06, + "loss": 0.5155, + "step": 1628 + }, + { + "epoch": 0.7875271936185642, + "grad_norm": 0.4318458945961838, + "learning_rate": 1.1921357680865258e-06, + "loss": 0.5183, + "step": 1629 + }, + { + "epoch": 0.7880106357263718, + "grad_norm": 0.40656198305401237, + "learning_rate": 1.1869599722215013e-06, + "loss": 0.4949, + "step": 1630 + }, + { + "epoch": 0.7884940778341794, + "grad_norm": 0.4056814293942294, + "learning_rate": 1.181793922794102e-06, + "loss": 0.5206, + "step": 1631 + }, + { + "epoch": 0.788977519941987, + "grad_norm": 0.42895763169120843, + "learning_rate": 1.1766376330091684e-06, + "loss": 0.503, + "step": 1632 + }, + { + "epoch": 0.7894609620497945, + "grad_norm": 0.4165970675717556, + "learning_rate": 1.1714911160465924e-06, + "loss": 0.5255, + "step": 1633 + }, + { + "epoch": 0.7899444041576021, + "grad_norm": 0.4123917311937627, + "learning_rate": 1.1663543850612847e-06, + "loss": 0.5169, + "step": 1634 + }, + { + "epoch": 0.7904278462654097, + "grad_norm": 0.41612583641837364, + "learning_rate": 1.1612274531831463e-06, + "loss": 0.4938, + "step": 1635 + }, + { + "epoch": 0.7909112883732173, + "grad_norm": 0.40728900719245686, + "learning_rate": 1.1561103335170242e-06, + "loss": 0.5222, + "step": 1636 + }, + { + "epoch": 0.7913947304810249, + "grad_norm": 0.4348645075910405, + "learning_rate": 1.1510030391426941e-06, + "loss": 0.5192, + "step": 1637 + }, + { + "epoch": 0.7918781725888325, + "grad_norm": 0.4086546804175218, + "learning_rate": 1.1459055831148074e-06, + "loss": 
0.5232, + "step": 1638 + }, + { + "epoch": 0.7923616146966401, + "grad_norm": 0.40880965205946446, + "learning_rate": 1.140817978462876e-06, + "loss": 0.5212, + "step": 1639 + }, + { + "epoch": 0.7928450568044476, + "grad_norm": 0.3893016631161895, + "learning_rate": 1.1357402381912224e-06, + "loss": 0.4873, + "step": 1640 + }, + { + "epoch": 0.7933284989122552, + "grad_norm": 0.4215992969510908, + "learning_rate": 1.1306723752789672e-06, + "loss": 0.5211, + "step": 1641 + }, + { + "epoch": 0.7938119410200628, + "grad_norm": 0.420615559845491, + "learning_rate": 1.1256144026799703e-06, + "loss": 0.5179, + "step": 1642 + }, + { + "epoch": 0.7942953831278704, + "grad_norm": 0.39236133338098145, + "learning_rate": 1.1205663333228217e-06, + "loss": 0.4911, + "step": 1643 + }, + { + "epoch": 0.794778825235678, + "grad_norm": 0.4158254754636244, + "learning_rate": 1.1155281801107897e-06, + "loss": 0.5146, + "step": 1644 + }, + { + "epoch": 0.7952622673434856, + "grad_norm": 0.4092049660763265, + "learning_rate": 1.1104999559218022e-06, + "loss": 0.5063, + "step": 1645 + }, + { + "epoch": 0.7957457094512932, + "grad_norm": 0.43121118572534733, + "learning_rate": 1.1054816736084057e-06, + "loss": 0.5122, + "step": 1646 + }, + { + "epoch": 0.7962291515591008, + "grad_norm": 0.40574034047521074, + "learning_rate": 1.1004733459977325e-06, + "loss": 0.5089, + "step": 1647 + }, + { + "epoch": 0.7967125936669084, + "grad_norm": 0.4343773778355907, + "learning_rate": 1.0954749858914727e-06, + "loss": 0.5177, + "step": 1648 + }, + { + "epoch": 0.797196035774716, + "grad_norm": 0.429877165339691, + "learning_rate": 1.0904866060658376e-06, + "loss": 0.5211, + "step": 1649 + }, + { + "epoch": 0.7976794778825236, + "grad_norm": 0.4108995062804379, + "learning_rate": 1.0855082192715294e-06, + "loss": 0.5174, + "step": 1650 + }, + { + "epoch": 0.7981629199903312, + "grad_norm": 0.4018616150052113, + "learning_rate": 1.0805398382337035e-06, + "loss": 0.5049, + "step": 1651 + }, + { + 
"epoch": 0.7986463620981388, + "grad_norm": 0.4089174910335269, + "learning_rate": 1.0755814756519445e-06, + "loss": 0.5226, + "step": 1652 + }, + { + "epoch": 0.7991298042059464, + "grad_norm": 0.3964537076582955, + "learning_rate": 1.0706331442002226e-06, + "loss": 0.5095, + "step": 1653 + }, + { + "epoch": 0.799613246313754, + "grad_norm": 0.4267767025207229, + "learning_rate": 1.0656948565268782e-06, + "loss": 0.5168, + "step": 1654 + }, + { + "epoch": 0.8000966884215616, + "grad_norm": 0.41363796984886936, + "learning_rate": 1.0607666252545673e-06, + "loss": 0.5128, + "step": 1655 + }, + { + "epoch": 0.8005801305293692, + "grad_norm": 0.43264146945425214, + "learning_rate": 1.0558484629802502e-06, + "loss": 0.514, + "step": 1656 + }, + { + "epoch": 0.8010635726371766, + "grad_norm": 0.42544390140386235, + "learning_rate": 1.0509403822751425e-06, + "loss": 0.512, + "step": 1657 + }, + { + "epoch": 0.8015470147449842, + "grad_norm": 0.3932679351449648, + "learning_rate": 1.0460423956846955e-06, + "loss": 0.4941, + "step": 1658 + }, + { + "epoch": 0.8020304568527918, + "grad_norm": 0.4096876585407803, + "learning_rate": 1.041154515728559e-06, + "loss": 0.5088, + "step": 1659 + }, + { + "epoch": 0.8025138989605994, + "grad_norm": 0.4173497731763413, + "learning_rate": 1.0362767549005454e-06, + "loss": 0.5119, + "step": 1660 + }, + { + "epoch": 0.802997341068407, + "grad_norm": 0.41012015779324845, + "learning_rate": 1.0314091256686065e-06, + "loss": 0.5212, + "step": 1661 + }, + { + "epoch": 0.8034807831762146, + "grad_norm": 0.38447439239259856, + "learning_rate": 1.0265516404747943e-06, + "loss": 0.5052, + "step": 1662 + }, + { + "epoch": 0.8039642252840222, + "grad_norm": 0.40948392634706504, + "learning_rate": 1.0217043117352337e-06, + "loss": 0.5109, + "step": 1663 + }, + { + "epoch": 0.8044476673918298, + "grad_norm": 0.40148827230751766, + "learning_rate": 1.0168671518400853e-06, + "loss": 0.5118, + "step": 1664 + }, + { + "epoch": 0.8049311094996374, + 
"grad_norm": 0.3939565441232479, + "learning_rate": 1.0120401731535213e-06, + "loss": 0.4879, + "step": 1665 + }, + { + "epoch": 0.805414551607445, + "grad_norm": 0.4394864393242481, + "learning_rate": 1.0072233880136872e-06, + "loss": 0.5104, + "step": 1666 + }, + { + "epoch": 0.8058979937152526, + "grad_norm": 0.4318770671908104, + "learning_rate": 1.0024168087326764e-06, + "loss": 0.5235, + "step": 1667 + }, + { + "epoch": 0.8063814358230602, + "grad_norm": 0.4281259140520081, + "learning_rate": 9.976204475964907e-07, + "loss": 0.5149, + "step": 1668 + }, + { + "epoch": 0.8068648779308678, + "grad_norm": 0.43979946361695016, + "learning_rate": 9.92834316865015e-07, + "loss": 0.5191, + "step": 1669 + }, + { + "epoch": 0.8073483200386754, + "grad_norm": 0.4312412015437643, + "learning_rate": 9.88058428771987e-07, + "loss": 0.5188, + "step": 1670 + }, + { + "epoch": 0.807831762146483, + "grad_norm": 0.4461824252192259, + "learning_rate": 9.832927955249605e-07, + "loss": 0.518, + "step": 1671 + }, + { + "epoch": 0.8083152042542906, + "grad_norm": 0.40455517199845253, + "learning_rate": 9.785374293052802e-07, + "loss": 0.5279, + "step": 1672 + }, + { + "epoch": 0.8087986463620981, + "grad_norm": 0.40500700400967726, + "learning_rate": 9.737923422680424e-07, + "loss": 0.5267, + "step": 1673 + }, + { + "epoch": 0.8092820884699057, + "grad_norm": 0.4053422468834684, + "learning_rate": 9.690575465420733e-07, + "loss": 0.5098, + "step": 1674 + }, + { + "epoch": 0.8097655305777133, + "grad_norm": 0.41221923071964073, + "learning_rate": 9.643330542298929e-07, + "loss": 0.5171, + "step": 1675 + }, + { + "epoch": 0.8102489726855209, + "grad_norm": 0.4289210188727792, + "learning_rate": 9.596188774076849e-07, + "loss": 0.5164, + "step": 1676 + }, + { + "epoch": 0.8107324147933285, + "grad_norm": 0.4119920227929362, + "learning_rate": 9.549150281252633e-07, + "loss": 0.5167, + "step": 1677 + }, + { + "epoch": 0.8112158569011361, + "grad_norm": 0.43146374267443927, + 
"learning_rate": 9.50221518406047e-07, + "loss": 0.5198, + "step": 1678 + }, + { + "epoch": 0.8116992990089437, + "grad_norm": 0.3915995001014536, + "learning_rate": 9.455383602470247e-07, + "loss": 0.5194, + "step": 1679 + }, + { + "epoch": 0.8121827411167513, + "grad_norm": 0.42092897815810126, + "learning_rate": 9.408655656187282e-07, + "loss": 0.5154, + "step": 1680 + }, + { + "epoch": 0.8126661832245589, + "grad_norm": 0.43929014126287974, + "learning_rate": 9.362031464651955e-07, + "loss": 0.5111, + "step": 1681 + }, + { + "epoch": 0.8131496253323665, + "grad_norm": 0.419403258433708, + "learning_rate": 9.31551114703943e-07, + "loss": 0.5175, + "step": 1682 + }, + { + "epoch": 0.813633067440174, + "grad_norm": 0.4235039718034734, + "learning_rate": 9.269094822259439e-07, + "loss": 0.5219, + "step": 1683 + }, + { + "epoch": 0.8141165095479816, + "grad_norm": 0.403949404981181, + "learning_rate": 9.22278260895581e-07, + "loss": 0.5257, + "step": 1684 + }, + { + "epoch": 0.8145999516557892, + "grad_norm": 0.40201626032689436, + "learning_rate": 9.176574625506324e-07, + "loss": 0.5065, + "step": 1685 + }, + { + "epoch": 0.8150833937635968, + "grad_norm": 0.42029809516611727, + "learning_rate": 9.130470990022283e-07, + "loss": 0.5198, + "step": 1686 + }, + { + "epoch": 0.8155668358714044, + "grad_norm": 0.4443584968330059, + "learning_rate": 9.084471820348306e-07, + "loss": 0.5054, + "step": 1687 + }, + { + "epoch": 0.816050277979212, + "grad_norm": 0.4011266291605723, + "learning_rate": 9.038577234061979e-07, + "loss": 0.481, + "step": 1688 + }, + { + "epoch": 0.8165337200870196, + "grad_norm": 0.4116565403445696, + "learning_rate": 8.992787348473575e-07, + "loss": 0.512, + "step": 1689 + }, + { + "epoch": 0.8170171621948271, + "grad_norm": 0.3855753519601646, + "learning_rate": 8.947102280625708e-07, + "loss": 0.4919, + "step": 1690 + }, + { + "epoch": 0.8175006043026347, + "grad_norm": 0.3998193393341577, + "learning_rate": 8.901522147293107e-07, + "loss": 
0.5063, + "step": 1691 + }, + { + "epoch": 0.8179840464104423, + "grad_norm": 0.40465428030335077, + "learning_rate": 8.856047064982276e-07, + "loss": 0.4969, + "step": 1692 + }, + { + "epoch": 0.8184674885182499, + "grad_norm": 0.3993077607842942, + "learning_rate": 8.810677149931168e-07, + "loss": 0.5123, + "step": 1693 + }, + { + "epoch": 0.8189509306260575, + "grad_norm": 0.41845032917424874, + "learning_rate": 8.765412518108957e-07, + "loss": 0.5222, + "step": 1694 + }, + { + "epoch": 0.8194343727338651, + "grad_norm": 0.4482989172909152, + "learning_rate": 8.720253285215685e-07, + "loss": 0.5245, + "step": 1695 + }, + { + "epoch": 0.8199178148416727, + "grad_norm": 0.4096945568958353, + "learning_rate": 8.675199566682002e-07, + "loss": 0.4987, + "step": 1696 + }, + { + "epoch": 0.8204012569494803, + "grad_norm": 0.42715377043083036, + "learning_rate": 8.630251477668828e-07, + "loss": 0.4956, + "step": 1697 + }, + { + "epoch": 0.8208846990572879, + "grad_norm": 0.42586545844645524, + "learning_rate": 8.585409133067119e-07, + "loss": 0.5096, + "step": 1698 + }, + { + "epoch": 0.8213681411650955, + "grad_norm": 0.43766586659276707, + "learning_rate": 8.540672647497483e-07, + "loss": 0.5136, + "step": 1699 + }, + { + "epoch": 0.8218515832729031, + "grad_norm": 0.4371618341766256, + "learning_rate": 8.49604213531004e-07, + "loss": 0.5213, + "step": 1700 + }, + { + "epoch": 0.8223350253807107, + "grad_norm": 0.4375571316772861, + "learning_rate": 8.451517710583934e-07, + "loss": 0.5051, + "step": 1701 + }, + { + "epoch": 0.8228184674885183, + "grad_norm": 0.4132441919616583, + "learning_rate": 8.407099487127207e-07, + "loss": 0.5257, + "step": 1702 + }, + { + "epoch": 0.8233019095963259, + "grad_norm": 0.42607745465695845, + "learning_rate": 8.362787578476395e-07, + "loss": 0.5249, + "step": 1703 + }, + { + "epoch": 0.8237853517041335, + "grad_norm": 0.4075673839523143, + "learning_rate": 8.318582097896316e-07, + "loss": 0.5058, + "step": 1704 + }, + { + "epoch": 
0.8242687938119411, + "grad_norm": 0.42693741052199397, + "learning_rate": 8.274483158379759e-07, + "loss": 0.5111, + "step": 1705 + }, + { + "epoch": 0.8247522359197486, + "grad_norm": 0.39832416179935565, + "learning_rate": 8.230490872647146e-07, + "loss": 0.4938, + "step": 1706 + }, + { + "epoch": 0.8252356780275562, + "grad_norm": 0.422151557962671, + "learning_rate": 8.18660535314631e-07, + "loss": 0.5183, + "step": 1707 + }, + { + "epoch": 0.8257191201353637, + "grad_norm": 0.4003210551929738, + "learning_rate": 8.142826712052177e-07, + "loss": 0.5131, + "step": 1708 + }, + { + "epoch": 0.8262025622431713, + "grad_norm": 0.41552515229148246, + "learning_rate": 8.099155061266495e-07, + "loss": 0.5104, + "step": 1709 + }, + { + "epoch": 0.8266860043509789, + "grad_norm": 0.4199192751255081, + "learning_rate": 8.055590512417499e-07, + "loss": 0.504, + "step": 1710 + }, + { + "epoch": 0.8271694464587865, + "grad_norm": 0.4183052253157522, + "learning_rate": 8.012133176859705e-07, + "loss": 0.5183, + "step": 1711 + }, + { + "epoch": 0.8276528885665941, + "grad_norm": 0.40771463289221466, + "learning_rate": 7.968783165673554e-07, + "loss": 0.5134, + "step": 1712 + }, + { + "epoch": 0.8281363306744017, + "grad_norm": 0.4201027836512912, + "learning_rate": 7.925540589665187e-07, + "loss": 0.5074, + "step": 1713 + }, + { + "epoch": 0.8286197727822093, + "grad_norm": 0.395143526726159, + "learning_rate": 7.882405559366091e-07, + "loss": 0.4907, + "step": 1714 + }, + { + "epoch": 0.8291032148900169, + "grad_norm": 0.39924930985003787, + "learning_rate": 7.839378185032897e-07, + "loss": 0.5107, + "step": 1715 + }, + { + "epoch": 0.8295866569978245, + "grad_norm": 0.4132095601626946, + "learning_rate": 7.796458576647015e-07, + "loss": 0.5185, + "step": 1716 + }, + { + "epoch": 0.8300700991056321, + "grad_norm": 0.40587201306044, + "learning_rate": 7.753646843914465e-07, + "loss": 0.5182, + "step": 1717 + }, + { + "epoch": 0.8305535412134397, + "grad_norm": 
0.4094094956774689, + "learning_rate": 7.710943096265461e-07, + "loss": 0.5029, + "step": 1718 + }, + { + "epoch": 0.8310369833212473, + "grad_norm": 0.41067812349491495, + "learning_rate": 7.668347442854218e-07, + "loss": 0.5021, + "step": 1719 + }, + { + "epoch": 0.8315204254290549, + "grad_norm": 0.39956787890532264, + "learning_rate": 7.625859992558665e-07, + "loss": 0.5206, + "step": 1720 + }, + { + "epoch": 0.8320038675368625, + "grad_norm": 0.43928086956712875, + "learning_rate": 7.583480853980158e-07, + "loss": 0.5134, + "step": 1721 + }, + { + "epoch": 0.8324873096446701, + "grad_norm": 0.4100632271699525, + "learning_rate": 7.541210135443188e-07, + "loss": 0.5184, + "step": 1722 + }, + { + "epoch": 0.8329707517524776, + "grad_norm": 0.3961555211112688, + "learning_rate": 7.499047944995108e-07, + "loss": 0.5222, + "step": 1723 + }, + { + "epoch": 0.8334541938602852, + "grad_norm": 0.41401758140390904, + "learning_rate": 7.45699439040588e-07, + "loss": 0.5149, + "step": 1724 + }, + { + "epoch": 0.8339376359680928, + "grad_norm": 0.41725576477900833, + "learning_rate": 7.415049579167783e-07, + "loss": 0.5086, + "step": 1725 + }, + { + "epoch": 0.8344210780759004, + "grad_norm": 0.40808361223845036, + "learning_rate": 7.37321361849514e-07, + "loss": 0.5171, + "step": 1726 + }, + { + "epoch": 0.834904520183708, + "grad_norm": 0.4044441513281848, + "learning_rate": 7.331486615324024e-07, + "loss": 0.4931, + "step": 1727 + }, + { + "epoch": 0.8353879622915156, + "grad_norm": 0.39255016483428246, + "learning_rate": 7.289868676312023e-07, + "loss": 0.4895, + "step": 1728 + }, + { + "epoch": 0.8358714043993232, + "grad_norm": 0.4273894357037594, + "learning_rate": 7.248359907837959e-07, + "loss": 0.5141, + "step": 1729 + }, + { + "epoch": 0.8363548465071308, + "grad_norm": 0.41270523260835523, + "learning_rate": 7.206960416001563e-07, + "loss": 0.5053, + "step": 1730 + }, + { + "epoch": 0.8368382886149384, + "grad_norm": 0.42210989792552517, + "learning_rate": 
7.165670306623296e-07, + "loss": 0.515, + "step": 1731 + }, + { + "epoch": 0.837321730722746, + "grad_norm": 0.4005116526979819, + "learning_rate": 7.124489685243985e-07, + "loss": 0.5084, + "step": 1732 + }, + { + "epoch": 0.8378051728305536, + "grad_norm": 0.42730888005294004, + "learning_rate": 7.08341865712463e-07, + "loss": 0.5149, + "step": 1733 + }, + { + "epoch": 0.8382886149383612, + "grad_norm": 0.3946117211995092, + "learning_rate": 7.042457327246088e-07, + "loss": 0.5272, + "step": 1734 + }, + { + "epoch": 0.8387720570461688, + "grad_norm": 0.40058125990145727, + "learning_rate": 7.001605800308825e-07, + "loss": 0.5173, + "step": 1735 + }, + { + "epoch": 0.8392554991539763, + "grad_norm": 0.39419621537510763, + "learning_rate": 6.960864180732618e-07, + "loss": 0.5182, + "step": 1736 + }, + { + "epoch": 0.8397389412617839, + "grad_norm": 0.4302451888948554, + "learning_rate": 6.920232572656349e-07, + "loss": 0.5145, + "step": 1737 + }, + { + "epoch": 0.8402223833695915, + "grad_norm": 0.39221396906385003, + "learning_rate": 6.879711079937667e-07, + "loss": 0.5079, + "step": 1738 + }, + { + "epoch": 0.840705825477399, + "grad_norm": 0.4210023704512398, + "learning_rate": 6.839299806152799e-07, + "loss": 0.5061, + "step": 1739 + }, + { + "epoch": 0.8411892675852066, + "grad_norm": 0.4031707044630559, + "learning_rate": 6.79899885459619e-07, + "loss": 0.5174, + "step": 1740 + }, + { + "epoch": 0.8416727096930142, + "grad_norm": 0.40104705743190977, + "learning_rate": 6.758808328280325e-07, + "loss": 0.4981, + "step": 1741 + }, + { + "epoch": 0.8421561518008218, + "grad_norm": 0.4158859718137932, + "learning_rate": 6.718728329935448e-07, + "loss": 0.5216, + "step": 1742 + }, + { + "epoch": 0.8426395939086294, + "grad_norm": 0.4140963838597211, + "learning_rate": 6.678758962009241e-07, + "loss": 0.5154, + "step": 1743 + }, + { + "epoch": 0.843123036016437, + "grad_norm": 0.41926365963573253, + "learning_rate": 6.638900326666653e-07, + "loss": 0.5181, + 
"step": 1744 + }, + { + "epoch": 0.8436064781242446, + "grad_norm": 0.4007033614343704, + "learning_rate": 6.599152525789531e-07, + "loss": 0.4772, + "step": 1745 + }, + { + "epoch": 0.8440899202320522, + "grad_norm": 0.4266694328755557, + "learning_rate": 6.559515660976506e-07, + "loss": 0.5153, + "step": 1746 + }, + { + "epoch": 0.8445733623398598, + "grad_norm": 0.42158713984389296, + "learning_rate": 6.519989833542567e-07, + "loss": 0.5218, + "step": 1747 + }, + { + "epoch": 0.8450568044476674, + "grad_norm": 0.4190422236566301, + "learning_rate": 6.480575144518931e-07, + "loss": 0.5267, + "step": 1748 + }, + { + "epoch": 0.845540246555475, + "grad_norm": 0.42322451653416415, + "learning_rate": 6.441271694652701e-07, + "loss": 0.517, + "step": 1749 + }, + { + "epoch": 0.8460236886632826, + "grad_norm": 0.4050974433698499, + "learning_rate": 6.402079584406673e-07, + "loss": 0.523, + "step": 1750 + }, + { + "epoch": 0.8465071307710902, + "grad_norm": 0.39927068510798064, + "learning_rate": 6.36299891395904e-07, + "loss": 0.4943, + "step": 1751 + }, + { + "epoch": 0.8469905728788978, + "grad_norm": 0.40520913199613756, + "learning_rate": 6.32402978320315e-07, + "loss": 0.519, + "step": 1752 + }, + { + "epoch": 0.8474740149867054, + "grad_norm": 0.41524557234436116, + "learning_rate": 6.285172291747232e-07, + "loss": 0.5087, + "step": 1753 + }, + { + "epoch": 0.847957457094513, + "grad_norm": 0.39348055940589066, + "learning_rate": 6.246426538914174e-07, + "loss": 0.5135, + "step": 1754 + }, + { + "epoch": 0.8484408992023206, + "grad_norm": 0.40472211918575973, + "learning_rate": 6.207792623741249e-07, + "loss": 0.5181, + "step": 1755 + }, + { + "epoch": 0.8489243413101281, + "grad_norm": 0.3797781522780497, + "learning_rate": 6.169270644979836e-07, + "loss": 0.4718, + "step": 1756 + }, + { + "epoch": 0.8494077834179357, + "grad_norm": 0.4172564454240539, + "learning_rate": 6.130860701095226e-07, + "loss": 0.5093, + "step": 1757 + }, + { + "epoch": 
0.8498912255257433, + "grad_norm": 0.460481903524328, + "learning_rate": 6.092562890266341e-07, + "loss": 0.5245, + "step": 1758 + }, + { + "epoch": 0.8503746676335509, + "grad_norm": 0.40544203017797725, + "learning_rate": 6.054377310385479e-07, + "loss": 0.5067, + "step": 1759 + }, + { + "epoch": 0.8508581097413584, + "grad_norm": 0.4225253280006634, + "learning_rate": 6.016304059058031e-07, + "loss": 0.5169, + "step": 1760 + }, + { + "epoch": 0.851341551849166, + "grad_norm": 0.40769266639259943, + "learning_rate": 5.97834323360233e-07, + "loss": 0.5243, + "step": 1761 + }, + { + "epoch": 0.8518249939569736, + "grad_norm": 0.42284940262412657, + "learning_rate": 5.940494931049262e-07, + "loss": 0.5194, + "step": 1762 + }, + { + "epoch": 0.8523084360647812, + "grad_norm": 0.3916025337851957, + "learning_rate": 5.902759248142187e-07, + "loss": 0.4975, + "step": 1763 + }, + { + "epoch": 0.8527918781725888, + "grad_norm": 0.41326270414280697, + "learning_rate": 5.86513628133652e-07, + "loss": 0.5154, + "step": 1764 + }, + { + "epoch": 0.8532753202803964, + "grad_norm": 0.40856588365868324, + "learning_rate": 5.827626126799613e-07, + "loss": 0.5154, + "step": 1765 + }, + { + "epoch": 0.853758762388204, + "grad_norm": 0.42831173680710594, + "learning_rate": 5.790228880410426e-07, + "loss": 0.5163, + "step": 1766 + }, + { + "epoch": 0.8542422044960116, + "grad_norm": 0.4218590594382107, + "learning_rate": 5.75294463775935e-07, + "loss": 0.517, + "step": 1767 + }, + { + "epoch": 0.8547256466038192, + "grad_norm": 0.38253864809006055, + "learning_rate": 5.715773494147919e-07, + "loss": 0.4929, + "step": 1768 + }, + { + "epoch": 0.8552090887116268, + "grad_norm": 0.40270548702028475, + "learning_rate": 5.678715544588547e-07, + "loss": 0.5088, + "step": 1769 + }, + { + "epoch": 0.8556925308194344, + "grad_norm": 0.4229953125269584, + "learning_rate": 5.641770883804365e-07, + "loss": 0.5258, + "step": 1770 + }, + { + "epoch": 0.856175972927242, + "grad_norm": 
0.4037677845049078, + "learning_rate": 5.604939606228887e-07, + "loss": 0.5095, + "step": 1771 + }, + { + "epoch": 0.8566594150350495, + "grad_norm": 0.39977977942883575, + "learning_rate": 5.568221806005847e-07, + "loss": 0.5128, + "step": 1772 + }, + { + "epoch": 0.8571428571428571, + "grad_norm": 0.4175904938844971, + "learning_rate": 5.531617576988879e-07, + "loss": 0.5114, + "step": 1773 + }, + { + "epoch": 0.8576262992506647, + "grad_norm": 0.41812393010867166, + "learning_rate": 5.495127012741352e-07, + "loss": 0.5188, + "step": 1774 + }, + { + "epoch": 0.8581097413584723, + "grad_norm": 0.38871202154348194, + "learning_rate": 5.45875020653609e-07, + "loss": 0.4882, + "step": 1775 + }, + { + "epoch": 0.8585931834662799, + "grad_norm": 0.40175664384357557, + "learning_rate": 5.422487251355146e-07, + "loss": 0.5088, + "step": 1776 + }, + { + "epoch": 0.8590766255740875, + "grad_norm": 0.4258611448475652, + "learning_rate": 5.386338239889549e-07, + "loss": 0.5136, + "step": 1777 + }, + { + "epoch": 0.8595600676818951, + "grad_norm": 0.38040545155326977, + "learning_rate": 5.350303264539091e-07, + "loss": 0.4692, + "step": 1778 + }, + { + "epoch": 0.8600435097897027, + "grad_norm": 0.4185751036827134, + "learning_rate": 5.314382417412062e-07, + "loss": 0.516, + "step": 1779 + }, + { + "epoch": 0.8605269518975103, + "grad_norm": 0.4237092619379993, + "learning_rate": 5.278575790325052e-07, + "loss": 0.5146, + "step": 1780 + }, + { + "epoch": 0.8610103940053179, + "grad_norm": 0.4173802982789206, + "learning_rate": 5.242883474802696e-07, + "loss": 0.5125, + "step": 1781 + }, + { + "epoch": 0.8614938361131255, + "grad_norm": 0.41838440801291993, + "learning_rate": 5.207305562077403e-07, + "loss": 0.5177, + "step": 1782 + }, + { + "epoch": 0.8619772782209331, + "grad_norm": 0.4779855097218796, + "learning_rate": 5.1718421430892e-07, + "loss": 0.5304, + "step": 1783 + }, + { + "epoch": 0.8624607203287407, + "grad_norm": 0.37738685143261025, + "learning_rate": 
5.136493308485446e-07, + "loss": 0.486, + "step": 1784 + }, + { + "epoch": 0.8629441624365483, + "grad_norm": 0.39963258309250466, + "learning_rate": 5.101259148620618e-07, + "loss": 0.4959, + "step": 1785 + }, + { + "epoch": 0.8634276045443559, + "grad_norm": 0.39604391770722097, + "learning_rate": 5.066139753556049e-07, + "loss": 0.4993, + "step": 1786 + }, + { + "epoch": 0.8639110466521635, + "grad_norm": 0.40732958269577874, + "learning_rate": 5.031135213059756e-07, + "loss": 0.5153, + "step": 1787 + }, + { + "epoch": 0.864394488759971, + "grad_norm": 0.3897806967927546, + "learning_rate": 4.99624561660616e-07, + "loss": 0.4871, + "step": 1788 + }, + { + "epoch": 0.8648779308677785, + "grad_norm": 0.4048723969181331, + "learning_rate": 4.961471053375899e-07, + "loss": 0.512, + "step": 1789 + }, + { + "epoch": 0.8653613729755861, + "grad_norm": 0.4203351282800037, + "learning_rate": 4.926811612255539e-07, + "loss": 0.5121, + "step": 1790 + }, + { + "epoch": 0.8658448150833937, + "grad_norm": 0.39858565202586066, + "learning_rate": 4.892267381837396e-07, + "loss": 0.5011, + "step": 1791 + }, + { + "epoch": 0.8663282571912013, + "grad_norm": 0.4344627773200746, + "learning_rate": 4.857838450419339e-07, + "loss": 0.5103, + "step": 1792 + }, + { + "epoch": 0.8668116992990089, + "grad_norm": 0.40293448022650774, + "learning_rate": 4.823524906004468e-07, + "loss": 0.5138, + "step": 1793 + }, + { + "epoch": 0.8672951414068165, + "grad_norm": 0.41801337173969716, + "learning_rate": 4.789326836300983e-07, + "loss": 0.5151, + "step": 1794 + }, + { + "epoch": 0.8677785835146241, + "grad_norm": 0.4058943681689954, + "learning_rate": 4.7552443287218866e-07, + "loss": 0.5098, + "step": 1795 + }, + { + "epoch": 0.8682620256224317, + "grad_norm": 0.42652856984845416, + "learning_rate": 4.7212774703848273e-07, + "loss": 0.508, + "step": 1796 + }, + { + "epoch": 0.8687454677302393, + "grad_norm": 0.4211824745719729, + "learning_rate": 4.687426348111834e-07, + "loss": 0.5122, + 
"step": 1797 + }, + { + "epoch": 0.8692289098380469, + "grad_norm": 0.4022753726796167, + "learning_rate": 4.65369104842911e-07, + "loss": 0.52, + "step": 1798 + }, + { + "epoch": 0.8697123519458545, + "grad_norm": 0.40283890754002527, + "learning_rate": 4.620071657566777e-07, + "loss": 0.5072, + "step": 1799 + }, + { + "epoch": 0.8701957940536621, + "grad_norm": 0.4283203699114763, + "learning_rate": 4.586568261458729e-07, + "loss": 0.5096, + "step": 1800 + }, + { + "epoch": 0.8706792361614697, + "grad_norm": 0.38328034805235095, + "learning_rate": 4.553180945742336e-07, + "loss": 0.4861, + "step": 1801 + }, + { + "epoch": 0.8711626782692773, + "grad_norm": 0.4122501087059972, + "learning_rate": 4.5199097957582816e-07, + "loss": 0.5136, + "step": 1802 + }, + { + "epoch": 0.8716461203770849, + "grad_norm": 0.41264272327652995, + "learning_rate": 4.486754896550288e-07, + "loss": 0.5012, + "step": 1803 + }, + { + "epoch": 0.8721295624848925, + "grad_norm": 0.41725003600600513, + "learning_rate": 4.45371633286496e-07, + "loss": 0.5185, + "step": 1804 + }, + { + "epoch": 0.8726130045927, + "grad_norm": 0.4078148663174146, + "learning_rate": 4.4207941891515335e-07, + "loss": 0.5135, + "step": 1805 + }, + { + "epoch": 0.8730964467005076, + "grad_norm": 0.4209684818924423, + "learning_rate": 4.3879885495616505e-07, + "loss": 0.512, + "step": 1806 + }, + { + "epoch": 0.8735798888083152, + "grad_norm": 0.3832255061477332, + "learning_rate": 4.3552994979491836e-07, + "loss": 0.5131, + "step": 1807 + }, + { + "epoch": 0.8740633309161228, + "grad_norm": 0.415646535369065, + "learning_rate": 4.322727117869951e-07, + "loss": 0.5156, + "step": 1808 + }, + { + "epoch": 0.8745467730239304, + "grad_norm": 0.403529021224522, + "learning_rate": 4.290271492581627e-07, + "loss": 0.5225, + "step": 1809 + }, + { + "epoch": 0.875030215131738, + "grad_norm": 0.4248226663595473, + "learning_rate": 4.257932705043372e-07, + "loss": 0.5276, + "step": 1810 + }, + { + "epoch": 0.8755136572395456, 
+ "grad_norm": 0.42279657022545747, + "learning_rate": 4.2257108379157586e-07, + "loss": 0.5224, + "step": 1811 + }, + { + "epoch": 0.8759970993473531, + "grad_norm": 0.4140176038814713, + "learning_rate": 4.1936059735604497e-07, + "loss": 0.5161, + "step": 1812 + }, + { + "epoch": 0.8764805414551607, + "grad_norm": 0.39792458444383394, + "learning_rate": 4.161618194040079e-07, + "loss": 0.5277, + "step": 1813 + }, + { + "epoch": 0.8769639835629683, + "grad_norm": 0.39549573015495143, + "learning_rate": 4.129747581117993e-07, + "loss": 0.5053, + "step": 1814 + }, + { + "epoch": 0.8774474256707759, + "grad_norm": 0.4160802633412061, + "learning_rate": 4.0979942162580387e-07, + "loss": 0.516, + "step": 1815 + }, + { + "epoch": 0.8779308677785835, + "grad_norm": 0.41239251805984983, + "learning_rate": 4.06635818062438e-07, + "loss": 0.5278, + "step": 1816 + }, + { + "epoch": 0.8784143098863911, + "grad_norm": 0.40871873580107365, + "learning_rate": 4.0348395550812713e-07, + "loss": 0.5294, + "step": 1817 + }, + { + "epoch": 0.8788977519941987, + "grad_norm": 0.40365670038657436, + "learning_rate": 4.003438420192873e-07, + "loss": 0.5158, + "step": 1818 + }, + { + "epoch": 0.8793811941020063, + "grad_norm": 0.4255428234546921, + "learning_rate": 3.9721548562229985e-07, + "loss": 0.5114, + "step": 1819 + }, + { + "epoch": 0.8798646362098139, + "grad_norm": 0.41203315649756733, + "learning_rate": 3.9409889431349656e-07, + "loss": 0.5116, + "step": 1820 + }, + { + "epoch": 0.8803480783176215, + "grad_norm": 0.4149872650348109, + "learning_rate": 3.9099407605913576e-07, + "loss": 0.5099, + "step": 1821 + }, + { + "epoch": 0.880831520425429, + "grad_norm": 0.4258100076362105, + "learning_rate": 3.879010387953841e-07, + "loss": 0.5175, + "step": 1822 + }, + { + "epoch": 0.8813149625332366, + "grad_norm": 0.3902355927247227, + "learning_rate": 3.84819790428293e-07, + "loss": 0.498, + "step": 1823 + }, + { + "epoch": 0.8817984046410442, + "grad_norm": 0.40842472365457144, + 
"learning_rate": 3.8175033883378233e-07, + "loss": 0.518, + "step": 1824 + }, + { + "epoch": 0.8822818467488518, + "grad_norm": 0.4221970543634826, + "learning_rate": 3.7869269185761613e-07, + "loss": 0.5216, + "step": 1825 + }, + { + "epoch": 0.8827652888566594, + "grad_norm": 0.40616883661281006, + "learning_rate": 3.7564685731538985e-07, + "loss": 0.5066, + "step": 1826 + }, + { + "epoch": 0.883248730964467, + "grad_norm": 0.4061562407072031, + "learning_rate": 3.7261284299249967e-07, + "loss": 0.517, + "step": 1827 + }, + { + "epoch": 0.8837321730722746, + "grad_norm": 0.4079225433423233, + "learning_rate": 3.695906566441304e-07, + "loss": 0.4959, + "step": 1828 + }, + { + "epoch": 0.8842156151800822, + "grad_norm": 0.38197368709112006, + "learning_rate": 3.665803059952344e-07, + "loss": 0.4871, + "step": 1829 + }, + { + "epoch": 0.8846990572878898, + "grad_norm": 0.411849076052872, + "learning_rate": 3.63581798740511e-07, + "loss": 0.5143, + "step": 1830 + }, + { + "epoch": 0.8851824993956974, + "grad_norm": 0.393276210273132, + "learning_rate": 3.605951425443871e-07, + "loss": 0.4936, + "step": 1831 + }, + { + "epoch": 0.885665941503505, + "grad_norm": 0.369604359657528, + "learning_rate": 3.576203450409943e-07, + "loss": 0.4684, + "step": 1832 + }, + { + "epoch": 0.8861493836113126, + "grad_norm": 0.43326466002005165, + "learning_rate": 3.5465741383415684e-07, + "loss": 0.5104, + "step": 1833 + }, + { + "epoch": 0.8866328257191202, + "grad_norm": 0.41527359664646213, + "learning_rate": 3.5170635649736497e-07, + "loss": 0.519, + "step": 1834 + }, + { + "epoch": 0.8871162678269278, + "grad_norm": 0.41356740894281485, + "learning_rate": 3.487671805737597e-07, + "loss": 0.508, + "step": 1835 + }, + { + "epoch": 0.8875997099347354, + "grad_norm": 0.4050751048123327, + "learning_rate": 3.4583989357611037e-07, + "loss": 0.5135, + "step": 1836 + }, + { + "epoch": 0.888083152042543, + "grad_norm": 0.3923610722591795, + "learning_rate": 3.4292450298679945e-07, + 
"loss": 0.5075, + "step": 1837 + }, + { + "epoch": 0.8885665941503504, + "grad_norm": 0.41919225013002887, + "learning_rate": 3.400210162577999e-07, + "loss": 0.5166, + "step": 1838 + }, + { + "epoch": 0.889050036258158, + "grad_norm": 0.42118222715491443, + "learning_rate": 3.371294408106585e-07, + "loss": 0.523, + "step": 1839 + }, + { + "epoch": 0.8895334783659656, + "grad_norm": 0.3952238335142466, + "learning_rate": 3.3424978403647443e-07, + "loss": 0.5138, + "step": 1840 + }, + { + "epoch": 0.8900169204737732, + "grad_norm": 0.4163195177412695, + "learning_rate": 3.313820532958817e-07, + "loss": 0.5274, + "step": 1841 + }, + { + "epoch": 0.8905003625815808, + "grad_norm": 0.3930314520659748, + "learning_rate": 3.285262559190322e-07, + "loss": 0.4991, + "step": 1842 + }, + { + "epoch": 0.8909838046893884, + "grad_norm": 0.4336804309313973, + "learning_rate": 3.256823992055741e-07, + "loss": 0.5009, + "step": 1843 + }, + { + "epoch": 0.891467246797196, + "grad_norm": 0.41714068524986875, + "learning_rate": 3.228504904246349e-07, + "loss": 0.5238, + "step": 1844 + }, + { + "epoch": 0.8919506889050036, + "grad_norm": 0.41848606366751967, + "learning_rate": 3.20030536814801e-07, + "loss": 0.5202, + "step": 1845 + }, + { + "epoch": 0.8924341310128112, + "grad_norm": 0.422964314144621, + "learning_rate": 3.1722254558410047e-07, + "loss": 0.5104, + "step": 1846 + }, + { + "epoch": 0.8929175731206188, + "grad_norm": 0.41539348703446205, + "learning_rate": 3.144265239099864e-07, + "loss": 0.5152, + "step": 1847 + }, + { + "epoch": 0.8934010152284264, + "grad_norm": 0.3936271006898258, + "learning_rate": 3.1164247893931575e-07, + "loss": 0.5071, + "step": 1848 + }, + { + "epoch": 0.893884457336234, + "grad_norm": 0.4152031331913687, + "learning_rate": 3.088704177883306e-07, + "loss": 0.5181, + "step": 1849 + }, + { + "epoch": 0.8943678994440416, + "grad_norm": 0.4176432021270733, + "learning_rate": 3.06110347542643e-07, + "loss": 0.5235, + "step": 1850 + }, + { + 
"epoch": 0.8948513415518492, + "grad_norm": 0.3954219378639727, + "learning_rate": 3.033622752572157e-07, + "loss": 0.5019, + "step": 1851 + }, + { + "epoch": 0.8953347836596568, + "grad_norm": 0.45830856560980365, + "learning_rate": 3.0062620795634214e-07, + "loss": 0.5263, + "step": 1852 + }, + { + "epoch": 0.8958182257674644, + "grad_norm": 0.4009466020951186, + "learning_rate": 2.9790215263363174e-07, + "loss": 0.5222, + "step": 1853 + }, + { + "epoch": 0.896301667875272, + "grad_norm": 0.3933495297633584, + "learning_rate": 2.951901162519877e-07, + "loss": 0.5233, + "step": 1854 + }, + { + "epoch": 0.8967851099830795, + "grad_norm": 0.39895160904445, + "learning_rate": 2.9249010574359636e-07, + "loss": 0.5212, + "step": 1855 + }, + { + "epoch": 0.8972685520908871, + "grad_norm": 0.42068899596041226, + "learning_rate": 2.898021280098995e-07, + "loss": 0.5168, + "step": 1856 + }, + { + "epoch": 0.8977519941986947, + "grad_norm": 0.39971963228555085, + "learning_rate": 2.8712618992158656e-07, + "loss": 0.5084, + "step": 1857 + }, + { + "epoch": 0.8982354363065023, + "grad_norm": 0.3999616227972635, + "learning_rate": 2.8446229831856964e-07, + "loss": 0.5088, + "step": 1858 + }, + { + "epoch": 0.8987188784143099, + "grad_norm": 0.4001447692276326, + "learning_rate": 2.8181046000997136e-07, + "loss": 0.521, + "step": 1859 + }, + { + "epoch": 0.8992023205221175, + "grad_norm": 0.41592034251039167, + "learning_rate": 2.791706817741041e-07, + "loss": 0.5072, + "step": 1860 + }, + { + "epoch": 0.8996857626299251, + "grad_norm": 0.4445686187455443, + "learning_rate": 2.765429703584538e-07, + "loss": 0.5148, + "step": 1861 + }, + { + "epoch": 0.9001692047377327, + "grad_norm": 0.40228802491920107, + "learning_rate": 2.739273324796621e-07, + "loss": 0.5262, + "step": 1862 + }, + { + "epoch": 0.9006526468455403, + "grad_norm": 0.40404504261863744, + "learning_rate": 2.7132377482351037e-07, + "loss": 0.5147, + "step": 1863 + }, + { + "epoch": 0.9011360889533478, + 
"grad_norm": 0.3986359660989621, + "learning_rate": 2.687323040449025e-07, + "loss": 0.5172, + "step": 1864 + }, + { + "epoch": 0.9016195310611554, + "grad_norm": 0.42039178580411435, + "learning_rate": 2.6615292676784533e-07, + "loss": 0.5191, + "step": 1865 + }, + { + "epoch": 0.902102973168963, + "grad_norm": 0.4168785648766661, + "learning_rate": 2.635856495854372e-07, + "loss": 0.5116, + "step": 1866 + }, + { + "epoch": 0.9025864152767706, + "grad_norm": 0.4006359687639295, + "learning_rate": 2.6103047905984224e-07, + "loss": 0.5243, + "step": 1867 + }, + { + "epoch": 0.9030698573845782, + "grad_norm": 0.4136741219117099, + "learning_rate": 2.584874217222855e-07, + "loss": 0.516, + "step": 1868 + }, + { + "epoch": 0.9035532994923858, + "grad_norm": 0.41454758895188654, + "learning_rate": 2.5595648407302496e-07, + "loss": 0.5299, + "step": 1869 + }, + { + "epoch": 0.9040367416001934, + "grad_norm": 0.43072596167116733, + "learning_rate": 2.53437672581342e-07, + "loss": 0.5192, + "step": 1870 + }, + { + "epoch": 0.9045201837080009, + "grad_norm": 0.413346134850188, + "learning_rate": 2.5093099368551974e-07, + "loss": 0.5135, + "step": 1871 + }, + { + "epoch": 0.9050036258158085, + "grad_norm": 0.44414111234791465, + "learning_rate": 2.484364537928341e-07, + "loss": 0.5248, + "step": 1872 + }, + { + "epoch": 0.9054870679236161, + "grad_norm": 0.41031454686253116, + "learning_rate": 2.45954059279529e-07, + "loss": 0.5198, + "step": 1873 + }, + { + "epoch": 0.9059705100314237, + "grad_norm": 0.3982976345229948, + "learning_rate": 2.4348381649080486e-07, + "loss": 0.5163, + "step": 1874 + }, + { + "epoch": 0.9064539521392313, + "grad_norm": 0.4007617837820295, + "learning_rate": 2.41025731740801e-07, + "loss": 0.511, + "step": 1875 + }, + { + "epoch": 0.9069373942470389, + "grad_norm": 0.40168617787804406, + "learning_rate": 2.3857981131258037e-07, + "loss": 0.5114, + "step": 1876 + }, + { + "epoch": 0.9074208363548465, + "grad_norm": 0.38110421429609603, + 
"learning_rate": 2.3614606145811347e-07, + "loss": 0.4992, + "step": 1877 + }, + { + "epoch": 0.9079042784626541, + "grad_norm": 0.3870732423514054, + "learning_rate": 2.3372448839825978e-07, + "loss": 0.4887, + "step": 1878 + }, + { + "epoch": 0.9083877205704617, + "grad_norm": 0.39979584331802676, + "learning_rate": 2.3131509832275633e-07, + "loss": 0.5122, + "step": 1879 + }, + { + "epoch": 0.9088711626782693, + "grad_norm": 0.3996732608438804, + "learning_rate": 2.2891789739019733e-07, + "loss": 0.5102, + "step": 1880 + }, + { + "epoch": 0.9093546047860769, + "grad_norm": 0.40968516048558534, + "learning_rate": 2.2653289172802295e-07, + "loss": 0.5049, + "step": 1881 + }, + { + "epoch": 0.9098380468938845, + "grad_norm": 0.4006751726323446, + "learning_rate": 2.241600874324984e-07, + "loss": 0.5144, + "step": 1882 + }, + { + "epoch": 0.9103214890016921, + "grad_norm": 0.4066456668668, + "learning_rate": 2.2179949056870432e-07, + "loss": 0.5184, + "step": 1883 + }, + { + "epoch": 0.9108049311094997, + "grad_norm": 0.4179374057794063, + "learning_rate": 2.194511071705141e-07, + "loss": 0.5131, + "step": 1884 + }, + { + "epoch": 0.9112883732173073, + "grad_norm": 0.419480536858942, + "learning_rate": 2.1711494324058724e-07, + "loss": 0.5147, + "step": 1885 + }, + { + "epoch": 0.9117718153251149, + "grad_norm": 0.40624640146953556, + "learning_rate": 2.1479100475034598e-07, + "loss": 0.5084, + "step": 1886 + }, + { + "epoch": 0.9122552574329225, + "grad_norm": 0.40367583928635464, + "learning_rate": 2.1247929763996534e-07, + "loss": 0.4832, + "step": 1887 + }, + { + "epoch": 0.91273869954073, + "grad_norm": 0.3989060344990105, + "learning_rate": 2.101798278183542e-07, + "loss": 0.5144, + "step": 1888 + }, + { + "epoch": 0.9132221416485375, + "grad_norm": 0.3998308893808953, + "learning_rate": 2.0789260116314215e-07, + "loss": 0.5081, + "step": 1889 + }, + { + "epoch": 0.9137055837563451, + "grad_norm": 0.4063990008087812, + "learning_rate": 2.0561762352066638e-07, 
+ "loss": 0.5109, + "step": 1890 + }, + { + "epoch": 0.9141890258641527, + "grad_norm": 0.4167108480628528, + "learning_rate": 2.0335490070595208e-07, + "loss": 0.5186, + "step": 1891 + }, + { + "epoch": 0.9146724679719603, + "grad_norm": 0.39430080435851855, + "learning_rate": 2.011044385027011e-07, + "loss": 0.5101, + "step": 1892 + }, + { + "epoch": 0.9151559100797679, + "grad_norm": 0.42096559238441866, + "learning_rate": 1.988662426632765e-07, + "loss": 0.5078, + "step": 1893 + }, + { + "epoch": 0.9156393521875755, + "grad_norm": 0.39723951707790667, + "learning_rate": 1.9664031890868795e-07, + "loss": 0.5223, + "step": 1894 + }, + { + "epoch": 0.9161227942953831, + "grad_norm": 0.3912147208179025, + "learning_rate": 1.9442667292857432e-07, + "loss": 0.509, + "step": 1895 + }, + { + "epoch": 0.9166062364031907, + "grad_norm": 0.4054442997347736, + "learning_rate": 1.922253103811944e-07, + "loss": 0.4972, + "step": 1896 + }, + { + "epoch": 0.9170896785109983, + "grad_norm": 0.4117401816100168, + "learning_rate": 1.9003623689340777e-07, + "loss": 0.5143, + "step": 1897 + }, + { + "epoch": 0.9175731206188059, + "grad_norm": 0.40528953423093284, + "learning_rate": 1.8785945806066297e-07, + "loss": 0.5186, + "step": 1898 + }, + { + "epoch": 0.9180565627266135, + "grad_norm": 0.4027696401480633, + "learning_rate": 1.85694979446982e-07, + "loss": 0.5167, + "step": 1899 + }, + { + "epoch": 0.9185400048344211, + "grad_norm": 0.38938110778215645, + "learning_rate": 1.835428065849465e-07, + "loss": 0.5141, + "step": 1900 + }, + { + "epoch": 0.9190234469422287, + "grad_norm": 0.3958049685314876, + "learning_rate": 1.814029449756849e-07, + "loss": 0.5231, + "step": 1901 + }, + { + "epoch": 0.9195068890500363, + "grad_norm": 0.4039199277502588, + "learning_rate": 1.7927540008885414e-07, + "loss": 0.5088, + "step": 1902 + }, + { + "epoch": 0.9199903311578439, + "grad_norm": 0.40426884197944674, + "learning_rate": 1.7716017736263192e-07, + "loss": 0.5129, + "step": 1903 + }, 
+ { + "epoch": 0.9204737732656514, + "grad_norm": 0.41358470698939953, + "learning_rate": 1.7505728220369667e-07, + "loss": 0.5203, + "step": 1904 + }, + { + "epoch": 0.920957215373459, + "grad_norm": 0.4250820090378729, + "learning_rate": 1.729667199872187e-07, + "loss": 0.5223, + "step": 1905 + }, + { + "epoch": 0.9214406574812666, + "grad_norm": 0.40899977989644076, + "learning_rate": 1.70888496056843e-07, + "loss": 0.5107, + "step": 1906 + }, + { + "epoch": 0.9219240995890742, + "grad_norm": 0.4187760713922149, + "learning_rate": 1.6882261572467862e-07, + "loss": 0.5142, + "step": 1907 + }, + { + "epoch": 0.9224075416968818, + "grad_norm": 0.39684261118945696, + "learning_rate": 1.6676908427128103e-07, + "loss": 0.4847, + "step": 1908 + }, + { + "epoch": 0.9228909838046894, + "grad_norm": 0.4124141033869449, + "learning_rate": 1.64727906945642e-07, + "loss": 0.5063, + "step": 1909 + }, + { + "epoch": 0.923374425912497, + "grad_norm": 0.4104731721152495, + "learning_rate": 1.6269908896517638e-07, + "loss": 0.5035, + "step": 1910 + }, + { + "epoch": 0.9238578680203046, + "grad_norm": 0.38208183163995635, + "learning_rate": 1.6068263551570596e-07, + "loss": 0.4855, + "step": 1911 + }, + { + "epoch": 0.9243413101281122, + "grad_norm": 0.37943822460943005, + "learning_rate": 1.5867855175144885e-07, + "loss": 0.4863, + "step": 1912 + }, + { + "epoch": 0.9248247522359198, + "grad_norm": 0.4169103989292416, + "learning_rate": 1.5668684279500245e-07, + "loss": 0.5077, + "step": 1913 + }, + { + "epoch": 0.9253081943437274, + "grad_norm": 0.41157707540822663, + "learning_rate": 1.5470751373733773e-07, + "loss": 0.5184, + "step": 1914 + }, + { + "epoch": 0.925791636451535, + "grad_norm": 0.39771451862665147, + "learning_rate": 1.5274056963777817e-07, + "loss": 0.5094, + "step": 1915 + }, + { + "epoch": 0.9262750785593425, + "grad_norm": 0.4092987974762817, + "learning_rate": 1.507860155239921e-07, + "loss": 0.5154, + "step": 1916 + }, + { + "epoch": 0.9267585206671501, + 
"grad_norm": 0.3854503813446518, + "learning_rate": 1.488438563919764e-07, + "loss": 0.4938, + "step": 1917 + }, + { + "epoch": 0.9272419627749577, + "grad_norm": 0.3900052964813903, + "learning_rate": 1.4691409720604732e-07, + "loss": 0.5077, + "step": 1918 + }, + { + "epoch": 0.9277254048827653, + "grad_norm": 0.40750712678387396, + "learning_rate": 1.449967428988247e-07, + "loss": 0.5145, + "step": 1919 + }, + { + "epoch": 0.9282088469905729, + "grad_norm": 0.4023813113333878, + "learning_rate": 1.4309179837122045e-07, + "loss": 0.5291, + "step": 1920 + }, + { + "epoch": 0.9286922890983804, + "grad_norm": 0.38502235475455626, + "learning_rate": 1.411992684924257e-07, + "loss": 0.5119, + "step": 1921 + }, + { + "epoch": 0.929175731206188, + "grad_norm": 0.40862887218787325, + "learning_rate": 1.3931915809990039e-07, + "loss": 0.5106, + "step": 1922 + }, + { + "epoch": 0.9296591733139956, + "grad_norm": 0.4123756674563694, + "learning_rate": 1.374514719993575e-07, + "loss": 0.5126, + "step": 1923 + }, + { + "epoch": 0.9301426154218032, + "grad_norm": 0.41456641529199556, + "learning_rate": 1.3559621496475438e-07, + "loss": 0.5145, + "step": 1924 + }, + { + "epoch": 0.9306260575296108, + "grad_norm": 0.4049152537963314, + "learning_rate": 1.3375339173827551e-07, + "loss": 0.5261, + "step": 1925 + }, + { + "epoch": 0.9311094996374184, + "grad_norm": 0.37450439680837744, + "learning_rate": 1.3192300703032733e-07, + "loss": 0.474, + "step": 1926 + }, + { + "epoch": 0.931592941745226, + "grad_norm": 0.41100475742292075, + "learning_rate": 1.3010506551952018e-07, + "loss": 0.5134, + "step": 1927 + }, + { + "epoch": 0.9320763838530336, + "grad_norm": 0.41369315234307685, + "learning_rate": 1.2829957185265863e-07, + "loss": 0.52, + "step": 1928 + }, + { + "epoch": 0.9325598259608412, + "grad_norm": 0.3885589982730842, + "learning_rate": 1.2650653064473106e-07, + "loss": 0.5031, + "step": 1929 + }, + { + "epoch": 0.9330432680686488, + "grad_norm": 0.3951920703691663, + 
"learning_rate": 1.2472594647889357e-07, + "loss": 0.5092, + "step": 1930 + }, + { + "epoch": 0.9335267101764564, + "grad_norm": 0.40947647060207415, + "learning_rate": 1.2295782390646494e-07, + "loss": 0.5177, + "step": 1931 + }, + { + "epoch": 0.934010152284264, + "grad_norm": 0.390574491653679, + "learning_rate": 1.2120216744690716e-07, + "loss": 0.5133, + "step": 1932 + }, + { + "epoch": 0.9344935943920716, + "grad_norm": 0.4045498383765011, + "learning_rate": 1.194589815878211e-07, + "loss": 0.5163, + "step": 1933 + }, + { + "epoch": 0.9349770364998792, + "grad_norm": 0.40440549648310886, + "learning_rate": 1.177282707849281e-07, + "loss": 0.5181, + "step": 1934 + }, + { + "epoch": 0.9354604786076868, + "grad_norm": 0.4024689599876574, + "learning_rate": 1.1601003946206723e-07, + "loss": 0.5181, + "step": 1935 + }, + { + "epoch": 0.9359439207154944, + "grad_norm": 0.3986512567562451, + "learning_rate": 1.1430429201117476e-07, + "loss": 0.5032, + "step": 1936 + }, + { + "epoch": 0.9364273628233019, + "grad_norm": 0.39397430112101045, + "learning_rate": 1.1261103279227858e-07, + "loss": 0.5178, + "step": 1937 + }, + { + "epoch": 0.9369108049311095, + "grad_norm": 0.4291769455926264, + "learning_rate": 1.1093026613348601e-07, + "loss": 0.5196, + "step": 1938 + }, + { + "epoch": 0.937394247038917, + "grad_norm": 0.3917679927009391, + "learning_rate": 1.0926199633097156e-07, + "loss": 0.4919, + "step": 1939 + }, + { + "epoch": 0.9378776891467246, + "grad_norm": 0.42599062790587783, + "learning_rate": 1.0760622764896866e-07, + "loss": 0.5147, + "step": 1940 + }, + { + "epoch": 0.9383611312545322, + "grad_norm": 0.4023777838627757, + "learning_rate": 1.0596296431975406e-07, + "loss": 0.5156, + "step": 1941 + }, + { + "epoch": 0.9388445733623398, + "grad_norm": 0.3966354448847634, + "learning_rate": 1.0433221054364174e-07, + "loss": 0.5065, + "step": 1942 + }, + { + "epoch": 0.9393280154701474, + "grad_norm": 0.4013413460541232, + "learning_rate": 
1.0271397048897014e-07, + "loss": 0.5053, + "step": 1943 + }, + { + "epoch": 0.939811457577955, + "grad_norm": 0.37653088174864213, + "learning_rate": 1.0110824829209164e-07, + "loss": 0.4939, + "step": 1944 + }, + { + "epoch": 0.9402948996857626, + "grad_norm": 0.399035469753345, + "learning_rate": 9.951504805735979e-08, + "loss": 0.5106, + "step": 1945 + }, + { + "epoch": 0.9407783417935702, + "grad_norm": 0.3991989592342914, + "learning_rate": 9.793437385712479e-08, + "loss": 0.5153, + "step": 1946 + }, + { + "epoch": 0.9412617839013778, + "grad_norm": 0.4057585743893453, + "learning_rate": 9.636622973171583e-08, + "loss": 0.51, + "step": 1947 + }, + { + "epoch": 0.9417452260091854, + "grad_norm": 0.4054528739627977, + "learning_rate": 9.481061968943717e-08, + "loss": 0.516, + "step": 1948 + }, + { + "epoch": 0.942228668116993, + "grad_norm": 0.3921980636127477, + "learning_rate": 9.3267547706552e-08, + "loss": 0.5051, + "step": 1949 + }, + { + "epoch": 0.9427121102248006, + "grad_norm": 0.39913144156030567, + "learning_rate": 9.17370177272775e-08, + "loss": 0.5055, + "step": 1950 + }, + { + "epoch": 0.9431955523326082, + "grad_norm": 0.4004586843938766, + "learning_rate": 9.021903366377093e-08, + "loss": 0.5164, + "step": 1951 + }, + { + "epoch": 0.9436789944404158, + "grad_norm": 0.4037223050343566, + "learning_rate": 8.8713599396123e-08, + "loss": 0.5098, + "step": 1952 + }, + { + "epoch": 0.9441624365482234, + "grad_norm": 0.39850858877215634, + "learning_rate": 8.72207187723445e-08, + "loss": 0.5211, + "step": 1953 + }, + { + "epoch": 0.9446458786560309, + "grad_norm": 0.41059877409881057, + "learning_rate": 8.5740395608358e-08, + "loss": 0.5121, + "step": 1954 + }, + { + "epoch": 0.9451293207638385, + "grad_norm": 0.40573184845060545, + "learning_rate": 8.427263368798955e-08, + "loss": 0.5256, + "step": 1955 + }, + { + "epoch": 0.9456127628716461, + "grad_norm": 0.3966583772201167, + "learning_rate": 8.281743676295639e-08, + "loss": 0.5183, + "step": 1956 
+ }, + { + "epoch": 0.9460962049794537, + "grad_norm": 0.40701943797191764, + "learning_rate": 8.13748085528604e-08, + "loss": 0.5135, + "step": 1957 + }, + { + "epoch": 0.9465796470872613, + "grad_norm": 0.37606341196980025, + "learning_rate": 7.99447527451741e-08, + "loss": 0.4903, + "step": 1958 + }, + { + "epoch": 0.9470630891950689, + "grad_norm": 0.4114856897492863, + "learning_rate": 7.852727299523577e-08, + "loss": 0.5068, + "step": 1959 + }, + { + "epoch": 0.9475465313028765, + "grad_norm": 0.4093526523044555, + "learning_rate": 7.71223729262377e-08, + "loss": 0.5127, + "step": 1960 + }, + { + "epoch": 0.9480299734106841, + "grad_norm": 0.4130076310229578, + "learning_rate": 7.573005612921903e-08, + "loss": 0.5121, + "step": 1961 + }, + { + "epoch": 0.9485134155184917, + "grad_norm": 0.40254945616875554, + "learning_rate": 7.435032616305238e-08, + "loss": 0.5178, + "step": 1962 + }, + { + "epoch": 0.9489968576262993, + "grad_norm": 0.4108181664423654, + "learning_rate": 7.298318655443893e-08, + "loss": 0.5078, + "step": 1963 + }, + { + "epoch": 0.9494802997341069, + "grad_norm": 0.3954161759006289, + "learning_rate": 7.162864079789777e-08, + "loss": 0.5137, + "step": 1964 + }, + { + "epoch": 0.9499637418419145, + "grad_norm": 0.3993428213266096, + "learning_rate": 7.028669235575714e-08, + "loss": 0.496, + "step": 1965 + }, + { + "epoch": 0.950447183949722, + "grad_norm": 0.41493027982851327, + "learning_rate": 6.895734465814597e-08, + "loss": 0.5257, + "step": 1966 + }, + { + "epoch": 0.9509306260575296, + "grad_norm": 0.38537633628397905, + "learning_rate": 6.764060110298287e-08, + "loss": 0.5208, + "step": 1967 + }, + { + "epoch": 0.9514140681653372, + "grad_norm": 0.41057398606285567, + "learning_rate": 6.633646505597113e-08, + "loss": 0.5224, + "step": 1968 + }, + { + "epoch": 0.9518975102731448, + "grad_norm": 0.4420797620121168, + "learning_rate": 6.504493985058813e-08, + "loss": 0.5108, + "step": 1969 + }, + { + "epoch": 0.9523809523809523, + 
"grad_norm": 0.39854773939873966, + "learning_rate": 6.376602878807592e-08, + "loss": 0.5134, + "step": 1970 + }, + { + "epoch": 0.9528643944887599, + "grad_norm": 0.4104047856111181, + "learning_rate": 6.249973513743345e-08, + "loss": 0.5079, + "step": 1971 + }, + { + "epoch": 0.9533478365965675, + "grad_norm": 0.40077931999667527, + "learning_rate": 6.124606213541052e-08, + "loss": 0.5196, + "step": 1972 + }, + { + "epoch": 0.9538312787043751, + "grad_norm": 0.43500257686302385, + "learning_rate": 6.000501298649653e-08, + "loss": 0.5197, + "step": 1973 + }, + { + "epoch": 0.9543147208121827, + "grad_norm": 0.4186094433656202, + "learning_rate": 5.8776590862911764e-08, + "loss": 0.5135, + "step": 1974 + }, + { + "epoch": 0.9547981629199903, + "grad_norm": 0.4119358911199865, + "learning_rate": 5.756079890460342e-08, + "loss": 0.5137, + "step": 1975 + }, + { + "epoch": 0.9552816050277979, + "grad_norm": 0.39694645564275877, + "learning_rate": 5.635764021923229e-08, + "loss": 0.5121, + "step": 1976 + }, + { + "epoch": 0.9557650471356055, + "grad_norm": 0.4154887872586203, + "learning_rate": 5.5167117882171104e-08, + "loss": 0.516, + "step": 1977 + }, + { + "epoch": 0.9562484892434131, + "grad_norm": 0.7692472130509296, + "learning_rate": 5.3989234936489556e-08, + "loss": 0.5055, + "step": 1978 + }, + { + "epoch": 0.9567319313512207, + "grad_norm": 0.4198618304821996, + "learning_rate": 5.2823994392951497e-08, + "loss": 0.5094, + "step": 1979 + }, + { + "epoch": 0.9572153734590283, + "grad_norm": 0.39385026351820934, + "learning_rate": 5.167139923000553e-08, + "loss": 0.4933, + "step": 1980 + }, + { + "epoch": 0.9576988155668359, + "grad_norm": 0.4159053427086944, + "learning_rate": 5.053145239377777e-08, + "loss": 0.4936, + "step": 1981 + }, + { + "epoch": 0.9581822576746435, + "grad_norm": 0.3990167973444839, + "learning_rate": 4.940415679806465e-08, + "loss": 0.5124, + "step": 1982 + }, + { + "epoch": 0.9586656997824511, + "grad_norm": 0.4012277528608715, + 
"learning_rate": 4.828951532432457e-08, + "loss": 0.5151, + "step": 1983 + }, + { + "epoch": 0.9591491418902587, + "grad_norm": 0.4099731484035176, + "learning_rate": 4.718753082167071e-08, + "loss": 0.5191, + "step": 1984 + }, + { + "epoch": 0.9596325839980663, + "grad_norm": 0.41474696363438857, + "learning_rate": 4.6098206106863774e-08, + "loss": 0.515, + "step": 1985 + }, + { + "epoch": 0.9601160261058739, + "grad_norm": 0.4044716506352786, + "learning_rate": 4.5021543964306466e-08, + "loss": 0.5123, + "step": 1986 + }, + { + "epoch": 0.9605994682136814, + "grad_norm": 0.40133573312591214, + "learning_rate": 4.395754714603351e-08, + "loss": 0.5133, + "step": 1987 + }, + { + "epoch": 0.961082910321489, + "grad_norm": 0.4089192998561785, + "learning_rate": 4.290621837170661e-08, + "loss": 0.5236, + "step": 1988 + }, + { + "epoch": 0.9615663524292966, + "grad_norm": 0.39452360352891674, + "learning_rate": 4.186756032860728e-08, + "loss": 0.5137, + "step": 1989 + }, + { + "epoch": 0.9620497945371042, + "grad_norm": 0.39867371724056727, + "learning_rate": 4.08415756716285e-08, + "loss": 0.5093, + "step": 1990 + }, + { + "epoch": 0.9625332366449117, + "grad_norm": 0.357065447847406, + "learning_rate": 3.9828267023269696e-08, + "loss": 0.4505, + "step": 1991 + }, + { + "epoch": 0.9630166787527193, + "grad_norm": 0.427089982663271, + "learning_rate": 3.8827636973630126e-08, + "loss": 0.5101, + "step": 1992 + }, + { + "epoch": 0.9635001208605269, + "grad_norm": 0.4025101063369687, + "learning_rate": 3.783968808039995e-08, + "loss": 0.5245, + "step": 1993 + }, + { + "epoch": 0.9639835629683345, + "grad_norm": 0.4012223737061637, + "learning_rate": 3.68644228688575e-08, + "loss": 0.514, + "step": 1994 + }, + { + "epoch": 0.9644670050761421, + "grad_norm": 0.39715847085154765, + "learning_rate": 3.590184383185758e-08, + "loss": 0.507, + "step": 1995 + }, + { + "epoch": 0.9649504471839497, + "grad_norm": 0.4019019064729592, + "learning_rate": 3.4951953429831484e-08, + 
"loss": 0.5093, + "step": 1996 + }, + { + "epoch": 0.9654338892917573, + "grad_norm": 0.3997438820964838, + "learning_rate": 3.401475409077426e-08, + "loss": 0.4987, + "step": 1997 + }, + { + "epoch": 0.9659173313995649, + "grad_norm": 0.42247021949710184, + "learning_rate": 3.309024821024354e-08, + "loss": 0.5099, + "step": 1998 + }, + { + "epoch": 0.9664007735073725, + "grad_norm": 0.4228197536210846, + "learning_rate": 3.2178438151350685e-08, + "loss": 0.5181, + "step": 1999 + }, + { + "epoch": 0.9668842156151801, + "grad_norm": 0.40195549014330173, + "learning_rate": 3.127932624475638e-08, + "loss": 0.5118, + "step": 2000 + }, + { + "epoch": 0.9673676577229877, + "grad_norm": 0.40083823310970984, + "learning_rate": 3.039291478866169e-08, + "loss": 0.5265, + "step": 2001 + }, + { + "epoch": 0.9678510998307953, + "grad_norm": 0.4054162095867977, + "learning_rate": 2.9519206048807535e-08, + "loss": 0.5173, + "step": 2002 + }, + { + "epoch": 0.9683345419386028, + "grad_norm": 0.4091589042260666, + "learning_rate": 2.8658202258462498e-08, + "loss": 0.5199, + "step": 2003 + }, + { + "epoch": 0.9688179840464104, + "grad_norm": 0.37360554914951866, + "learning_rate": 2.7809905618422227e-08, + "loss": 0.4667, + "step": 2004 + }, + { + "epoch": 0.969301426154218, + "grad_norm": 0.4264262470861418, + "learning_rate": 2.6974318297001144e-08, + "loss": 0.5208, + "step": 2005 + }, + { + "epoch": 0.9697848682620256, + "grad_norm": 0.4133603239690626, + "learning_rate": 2.615144243002743e-08, + "loss": 0.5049, + "step": 2006 + }, + { + "epoch": 0.9702683103698332, + "grad_norm": 0.41234915425778607, + "learning_rate": 2.534128012083914e-08, + "loss": 0.5215, + "step": 2007 + }, + { + "epoch": 0.9707517524776408, + "grad_norm": 0.39530605693418713, + "learning_rate": 2.4543833440275332e-08, + "loss": 0.5096, + "step": 2008 + }, + { + "epoch": 0.9712351945854484, + "grad_norm": 0.42034129099753553, + "learning_rate": 2.375910442667495e-08, + "loss": 0.5111, + "step": 2009 + }, + 
{ + "epoch": 0.971718636693256, + "grad_norm": 0.4128961831040994, + "learning_rate": 2.298709508586794e-08, + "loss": 0.5136, + "step": 2010 + }, + { + "epoch": 0.9722020788010636, + "grad_norm": 0.40946352601157776, + "learning_rate": 2.2227807391172474e-08, + "loss": 0.5239, + "step": 2011 + }, + { + "epoch": 0.9726855209088712, + "grad_norm": 0.3999251664775986, + "learning_rate": 2.1481243283389408e-08, + "loss": 0.514, + "step": 2012 + }, + { + "epoch": 0.9731689630166788, + "grad_norm": 0.412440530125608, + "learning_rate": 2.074740467079672e-08, + "loss": 0.5174, + "step": 2013 + }, + { + "epoch": 0.9736524051244864, + "grad_norm": 0.40416741933458183, + "learning_rate": 2.002629342914453e-08, + "loss": 0.5173, + "step": 2014 + }, + { + "epoch": 0.974135847232294, + "grad_norm": 0.4091688587167212, + "learning_rate": 1.9317911401651734e-08, + "loss": 0.5035, + "step": 2015 + }, + { + "epoch": 0.9746192893401016, + "grad_norm": 0.41181207482580323, + "learning_rate": 1.862226039899995e-08, + "loss": 0.5194, + "step": 2016 + }, + { + "epoch": 0.9751027314479092, + "grad_norm": 0.38917062513507206, + "learning_rate": 1.7939342199329023e-08, + "loss": 0.5081, + "step": 2017 + }, + { + "epoch": 0.9755861735557168, + "grad_norm": 0.4058095413062891, + "learning_rate": 1.7269158548232633e-08, + "loss": 0.514, + "step": 2018 + }, + { + "epoch": 0.9760696156635243, + "grad_norm": 0.3909310997249257, + "learning_rate": 1.661171115875493e-08, + "loss": 0.5086, + "step": 2019 + }, + { + "epoch": 0.9765530577713318, + "grad_norm": 0.3924463631554743, + "learning_rate": 1.5967001711383877e-08, + "loss": 0.5074, + "step": 2020 + }, + { + "epoch": 0.9770364998791394, + "grad_norm": 0.3897349184690982, + "learning_rate": 1.5335031854049055e-08, + "loss": 0.5164, + "step": 2021 + }, + { + "epoch": 0.977519941986947, + "grad_norm": 0.4006696563415638, + "learning_rate": 1.4715803202116075e-08, + "loss": 0.516, + "step": 2022 + }, + { + "epoch": 0.9780033840947546, + 
"grad_norm": 0.39127207135897235, + "learning_rate": 1.4109317338383832e-08, + "loss": 0.4864, + "step": 2023 + }, + { + "epoch": 0.9784868262025622, + "grad_norm": 0.41074499955315413, + "learning_rate": 1.3515575813078386e-08, + "loss": 0.5276, + "step": 2024 + }, + { + "epoch": 0.9789702683103698, + "grad_norm": 0.3989638057067789, + "learning_rate": 1.2934580143851294e-08, + "loss": 0.5116, + "step": 2025 + }, + { + "epoch": 0.9794537104181774, + "grad_norm": 0.4179669163774858, + "learning_rate": 1.2366331815774069e-08, + "loss": 0.5169, + "step": 2026 + }, + { + "epoch": 0.979937152525985, + "grad_norm": 0.420952308284563, + "learning_rate": 1.1810832281335394e-08, + "loss": 0.5221, + "step": 2027 + }, + { + "epoch": 0.9804205946337926, + "grad_norm": 0.40444089801366945, + "learning_rate": 1.1268082960436688e-08, + "loss": 0.526, + "step": 2028 + }, + { + "epoch": 0.9809040367416002, + "grad_norm": 0.39774599938725236, + "learning_rate": 1.0738085240389883e-08, + "loss": 0.5158, + "step": 2029 + }, + { + "epoch": 0.9813874788494078, + "grad_norm": 0.40528876629152616, + "learning_rate": 1.0220840475910765e-08, + "loss": 0.5148, + "step": 2030 + }, + { + "epoch": 0.9818709209572154, + "grad_norm": 0.3880426443734388, + "learning_rate": 9.716349989118412e-09, + "loss": 0.4977, + "step": 2031 + }, + { + "epoch": 0.982354363065023, + "grad_norm": 0.397796748759872, + "learning_rate": 9.224615069532428e-09, + "loss": 0.5183, + "step": 2032 + }, + { + "epoch": 0.9828378051728306, + "grad_norm": 0.41455233095701044, + "learning_rate": 8.745636974066274e-09, + "loss": 0.5151, + "step": 2033 + }, + { + "epoch": 0.9833212472806382, + "grad_norm": 0.39149878382311915, + "learning_rate": 8.279416927026163e-09, + "loss": 0.4852, + "step": 2034 + }, + { + "epoch": 0.9838046893884458, + "grad_norm": 0.428044296219464, + "learning_rate": 7.82595612010828e-09, + "loss": 0.5088, + "step": 2035 + }, + { + "epoch": 0.9842881314962533, + "grad_norm": 0.39066146033771326, + 
"learning_rate": 7.385255712395456e-09, + "loss": 0.5092, + "step": 2036 + }, + { + "epoch": 0.9847715736040609, + "grad_norm": 0.3944546565780817, + "learning_rate": 6.9573168303532775e-09, + "loss": 0.5048, + "step": 2037 + }, + { + "epoch": 0.9852550157118685, + "grad_norm": 0.3875137169857006, + "learning_rate": 6.542140567827871e-09, + "loss": 0.5166, + "step": 2038 + }, + { + "epoch": 0.9857384578196761, + "grad_norm": 0.3942008356766705, + "learning_rate": 6.1397279860431205e-09, + "loss": 0.4846, + "step": 2039 + }, + { + "epoch": 0.9862218999274837, + "grad_norm": 0.4122295335735729, + "learning_rate": 5.750080113598455e-09, + "loss": 0.5191, + "step": 2040 + }, + { + "epoch": 0.9867053420352913, + "grad_norm": 0.4027922091033447, + "learning_rate": 5.373197946464403e-09, + "loss": 0.509, + "step": 2041 + }, + { + "epoch": 0.9871887841430989, + "grad_norm": 0.41834346896126373, + "learning_rate": 5.009082447983149e-09, + "loss": 0.52, + "step": 2042 + }, + { + "epoch": 0.9876722262509064, + "grad_norm": 0.4293813449158595, + "learning_rate": 4.65773454886298e-09, + "loss": 0.5131, + "step": 2043 + }, + { + "epoch": 0.988155668358714, + "grad_norm": 0.3928243234499177, + "learning_rate": 4.319155147176624e-09, + "loss": 0.515, + "step": 2044 + }, + { + "epoch": 0.9886391104665216, + "grad_norm": 0.4041253868270013, + "learning_rate": 3.9933451083612464e-09, + "loss": 0.5001, + "step": 2045 + }, + { + "epoch": 0.9891225525743292, + "grad_norm": 0.4010404057661429, + "learning_rate": 3.6803052652134572e-09, + "loss": 0.5077, + "step": 2046 + }, + { + "epoch": 0.9896059946821368, + "grad_norm": 0.4298866341542551, + "learning_rate": 3.3800364178881996e-09, + "loss": 0.5112, + "step": 2047 + }, + { + "epoch": 0.9900894367899444, + "grad_norm": 0.39490801058055036, + "learning_rate": 3.092539333896527e-09, + "loss": 0.5087, + "step": 2048 + }, + { + "epoch": 0.990572878897752, + "grad_norm": 0.4110997944280951, + "learning_rate": 2.817814748104497e-09, + "loss": 
0.5044, + "step": 2049 + }, + { + "epoch": 0.9910563210055596, + "grad_norm": 0.4386800273412446, + "learning_rate": 2.555863362730393e-09, + "loss": 0.5217, + "step": 2050 + }, + { + "epoch": 0.9915397631133672, + "grad_norm": 0.411463570514358, + "learning_rate": 2.30668584734306e-09, + "loss": 0.5117, + "step": 2051 + }, + { + "epoch": 0.9920232052211748, + "grad_norm": 0.40419424446848334, + "learning_rate": 2.070282838859683e-09, + "loss": 0.5056, + "step": 2052 + }, + { + "epoch": 0.9925066473289823, + "grad_norm": 0.40703300041841234, + "learning_rate": 1.8466549415463442e-09, + "loss": 0.5319, + "step": 2053 + }, + { + "epoch": 0.9929900894367899, + "grad_norm": 0.42530845899182246, + "learning_rate": 1.635802727013025e-09, + "loss": 0.5138, + "step": 2054 + }, + { + "epoch": 0.9934735315445975, + "grad_norm": 0.4058607106461754, + "learning_rate": 1.4377267342158274e-09, + "loss": 0.4883, + "step": 2055 + }, + { + "epoch": 0.9939569736524051, + "grad_norm": 0.4062782037922318, + "learning_rate": 1.2524274694525329e-09, + "loss": 0.5225, + "step": 2056 + }, + { + "epoch": 0.9944404157602127, + "grad_norm": 4.724922277274833, + "learning_rate": 1.0799054063626024e-09, + "loss": 0.5232, + "step": 2057 + }, + { + "epoch": 0.9949238578680203, + "grad_norm": 0.3906054598062824, + "learning_rate": 9.201609859271765e-10, + "loss": 0.5132, + "step": 2058 + }, + { + "epoch": 0.9954072999758279, + "grad_norm": 0.3973857264750407, + "learning_rate": 7.731946164657445e-10, + "loss": 0.5083, + "step": 2059 + }, + { + "epoch": 0.9958907420836355, + "grad_norm": 0.39787099969142303, + "learning_rate": 6.390066736355893e-10, + "loss": 0.5138, + "step": 2060 + }, + { + "epoch": 0.9963741841914431, + "grad_norm": 0.4073985533388715, + "learning_rate": 5.17597500432343e-10, + "loss": 0.5134, + "step": 2061 + }, + { + "epoch": 0.9968576262992507, + "grad_norm": 0.4217388952314898, + "learning_rate": 4.089674071872107e-10, + "loss": 0.5204, + "step": 2062 + }, + { + "epoch": 
0.9973410684070583, + "grad_norm": 0.3938957473860418, + "learning_rate": 3.131166715680811e-10, + "loss": 0.5134, + "step": 2063 + }, + { + "epoch": 0.9978245105148659, + "grad_norm": 0.4133454970429142, + "learning_rate": 2.3004553857675082e-10, + "loss": 0.5136, + "step": 2064 + }, + { + "epoch": 0.9983079526226735, + "grad_norm": 3.9528345451843885, + "learning_rate": 1.5975422055003465e-10, + "loss": 0.5088, + "step": 2065 + }, + { + "epoch": 0.9987913947304811, + "grad_norm": 0.4124191883874225, + "learning_rate": 1.022428971581002e-10, + "loss": 0.5106, + "step": 2066 + }, + { + "epoch": 0.9992748368382887, + "grad_norm": 0.42214447971757757, + "learning_rate": 5.751171540391287e-11, + "loss": 0.513, + "step": 2067 + }, + { + "epoch": 0.9997582789460963, + "grad_norm": 0.4373529560530007, + "learning_rate": 2.556078962490105e-11, + "loss": 0.5278, + "step": 2068 + }, + { + "epoch": 1.0, + "grad_norm": 0.4373529560530007, + "learning_rate": 6.390201489625547e-12, + "loss": 0.4723, + "step": 2069 + }, + { + "epoch": 1.0, + "step": 2069, + "total_flos": 2898754626256896.0, + "train_loss": 0.5475362745847836, + "train_runtime": 128435.5344, + "train_samples_per_second": 2.061, + "train_steps_per_second": 0.016 + } + ], + "logging_steps": 1, + "max_steps": 2069, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 208, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2898754626256896.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..5b22eb7 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:494959cdfdb84582ebc6eea82f4dedb093b3703fd15a96ab9a2baabb48f1e5f5 +size 8081