commit 2e4a8d6a8318f7393848c4b353077dab400dc0a4
Author: ModelHub XC <noreply@modelhub.org.cn>
Date:   Tue Apr 28 07:59:12 2026 +0800

    初始化项目，由ModelHub XC社区提供模型
    
    Model: abhid1234/qwen-0.5b-tool-agent-grpo
    Source: Original Platform

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..52373fe
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..bc5f30d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,199 @@
+---
+library_name: transformers
+tags: []
+---
+
+# Model Card for abhid1234/qwen-0.5b-tool-agent-grpo
+
+<!-- Provide a quick summary of what the model is/does. -->
+
+
+
+## Model Details
+
+### Model Description
+
+<!-- Provide a longer summary of what this model is. -->
+
+This is the model card for a 🤗 Transformers model that has been pushed to the Hub. This model card was automatically generated.
+
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+
+### Model Sources [optional]
+
+<!-- Provide the basic links for the model. -->
+
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+
+## Uses
+
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+### Direct Use
+
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+[More Information Needed]
+
+### Downstream Use [optional]
+
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+[More Information Needed]
+
+### Out-of-Scope Use
+
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+[More Information Needed]
+
+## Bias, Risks, and Limitations
+
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+[More Information Needed]
+
+### Recommendations
+
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed before further recommendations can be made.
+
+## How to Get Started with the Model
+
+Use the code below to get started with the model.
+
+[More Information Needed]
+
+## Training Details
+
+### Training Data
+
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+[More Information Needed]
+
+### Training Procedure
+
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+#### Preprocessing [optional]
+
+[More Information Needed]
+
+
+#### Training Hyperparameters
+
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+#### Speeds, Sizes, Times [optional]
+
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+[More Information Needed]
+
+## Evaluation
+
+<!-- This section describes the evaluation protocols and provides the results. -->
+
+### Testing Data, Factors & Metrics
+
+#### Testing Data
+
+<!-- This should link to a Dataset Card if possible. -->
+
+[More Information Needed]
+
+#### Factors
+
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+[More Information Needed]
+
+#### Metrics
+
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+[More Information Needed]
+
+### Results
+
+[More Information Needed]
+
+#### Summary
+
+
+
+## Model Examination [optional]
+
+<!-- Relevant interpretability work for the model goes here -->
+
+[More Information Needed]
+
+## Environmental Impact
+
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+
+## Technical Specifications [optional]
+
+### Model Architecture and Objective
+
+[More Information Needed]
+
+### Compute Infrastructure
+
+[More Information Needed]
+
+#### Hardware
+
+[More Information Needed]
+
+#### Software
+
+[More Information Needed]
+
+## Citation [optional]
+
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+**BibTeX:**
+
+[More Information Needed]
+
+**APA:**
+
+[More Information Needed]
+
+## Glossary [optional]
+
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+[More Information Needed]
+
+## More Information [optional]
+
+[More Information Needed]
+
+## Model Card Authors [optional]
+
+[More Information Needed]
+
+## Model Card Contact
+
+[More Information Needed]
\ No newline at end of file
diff --git a/artifacts/eval_results.json b/artifacts/eval_results.json
new file mode 100644
index 0000000..84071a5
--- /dev/null
+++ b/artifacts/eval_results.json
@@ -0,0 +1,412 @@
+{
+  "step": 15,
+  "scenarios_path": "data/scenarios_val.jsonl",
+  "num_generations": 8,
+  "total_scenarios": 50,
+  "total_rollouts": 400,
+  "successes": 18,
+  "accuracy_pct": 4.5,
+  "avg_reward": -1.8850000000000002,
+  "per_scenario": [
+    {
+      "scenario_index": 0,
+      "task": "Convert 98 kg to lbs.",
+      "mean_reward": 3.125,
+      "max_reward": 4.0,
+      "success_count": 7,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 1,
+      "task": "What is the speed of light?",
+      "mean_reward": -2.5,
+      "max_reward": -2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 2,
+      "task": "What is the distance from Earth to the Sun in km in miles?",
+      "mean_reward": 0.5625,
+      "max_reward": 2.5,
+      "success_count": 1,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 3,
+      "task": "What is 441 plus 23?",
+      "mean_reward": 2.25,
+      "max_reward": 4.0,
+      "success_count": 1,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 4,
+      "task": "Convert 62 kg to lbs.",
+      "mean_reward": -3.0,
+      "max_reward": -3.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 5,
+      "task": "Which is hotter right now, London or Mumbai?",
+      "mean_reward": -2.875,
+      "max_reward": -2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 6,
+      "task": "What is 185 plus 89?",
+      "mean_reward": -3.0,
+      "max_reward": -3.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 7,
+      "task": "What's the weather like in Dubai?",
+      "mean_reward": -1.375,
+      "max_reward": 4.0,
+      "success_count": 2,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 8,
+      "task": "What is the population of Germany divided by its area in km2?",
+      "mean_reward": -2.041666666666667,
+      "max_reward": 1.666666666666666,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 9,
+      "task": "What is the boiling point of water?",
+      "mean_reward": -3.0,
+      "max_reward": -3.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 10,
+      "task": "Which is hotter right now, London or Mumbai?",
+      "mean_reward": -2.0,
+      "max_reward": -2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 11,
+      "task": "What is the population of India divided by its area in km2?",
+      "mean_reward": -2.125,
+      "max_reward": -2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 12,
+      "task": "What is India's population density in people per square mile?",
+      "mean_reward": -1.25,
+      "max_reward": 1.333333333333333,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 13,
+      "task": "What is the tallest mountain?",
+      "mean_reward": -2.875,
+      "max_reward": -2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 14,
+      "task": "What is the distance from Earth to the Sun in km in miles?",
+      "mean_reward": -1.875,
+      "max_reward": 1.5,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 15,
+      "task": "What is the population of Japan divided by its area in km2?",
+      "mean_reward": -2.875,
+      "max_reward": -2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 16,
+      "task": "What is Germany's population density in people per square mile?",
+      "mean_reward": -3.0,
+      "max_reward": -3.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 17,
+      "task": "Convert 74 kg to lbs.",
+      "mean_reward": -3.0,
+      "max_reward": -3.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 18,
+      "task": "Which is hotter right now, Paris or Cairo?",
+      "mean_reward": -3.0,
+      "max_reward": -3.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 19,
+      "task": "What is India's population density in people per square mile?",
+      "mean_reward": -1.5,
+      "max_reward": 1.333333333333333,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 20,
+      "task": "Which country has a larger population, France or Brazil?",
+      "mean_reward": -3.0,
+      "max_reward": -3.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 21,
+      "task": "Convert 64 kg to lbs.",
+      "mean_reward": -3.0,
+      "max_reward": -3.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 22,
+      "task": "Which country has a larger population, Japan or India?",
+      "mean_reward": -1.5625,
+      "max_reward": 3.0,
+      "success_count": 2,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 23,
+      "task": "What is the GDP of Japan?",
+      "mean_reward": -2.75,
+      "max_reward": -2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 24,
+      "task": "What is the population of France divided by its area in km2?",
+      "mean_reward": -2.875,
+      "max_reward": -2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 25,
+      "task": "What is France's population density in people per square mile?",
+      "mean_reward": -2.5,
+      "max_reward": 1.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 26,
+      "task": "Convert 26 kg to lbs.",
+      "mean_reward": -3.0,
+      "max_reward": -3.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 27,
+      "task": "What is 660 times 87?",
+      "mean_reward": 1.0,
+      "max_reward": 4.0,
+      "success_count": 1,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 28,
+      "task": "What is the boiling point of water?",
+      "mean_reward": -2.5,
+      "max_reward": -2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 29,
+      "task": "What is the population of Germany divided by its area in km2?",
+      "mean_reward": -1.125,
+      "max_reward": 2.333333333333333,
+      "success_count": 1,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 30,
+      "task": "Convert 40 kg to lbs.",
+      "mean_reward": -3.0,
+      "max_reward": -3.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 31,
+      "task": "What is the speed of light?",
+      "mean_reward": -2.375,
+      "max_reward": -2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 32,
+      "task": "How old was Guido van Rossum in 2024?",
+      "mean_reward": -2.875,
+      "max_reward": -2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 33,
+      "task": "Which is hotter right now, Paris or Dubai?",
+      "mean_reward": -3.0,
+      "max_reward": -3.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 34,
+      "task": "Which is hotter right now, Tokyo or Dubai?",
+      "mean_reward": -2.875,
+      "max_reward": -2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 35,
+      "task": "Which is hotter right now, London or Cairo?",
+      "mean_reward": -2.375,
+      "max_reward": -2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 36,
+      "task": "What is the value of pi?",
+      "mean_reward": -2.125,
+      "max_reward": -2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 37,
+      "task": "What is the population of Japan divided by its area in km2?",
+      "mean_reward": -1.6666666666666667,
+      "max_reward": 1.666666666666666,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 38,
+      "task": "What is the temperature in London in Fahrenheit?",
+      "mean_reward": -1.375,
+      "max_reward": 1.5,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 39,
+      "task": "What is 464 plus 30?",
+      "mean_reward": -1.75,
+      "max_reward": 2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 40,
+      "task": "Which country has a larger population, France or India?",
+      "mean_reward": -2.1875,
+      "max_reward": 2.5,
+      "success_count": 1,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 41,
+      "task": "What is the distance from Earth to the Sun in km in miles?",
+      "mean_reward": -1.625,
+      "max_reward": 2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 42,
+      "task": "What is the tallest mountain?",
+      "mean_reward": -1.125,
+      "max_reward": 2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 43,
+      "task": "What is the temperature in London in Fahrenheit?",
+      "mean_reward": 0.6875,
+      "max_reward": 2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 44,
+      "task": "What is 496 minus 24?",
+      "mean_reward": 1.0,
+      "max_reward": 2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 45,
+      "task": "What's the weather like in Cairo?",
+      "mean_reward": -1.25,
+      "max_reward": 4.0,
+      "success_count": 2,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 46,
+      "task": "What is the tallest mountain?",
+      "mean_reward": -2.5,
+      "max_reward": -2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 47,
+      "task": "What is India's population density in people per square mile?",
+      "mean_reward": -1.7916666666666667,
+      "max_reward": 1.333333333333333,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 48,
+      "task": "What is the GDP of France?",
+      "mean_reward": -2.625,
+      "max_reward": -2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    },
+    {
+      "scenario_index": 49,
+      "task": "How old was Guido van Rossum in 2024?",
+      "mean_reward": -2.75,
+      "max_reward": -2.0,
+      "success_count": 0,
+      "total_attempts": 8
+    }
+  ]
+}
\ No newline at end of file
diff --git a/artifacts/reward_curve.txt b/artifacts/reward_curve.txt
new file mode 100644
index 0000000..7a6e995
--- /dev/null
+++ b/artifacts/reward_curve.txt
@@ -0,0 +1,16 @@
+  Avg reward: -0.208 | Avg tools/rollout: 0.9 | groups with variance: 4/4
+  Avg reward: 1.969 | Avg tools/rollout: 1.0 | groups with variance: 1/4
+  Avg reward: 0.854 | Avg tools/rollout: 1.0 | groups with variance: 4/4
+  Avg reward: 1.193 | Avg tools/rollout: 0.9 | groups with variance: 3/4
+  Avg reward: -2.094 | Avg tools/rollout: 0.8 | groups with variance: 3/4
+  Avg reward: 0.505 | Avg tools/rollout: 0.9 | groups with variance: 4/4
+  Avg reward: -0.141 | Avg tools/rollout: 0.8 | groups with variance: 4/4
+  Avg reward: -0.797 | Avg tools/rollout: 0.9 | groups with variance: 4/4
+  Avg reward: 0.307 | Avg tools/rollout: 0.9 | groups with variance: 3/4
+  Avg reward: -1.125 | Avg tools/rollout: 1.0 | groups with variance: 1/4
+  Avg reward: -1.359 | Avg tools/rollout: 0.9 | groups with variance: 4/4
+  Avg reward: 0.484 | Avg tools/rollout: 1.0 | groups with variance: 3/4
+  Avg reward: -0.073 | Avg tools/rollout: 0.9 | groups with variance: 4/4
+  Avg reward: 1.740 | Avg tools/rollout: 1.0 | groups with variance: 3/4
+  Avg reward: 0.635 | Avg tools/rollout: 1.0 | groups with variance: 3/4
+  Avg reward: 1.615 | Avg tools/rollout: 0.9 | groups with variance: 2/4
diff --git a/artifacts/training.log b/artifacts/training.log
new file mode 100644
index 0000000..9c114ca
--- /dev/null
+++ b/artifacts/training.log
@@ -0,0 +1,1054 @@
+Skipping import of cpp extensions due to incompatible torch version 2.10.0+cu128 for torchao version 0.15.0             Please see https://github.com/pytorch/ao/issues/2919 for more info
+Loaded 200 train, 50 val scenarios
+GRPO config: 4 scenarios/step × 8 rollouts/scenario = 32 rollouts/step
+Skipping import of cpp extensions due to incompatible torch version 2.10.0+cu128 for torchao version 0.15.0             Please see https://github.com/pytorch/ao/issues/2919 for more info
+/usr/local/lib/python3.12/dist-packages/art/__init__.py:37: UserWarning: WARNING: Unsloth should be imported before [transformers] to ensure all optimizations are applied. Your code may run slower or encounter memory issues without these optimizations.
+
+Please restructure your imports with 'import unsloth' at the top of your file.
+  import unsloth  # noqa: F401
+🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
+🦥 Unsloth Zoo will now patch everything to make training faster!
+==((====))==  Unsloth 2026.3.3: Fast Qwen2 patching. Transformers: 5.2.0. vLLM: 0.17.0+art1.
+   \\   /|    NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.252 GB. Platform: Linux.
+O^O/ \_/ \    Torch: 2.10.0+cu128. CUDA: 8.0. CUDA Toolkit: 12.8. Triton: 3.6.0
+\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.35. FA2 = False]
+ "-____-"     Free license: http://github.com/unslothai/unsloth
+Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
+Loading weights:   0%|          | 0/290 [00:00<?, ?it/s]Loading weights:   0%|          | 1/290 [00:00<00:00, 6626.07it/s, Materializing param=model.embed_tokens.weight]Loading weights:   0%|          | 1/290 [00:00<00:00, 3975.64it/s, Materializing param=model.embed_tokens.weight]Loading weights:   1%|          | 2/290 [00:00<00:58,  4.92it/s, Materializing param=model.embed_tokens.weight]  Loading weights:   1%|          | 2/290 [00:00<00:58,  4.92it/s, Materializing param=model.layers.0.input_layernorm.weight]Loading weights:   1%|          | 2/290 [00:00<00:58,  4.92it/s, Materializing param=model.layers.0.input_layernorm.weight]Loading weights:   1%|          | 3/290 [00:00<00:58,  4.92it/s, Materializing param=model.layers.0.mlp.down_proj.weight]  Loading weights:   1%|          | 3/290 [00:00<00:58,  4.92it/s, Materializing param=model.layers.0.mlp.down_proj.weight]Loading weights:   1%|▏         | 4/290 [00:00<00:58,  4.92it/s, Materializing param=model.layers.0.mlp.gate_proj.weight]Loading weights:   1%|▏         | 4/290 [00:00<00:58,  4.92it/s, Materializing param=model.layers.0.mlp.gate_proj.weight]Loading weights:   2%|▏         | 5/290 [00:00<00:57,  4.92it/s, Materializing param=model.layers.0.mlp.up_proj.weight]  Loading weights:   2%|▏         | 5/290 [00:00<00:57,  4.92it/s, Materializing param=model.layers.0.mlp.up_proj.weight]Loading weights:   2%|▏         | 6/290 [00:00<00:57,  4.92it/s, Materializing param=model.layers.0.post_attention_layernorm.weight]Loading weights:   2%|▏         | 6/290 [00:00<00:57,  4.92it/s, Materializing param=model.layers.0.post_attention_layernorm.weight]Loading weights:   2%|▏         | 7/290 [00:00<00:57,  4.92it/s, Materializing param=model.layers.0.self_attn.k_proj.bias]          Loading weights:   2%|▏         | 7/290 [00:00<00:57,  4.92it/s, Materializing param=model.layers.0.self_attn.k_proj.bias]Loading weights:   3%|▎         | 8/290 [00:00<00:57,  4.92it/s, Materializing 
param=model.layers.0.self_attn.k_proj.weight]Loading weights:   3%|▎         | 8/290 [00:00<00:57,  4.92it/s, Materializing param=model.layers.0.self_attn.k_proj.weight]Loading weights:   3%|▎         | 9/290 [00:00<00:57,  4.92it/s, Materializing param=model.layers.0.self_attn.o_proj.weight]Loading weights:   3%|▎         | 9/290 [00:00<00:57,  4.92it/s, Materializing param=model.layers.0.self_attn.o_proj.weight]Loading weights:   3%|▎         | 10/290 [00:00<00:56,  4.92it/s, Materializing param=model.layers.0.self_attn.q_proj.bias] Loading weights:   3%|▎         | 10/290 [00:00<00:56,  4.92it/s, Materializing param=model.layers.0.self_attn.q_proj.bias]Loading weights:   4%|▍         | 11/290 [00:00<00:56,  4.92it/s, Materializing param=model.layers.0.self_attn.q_proj.weight]Loading weights:   4%|▍         | 11/290 [00:00<00:56,  4.92it/s, Materializing param=model.layers.0.self_attn.q_proj.weight]Loading weights:   4%|▍         | 12/290 [00:00<00:56,  4.92it/s, Materializing param=model.layers.0.self_attn.v_proj.bias]  Loading weights:   4%|▍         | 12/290 [00:00<00:56,  4.92it/s, Materializing param=model.layers.0.self_attn.v_proj.bias]Loading weights:   4%|▍         | 13/290 [00:00<00:56,  4.92it/s, Materializing param=model.layers.0.self_attn.v_proj.weight]Loading weights:   4%|▍         | 13/290 [00:00<00:56,  4.92it/s, Materializing param=model.layers.0.self_attn.v_proj.weight]Loading weights:   5%|▍         | 14/290 [00:00<00:56,  4.92it/s, Materializing param=model.layers.1.input_layernorm.weight] Loading weights:   5%|▍         | 14/290 [00:00<00:56,  4.92it/s, Materializing param=model.layers.1.input_layernorm.weight]Loading weights:   5%|▌         | 15/290 [00:00<00:55,  4.92it/s, Materializing param=model.layers.1.mlp.down_proj.weight]  Loading weights:   5%|▌         | 15/290 [00:00<00:55,  4.92it/s, Materializing param=model.layers.1.mlp.down_proj.weight]Loading weights:   6%|▌         | 16/290 [00:00<00:55,  4.92it/s, 
Materializing param=model.layers.1.mlp.gate_proj.weight]Loading weights:   6%|▌         | 16/290 [00:00<00:55,  4.92it/s, Materializing param=model.layers.1.mlp.gate_proj.weight]Loading weights:   6%|▌         | 17/290 [00:00<00:55,  4.92it/s, Materializing param=model.layers.1.mlp.up_proj.weight]  Loading weights:   6%|▌         | 17/290 [00:00<00:55,  4.92it/s, Materializing param=model.layers.1.mlp.up_proj.weight]Loading weights:   6%|▌         | 18/290 [00:00<00:55,  4.92it/s, Materializing param=model.layers.1.post_attention_layernorm.weight]Loading weights:   6%|▌         | 18/290 [00:00<00:55,  4.92it/s, Materializing param=model.layers.1.post_attention_layernorm.weight]Loading weights:   7%|▋         | 19/290 [00:00<00:55,  4.92it/s, Materializing param=model.layers.1.self_attn.k_proj.bias]          Loading weights:   7%|▋         | 19/290 [00:00<00:55,  4.92it/s, Materializing param=model.layers.1.self_attn.k_proj.bias]Loading weights:   7%|▋         | 20/290 [00:00<00:54,  4.92it/s, Materializing param=model.layers.1.self_attn.k_proj.weight]Loading weights:   7%|▋         | 20/290 [00:00<00:54,  4.92it/s, Materializing param=model.layers.1.self_attn.k_proj.weight]Loading weights:   7%|▋         | 21/290 [00:00<00:54,  4.92it/s, Materializing param=model.layers.1.self_attn.o_proj.weight]Loading weights:   7%|▋         | 21/290 [00:00<00:54,  4.92it/s, Materializing param=model.layers.1.self_attn.o_proj.weight]Loading weights:   8%|▊         | 22/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.1.self_attn.o_proj.weight]Loading weights:   8%|▊         | 22/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.1.self_attn.q_proj.bias]  Loading weights:   8%|▊         | 22/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.1.self_attn.q_proj.bias]Loading weights:   8%|▊         | 23/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.1.self_attn.q_proj.weight]Loading weights:   8%|▊         | 23/290 
[00:00<00:04, 55.55it/s, Materializing param=model.layers.1.self_attn.q_proj.weight]Loading weights:   8%|▊         | 24/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.1.self_attn.v_proj.bias]  Loading weights:   8%|▊         | 24/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.1.self_attn.v_proj.bias]Loading weights:   9%|▊         | 25/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.1.self_attn.v_proj.weight]Loading weights:   9%|▊         | 25/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.1.self_attn.v_proj.weight]Loading weights:   9%|▉         | 26/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.input_layernorm.weight] Loading weights:   9%|▉         | 26/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.input_layernorm.weight]Loading weights:   9%|▉         | 27/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.mlp.down_proj.weight]  Loading weights:   9%|▉         | 27/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.mlp.down_proj.weight]Loading weights:  10%|▉         | 28/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.mlp.gate_proj.weight]Loading weights:  10%|▉         | 28/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.mlp.gate_proj.weight]Loading weights:  10%|█         | 29/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.mlp.up_proj.weight]  Loading weights:  10%|█         | 29/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.mlp.up_proj.weight]Loading weights:  10%|█         | 30/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.post_attention_layernorm.weight]Loading weights:  10%|█         | 30/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.post_attention_layernorm.weight]Loading weights:  11%|█         | 31/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.self_attn.k_proj.bias]          Loading weights:  
11%|█         | 31/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.self_attn.k_proj.bias]Loading weights:  11%|█         | 32/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.self_attn.k_proj.weight]Loading weights:  11%|█         | 32/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.self_attn.k_proj.weight]Loading weights:  11%|█▏        | 33/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.self_attn.o_proj.weight]Loading weights:  11%|█▏        | 33/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.self_attn.o_proj.weight]Loading weights:  12%|█▏        | 34/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.self_attn.q_proj.bias]  Loading weights:  12%|█▏        | 34/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.self_attn.q_proj.bias]Loading weights:  12%|█▏        | 35/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.self_attn.q_proj.weight]Loading weights:  12%|█▏        | 35/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.self_attn.q_proj.weight]Loading weights:  12%|█▏        | 36/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.self_attn.v_proj.bias]  Loading weights:  12%|█▏        | 36/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.self_attn.v_proj.bias]Loading weights:  13%|█▎        | 37/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.self_attn.v_proj.weight]Loading weights:  13%|█▎        | 37/290 [00:00<00:04, 55.55it/s, Materializing param=model.layers.2.self_attn.v_proj.weight]Loading weights:  13%|█▎        | 38/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.2.self_attn.v_proj.weight]Loading weights:  13%|█▎        | 38/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.input_layernorm.weight] Loading weights:  13%|█▎        | 38/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.input_layernorm.weight]Loading 
weights:  13%|█▎        | 39/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.mlp.down_proj.weight]  Loading weights:  13%|█▎        | 39/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.mlp.down_proj.weight]Loading weights:  14%|█▍        | 40/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.mlp.gate_proj.weight]Loading weights:  14%|█▍        | 40/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.mlp.gate_proj.weight]Loading weights:  14%|█▍        | 41/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.mlp.up_proj.weight]  Loading weights:  14%|█▍        | 41/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.mlp.up_proj.weight]Loading weights:  14%|█▍        | 42/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.post_attention_layernorm.weight]Loading weights:  14%|█▍        | 42/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.post_attention_layernorm.weight]Loading weights:  15%|█▍        | 43/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.self_attn.k_proj.bias]          Loading weights:  15%|█▍        | 43/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.self_attn.k_proj.bias]Loading weights:  15%|█▌        | 44/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.self_attn.k_proj.weight]Loading weights:  15%|█▌        | 44/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.self_attn.k_proj.weight]Loading weights:  16%|█▌        | 45/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.self_attn.o_proj.weight]Loading weights:  16%|█▌        | 45/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.self_attn.o_proj.weight]Loading weights:  16%|█▌        | 46/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.self_attn.q_proj.bias]  Loading weights:  16%|█▌        | 46/290 [00:00<00:03, 72.81it/s, Materializing 
param=model.layers.3.self_attn.q_proj.bias]Loading weights:  16%|█▌        | 47/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.self_attn.q_proj.weight]Loading weights:  16%|█▌        | 47/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.self_attn.q_proj.weight]Loading weights:  17%|█▋        | 48/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.self_attn.v_proj.bias]  Loading weights:  17%|█▋        | 48/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.self_attn.v_proj.bias]Loading weights:  17%|█▋        | 49/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.self_attn.v_proj.weight]Loading weights:  17%|█▋        | 49/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.3.self_attn.v_proj.weight]Loading weights:  17%|█▋        | 50/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.input_layernorm.weight] Loading weights:  17%|█▋        | 50/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.input_layernorm.weight]Loading weights:  18%|█▊        | 51/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.mlp.down_proj.weight]  Loading weights:  18%|█▊        | 51/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.mlp.down_proj.weight]Loading weights:  18%|█▊        | 52/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.mlp.gate_proj.weight]Loading weights:  18%|█▊        | 52/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.mlp.gate_proj.weight]Loading weights:  18%|█▊        | 53/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.mlp.up_proj.weight]  Loading weights:  18%|█▊        | 53/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.mlp.up_proj.weight]Loading weights:  19%|█▊        | 54/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.post_attention_layernorm.weight]Loading weights:  19%|█▊        | 54/290 [00:00<00:03, 72.81it/s, 
Materializing param=model.layers.4.post_attention_layernorm.weight]Loading weights:  19%|█▉        | 55/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.self_attn.k_proj.bias]          Loading weights:  19%|█▉        | 55/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.self_attn.k_proj.bias]Loading weights:  19%|█▉        | 56/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.self_attn.k_proj.weight]Loading weights:  19%|█▉        | 56/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.self_attn.k_proj.weight]Loading weights:  20%|█▉        | 57/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.self_attn.o_proj.weight]Loading weights:  20%|█▉        | 57/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.self_attn.o_proj.weight]Loading weights:  20%|██        | 58/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.self_attn.q_proj.bias]  Loading weights:  20%|██        | 58/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.self_attn.q_proj.bias]Loading weights:  20%|██        | 59/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.self_attn.q_proj.weight]Loading weights:  20%|██        | 59/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.self_attn.q_proj.weight]Loading weights:  21%|██        | 60/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.self_attn.v_proj.bias]  Loading weights:  21%|██        | 60/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.self_attn.v_proj.bias]Loading weights:  21%|██        | 61/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.self_attn.v_proj.weight]Loading weights:  21%|██        | 61/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.4.self_attn.v_proj.weight]Loading weights:  21%|██▏       | 62/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.input_layernorm.weight] Loading weights:  21%|██▏       | 
62/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.input_layernorm.weight]Loading weights:  22%|██▏       | 63/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.mlp.down_proj.weight]  Loading weights:  22%|██▏       | 63/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.mlp.down_proj.weight]Loading weights:  22%|██▏       | 64/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.mlp.gate_proj.weight]Loading weights:  22%|██▏       | 64/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.mlp.gate_proj.weight]Loading weights:  22%|██▏       | 65/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.mlp.up_proj.weight]  Loading weights:  22%|██▏       | 65/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.mlp.up_proj.weight]Loading weights:  23%|██▎       | 66/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.post_attention_layernorm.weight]Loading weights:  23%|██▎       | 66/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.post_attention_layernorm.weight]Loading weights:  23%|██▎       | 67/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.self_attn.k_proj.bias]          Loading weights:  23%|██▎       | 67/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.self_attn.k_proj.bias]Loading weights:  23%|██▎       | 68/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.self_attn.k_proj.weight]Loading weights:  23%|██▎       | 68/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.self_attn.k_proj.weight]Loading weights:  24%|██▍       | 69/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.self_attn.o_proj.weight]Loading weights:  24%|██▍       | 69/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.self_attn.o_proj.weight]Loading weights:  24%|██▍       | 70/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.self_attn.q_proj.bias]  Loading 
weights:  24%|██▍       | 70/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.self_attn.q_proj.bias]Loading weights:  24%|██▍       | 71/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.self_attn.q_proj.weight]Loading weights:  24%|██▍       | 71/290 [00:00<00:03, 72.81it/s, Materializing param=model.layers.5.self_attn.q_proj.weight]Loading weights:  25%|██▍       | 72/290 [00:00<00:02, 72.81it/s, Materializing param=model.layers.5.self_attn.v_proj.bias]  Loading weights:  25%|██▍       | 72/290 [00:00<00:02, 72.81it/s, Materializing param=model.layers.5.self_attn.v_proj.bias]Loading weights:  25%|██▌       | 73/290 [00:00<00:02, 72.81it/s, Materializing param=model.layers.5.self_attn.v_proj.weight]Loading weights:  25%|██▌       | 73/290 [00:00<00:02, 72.81it/s, Materializing param=model.layers.5.self_attn.v_proj.weight]Loading weights:  26%|██▌       | 74/290 [00:00<00:02, 72.81it/s, Materializing param=model.layers.6.input_layernorm.weight] Loading weights:  26%|██▌       | 74/290 [00:00<00:02, 72.81it/s, Materializing param=model.layers.6.input_layernorm.weight]Loading weights:  26%|██▌       | 75/290 [00:00<00:02, 72.81it/s, Materializing param=model.layers.6.mlp.down_proj.weight]  Loading weights:  26%|██▌       | 75/290 [00:00<00:02, 72.81it/s, Materializing param=model.layers.6.mlp.down_proj.weight]Loading weights:  26%|██▌       | 76/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.mlp.down_proj.weight]Loading weights:  26%|██▌       | 76/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.mlp.gate_proj.weight]Loading weights:  26%|██▌       | 76/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.mlp.gate_proj.weight]Loading weights:  27%|██▋       | 77/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.mlp.up_proj.weight]  Loading weights:  27%|██▋       | 77/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.mlp.up_proj.weight]Loading 
weights:  27%|██▋       | 78/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.post_attention_layernorm.weight]Loading weights:  27%|██▋       | 78/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.post_attention_layernorm.weight]Loading weights:  27%|██▋       | 79/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.self_attn.k_proj.bias]          Loading weights:  27%|██▋       | 79/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.self_attn.k_proj.bias]Loading weights:  28%|██▊       | 80/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.self_attn.k_proj.weight]Loading weights:  28%|██▊       | 80/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.self_attn.k_proj.weight]Loading weights:  28%|██▊       | 81/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.self_attn.o_proj.weight]Loading weights:  28%|██▊       | 81/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.self_attn.o_proj.weight]Loading weights:  28%|██▊       | 82/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.self_attn.q_proj.bias]  Loading weights:  28%|██▊       | 82/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.self_attn.q_proj.bias]Loading weights:  29%|██▊       | 83/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.self_attn.q_proj.weight]Loading weights:  29%|██▊       | 83/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.self_attn.q_proj.weight]Loading weights:  29%|██▉       | 84/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.self_attn.v_proj.bias]  Loading weights:  29%|██▉       | 84/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.self_attn.v_proj.bias]Loading weights:  29%|██▉       | 85/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.6.self_attn.v_proj.weight]Loading weights:  29%|██▉       | 85/290 [00:00<00:01, 149.12it/s, Materializing 
param=model.layers.6.self_attn.v_proj.weight]Loading weights:  30%|██▉       | 86/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.input_layernorm.weight] Loading weights:  30%|██▉       | 86/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.input_layernorm.weight]Loading weights:  30%|███       | 87/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.mlp.down_proj.weight]  Loading weights:  30%|███       | 87/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.mlp.down_proj.weight]Loading weights:  30%|███       | 88/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.mlp.gate_proj.weight]Loading weights:  30%|███       | 88/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.mlp.gate_proj.weight]Loading weights:  31%|███       | 89/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.mlp.up_proj.weight]  Loading weights:  31%|███       | 89/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.mlp.up_proj.weight]Loading weights:  31%|███       | 90/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.post_attention_layernorm.weight]Loading weights:  31%|███       | 90/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.post_attention_layernorm.weight]Loading weights:  31%|███▏      | 91/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.self_attn.k_proj.bias]          Loading weights:  31%|███▏      | 91/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.self_attn.k_proj.bias]Loading weights:  32%|███▏      | 92/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.self_attn.k_proj.weight]Loading weights:  32%|███▏      | 92/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.self_attn.k_proj.weight]Loading weights:  32%|███▏      | 93/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.self_attn.o_proj.weight]Loading weights:  32%|███▏      | 93/290 
[00:00<00:01, 149.12it/s, Materializing param=model.layers.7.self_attn.o_proj.weight]Loading weights:  32%|███▏      | 94/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.self_attn.q_proj.bias]  Loading weights:  32%|███▏      | 94/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.self_attn.q_proj.bias]Loading weights:  33%|███▎      | 95/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.self_attn.q_proj.weight]Loading weights:  33%|███▎      | 95/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.self_attn.q_proj.weight]Loading weights:  33%|███▎      | 96/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.self_attn.v_proj.bias]  Loading weights:  33%|███▎      | 96/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.self_attn.v_proj.bias]Loading weights:  33%|███▎      | 97/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.self_attn.v_proj.weight]Loading weights:  33%|███▎      | 97/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.7.self_attn.v_proj.weight]Loading weights:  34%|███▍      | 98/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.input_layernorm.weight] Loading weights:  34%|███▍      | 98/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.input_layernorm.weight]Loading weights:  34%|███▍      | 99/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.mlp.down_proj.weight]  Loading weights:  34%|███▍      | 99/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.mlp.down_proj.weight]Loading weights:  34%|███▍      | 100/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.mlp.gate_proj.weight]Loading weights:  34%|███▍      | 100/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.mlp.gate_proj.weight]Loading weights:  35%|███▍      | 101/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.mlp.up_proj.weight]  Loading weights:  
35%|███▍      | 101/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.mlp.up_proj.weight]Loading weights:  35%|███▌      | 102/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.post_attention_layernorm.weight]Loading weights:  35%|███▌      | 102/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.post_attention_layernorm.weight]Loading weights:  36%|███▌      | 103/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.self_attn.k_proj.bias]          Loading weights:  36%|███▌      | 103/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.self_attn.k_proj.bias]Loading weights:  36%|███▌      | 104/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.self_attn.k_proj.weight]Loading weights:  36%|███▌      | 104/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.self_attn.k_proj.weight]Loading weights:  36%|███▌      | 105/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.self_attn.o_proj.weight]Loading weights:  36%|███▌      | 105/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.self_attn.o_proj.weight]Loading weights:  37%|███▋      | 106/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.self_attn.q_proj.bias]  Loading weights:  37%|███▋      | 106/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.self_attn.q_proj.bias]Loading weights:  37%|███▋      | 107/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.self_attn.q_proj.weight]Loading weights:  37%|███▋      | 107/290 [00:00<00:01, 149.12it/s, Materializing param=model.layers.8.self_attn.q_proj.weight]Loading weights:  37%|███▋      | 108/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.8.self_attn.q_proj.weight]Loading weights:  37%|███▋      | 108/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.8.self_attn.v_proj.bias]  Loading weights:  37%|███▋      | 108/290 [00:00<00:00, 188.88it/s, Materializing 
param=model.layers.8.self_attn.v_proj.bias]Loading weights:  38%|███▊      | 109/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.8.self_attn.v_proj.weight]Loading weights:  38%|███▊      | 109/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.8.self_attn.v_proj.weight]Loading weights:  38%|███▊      | 110/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.input_layernorm.weight] Loading weights:  38%|███▊      | 110/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.input_layernorm.weight]Loading weights:  38%|███▊      | 111/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.mlp.down_proj.weight]  Loading weights:  38%|███▊      | 111/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.mlp.down_proj.weight]Loading weights:  39%|███▊      | 112/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.mlp.gate_proj.weight]Loading weights:  39%|███▊      | 112/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.mlp.gate_proj.weight]Loading weights:  39%|███▉      | 113/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.mlp.up_proj.weight]  Loading weights:  39%|███▉      | 113/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.mlp.up_proj.weight]Loading weights:  39%|███▉      | 114/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.post_attention_layernorm.weight]Loading weights:  39%|███▉      | 114/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.post_attention_layernorm.weight]Loading weights:  40%|███▉      | 115/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.self_attn.k_proj.bias]          Loading weights:  40%|███▉      | 115/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.self_attn.k_proj.bias]Loading weights:  40%|████      | 116/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.self_attn.k_proj.weight]Loading weights:  40%|████      
| 116/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.self_attn.k_proj.weight]Loading weights:  40%|████      | 117/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.self_attn.o_proj.weight]Loading weights:  40%|████      | 117/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.self_attn.o_proj.weight]Loading weights:  41%|████      | 118/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.self_attn.q_proj.bias]  Loading weights:  41%|████      | 118/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.self_attn.q_proj.bias]Loading weights:  41%|████      | 119/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.self_attn.q_proj.weight]Loading weights:  41%|████      | 119/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.self_attn.q_proj.weight]Loading weights:  41%|████▏     | 120/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.self_attn.v_proj.bias]  Loading weights:  41%|████▏     | 120/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.self_attn.v_proj.bias]Loading weights:  42%|████▏     | 121/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.self_attn.v_proj.weight]Loading weights:  42%|████▏     | 121/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.9.self_attn.v_proj.weight]Loading weights:  42%|████▏     | 122/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.input_layernorm.weight]Loading weights:  42%|████▏     | 122/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.input_layernorm.weight]Loading weights:  42%|████▏     | 123/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.mlp.down_proj.weight]  Loading weights:  42%|████▏     | 123/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.mlp.down_proj.weight]Loading weights:  43%|████▎     | 124/290 [00:00<00:00, 188.88it/s, Materializing 
param=model.layers.10.mlp.gate_proj.weight]Loading weights:  43%|████▎     | 124/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.mlp.gate_proj.weight]Loading weights:  43%|████▎     | 125/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.mlp.up_proj.weight]  Loading weights:  43%|████▎     | 125/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.mlp.up_proj.weight]Loading weights:  43%|████▎     | 126/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.post_attention_layernorm.weight]Loading weights:  43%|████▎     | 126/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.post_attention_layernorm.weight]Loading weights:  44%|████▍     | 127/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.self_attn.k_proj.bias]          Loading weights:  44%|████▍     | 127/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.self_attn.k_proj.bias]Loading weights:  44%|████▍     | 128/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.self_attn.k_proj.weight]Loading weights:  44%|████▍     | 128/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.self_attn.k_proj.weight]Loading weights:  44%|████▍     | 129/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.self_attn.o_proj.weight]Loading weights:  44%|████▍     | 129/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.self_attn.o_proj.weight]Loading weights:  45%|████▍     | 130/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.self_attn.q_proj.bias]  Loading weights:  45%|████▍     | 130/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.self_attn.q_proj.bias]Loading weights:  45%|████▌     | 131/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.self_attn.q_proj.weight]Loading weights:  45%|████▌     | 131/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.self_attn.q_proj.weight]Loading 
weights:  46%|████▌     | 132/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.self_attn.v_proj.bias]  Loading weights:  46%|████▌     | 132/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.self_attn.v_proj.bias]Loading weights:  46%|████▌     | 133/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.self_attn.v_proj.weight]Loading weights:  46%|████▌     | 133/290 [00:00<00:00, 188.88it/s, Materializing param=model.layers.10.self_attn.v_proj.weight]Loading weights:  46%|████▌     | 134/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.10.self_attn.v_proj.weight]Loading weights:  46%|████▌     | 134/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.input_layernorm.weight] Loading weights:  46%|████▌     | 134/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.input_layernorm.weight]Loading weights:  47%|████▋     | 135/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.mlp.down_proj.weight]  Loading weights:  47%|████▋     | 135/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.mlp.down_proj.weight]Loading weights:  47%|████▋     | 136/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.mlp.gate_proj.weight]Loading weights:  47%|████▋     | 136/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.mlp.gate_proj.weight]Loading weights:  47%|████▋     | 137/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.mlp.up_proj.weight]  Loading weights:  47%|████▋     | 137/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.mlp.up_proj.weight]Loading weights:  48%|████▊     | 138/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.post_attention_layernorm.weight]Loading weights:  48%|████▊     | 138/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.post_attention_layernorm.weight]Loading weights:  48%|████▊     | 139/290 [00:00<00:00, 206.86it/s, 
Materializing param=model.layers.11.self_attn.k_proj.bias]          Loading weights:  48%|████▊     | 139/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.self_attn.k_proj.bias]Loading weights:  48%|████▊     | 140/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.self_attn.k_proj.weight]Loading weights:  48%|████▊     | 140/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.self_attn.k_proj.weight]Loading weights:  49%|████▊     | 141/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.self_attn.o_proj.weight]Loading weights:  49%|████▊     | 141/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.self_attn.o_proj.weight]Loading weights:  49%|████▉     | 142/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.self_attn.q_proj.bias]  Loading weights:  49%|████▉     | 142/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.self_attn.q_proj.bias]Loading weights:  49%|████▉     | 143/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.self_attn.q_proj.weight]Loading weights:  49%|████▉     | 143/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.self_attn.q_proj.weight]Loading weights:  50%|████▉     | 144/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.self_attn.v_proj.bias]  Loading weights:  50%|████▉     | 144/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.self_attn.v_proj.bias]Loading weights:  50%|█████     | 145/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.self_attn.v_proj.weight]Loading weights:  50%|█████     | 145/290 [00:00<00:00, 206.86it/s, Materializing param=model.layers.11.self_attn.v_proj.weight]Loading weights:  50%|█████     | 146/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.input_layernorm.weight] Loading weights:  50%|█████     | 146/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.input_layernorm.weight]
Loading weights:  51%|█████     | 147/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.mlp.down_proj.weight]  Loading weights:  51%|█████     | 147/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.mlp.down_proj.weight]Loading weights:  51%|█████     | 148/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.mlp.gate_proj.weight]Loading weights:  51%|█████     | 148/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.mlp.gate_proj.weight]Loading weights:  51%|█████▏    | 149/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.mlp.up_proj.weight]  Loading weights:  51%|█████▏    | 149/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.mlp.up_proj.weight]Loading weights:  52%|█████▏    | 150/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.post_attention_layernorm.weight]Loading weights:  52%|█████▏    | 150/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.post_attention_layernorm.weight]Loading weights:  52%|█████▏    | 151/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.self_attn.k_proj.bias]          Loading weights:  52%|█████▏    | 151/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.self_attn.k_proj.bias]Loading weights:  52%|█████▏    | 152/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.self_attn.k_proj.weight]Loading weights:  52%|█████▏    | 152/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.self_attn.k_proj.weight]Loading weights:  53%|█████▎    | 153/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.self_attn.o_proj.weight]Loading weights:  53%|█████▎    | 153/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.self_attn.o_proj.weight]Loading weights:  53%|█████▎    | 154/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.self_attn.q_proj.bias]  Loading weights:  53%|█████▎    | 154/290 [00:01<00:00, 
206.86it/s, Materializing param=model.layers.12.self_attn.q_proj.bias]Loading weights:  53%|█████▎    | 155/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.self_attn.q_proj.weight]Loading weights:  53%|█████▎    | 155/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.self_attn.q_proj.weight]Loading weights:  54%|█████▍    | 156/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.self_attn.v_proj.bias]  Loading weights:  54%|█████▍    | 156/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.self_attn.v_proj.bias]Loading weights:  54%|█████▍    | 157/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.self_attn.v_proj.weight]Loading weights:  54%|█████▍    | 157/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.12.self_attn.v_proj.weight]Loading weights:  54%|█████▍    | 158/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.input_layernorm.weight] Loading weights:  54%|█████▍    | 158/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.input_layernorm.weight]Loading weights:  55%|█████▍    | 159/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.mlp.down_proj.weight]  Loading weights:  55%|█████▍    | 159/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.mlp.down_proj.weight]Loading weights:  55%|█████▌    | 160/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.mlp.gate_proj.weight]Loading weights:  55%|█████▌    | 160/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.mlp.gate_proj.weight]Loading weights:  56%|█████▌    | 161/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.mlp.up_proj.weight]  Loading weights:  56%|█████▌    | 161/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.mlp.up_proj.weight]Loading weights:  56%|█████▌    | 162/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.post_attention_layernorm.weight]
Loading weights:  56%|█████▌    | 162/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.post_attention_layernorm.weight]Loading weights:  56%|█████▌    | 163/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.self_attn.k_proj.bias]          Loading weights:  56%|█████▌    | 163/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.self_attn.k_proj.bias]Loading weights:  57%|█████▋    | 164/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.self_attn.k_proj.weight]Loading weights:  57%|█████▋    | 164/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.self_attn.k_proj.weight]Loading weights:  57%|█████▋    | 165/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.self_attn.o_proj.weight]Loading weights:  57%|█████▋    | 165/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.self_attn.o_proj.weight]Loading weights:  57%|█████▋    | 166/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.self_attn.q_proj.bias]  Loading weights:  57%|█████▋    | 166/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.self_attn.q_proj.bias]Loading weights:  58%|█████▊    | 167/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.self_attn.q_proj.weight]Loading weights:  58%|█████▊    | 167/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.self_attn.q_proj.weight]Loading weights:  58%|█████▊    | 168/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.self_attn.v_proj.bias]  Loading weights:  58%|█████▊    | 168/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.self_attn.v_proj.bias]Loading weights:  58%|█████▊    | 169/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.self_attn.v_proj.weight]Loading weights:  58%|█████▊    | 169/290 [00:01<00:00, 206.86it/s, Materializing param=model.layers.13.self_attn.v_proj.weight]Loading weights:  59%|█████▊    | 170/290 
[00:01<00:00, 249.02it/s, Materializing param=model.layers.13.self_attn.v_proj.weight]Loading weights:  59%|█████▊    | 170/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.input_layernorm.weight] Loading weights:  59%|█████▊    | 170/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.input_layernorm.weight]Loading weights:  59%|█████▉    | 171/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.mlp.down_proj.weight]  Loading weights:  59%|█████▉    | 171/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.mlp.down_proj.weight]Loading weights:  59%|█████▉    | 172/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.mlp.gate_proj.weight]Loading weights:  59%|█████▉    | 172/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.mlp.gate_proj.weight]Loading weights:  60%|█████▉    | 173/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.mlp.up_proj.weight]  Loading weights:  60%|█████▉    | 173/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.mlp.up_proj.weight]Loading weights:  60%|██████    | 174/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.post_attention_layernorm.weight]Loading weights:  60%|██████    | 174/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.post_attention_layernorm.weight]Loading weights:  60%|██████    | 175/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.self_attn.k_proj.bias]          Loading weights:  60%|██████    | 175/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.self_attn.k_proj.bias]Loading weights:  61%|██████    | 176/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.self_attn.k_proj.weight]Loading weights:  61%|██████    | 176/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.self_attn.k_proj.weight]Loading weights:  61%|██████    | 177/290 [00:01<00:00, 249.02it/s, Materializing 
param=model.layers.14.self_attn.o_proj.weight]Loading weights:  61%|██████    | 177/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.self_attn.o_proj.weight]Loading weights:  61%|██████▏   | 178/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.self_attn.q_proj.bias]  Loading weights:  61%|██████▏   | 178/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.self_attn.q_proj.bias]Loading weights:  62%|██████▏   | 179/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.self_attn.q_proj.weight]Loading weights:  62%|██████▏   | 179/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.self_attn.q_proj.weight]Loading weights:  62%|██████▏   | 180/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.self_attn.v_proj.bias]  Loading weights:  62%|██████▏   | 180/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.self_attn.v_proj.bias]Loading weights:  62%|██████▏   | 181/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.self_attn.v_proj.weight]Loading weights:  62%|██████▏   | 181/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.14.self_attn.v_proj.weight]Loading weights:  63%|██████▎   | 182/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.input_layernorm.weight] Loading weights:  63%|██████▎   | 182/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.input_layernorm.weight]Loading weights:  63%|██████▎   | 183/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.mlp.down_proj.weight]  Loading weights:  63%|██████▎   | 183/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.mlp.down_proj.weight]Loading weights:  63%|██████▎   | 184/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.mlp.gate_proj.weight]Loading weights:  63%|██████▎   | 184/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.mlp.gate_proj.weight]Loading weights:  64%|██████▍   
| 185/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.mlp.up_proj.weight]  Loading weights:  64%|██████▍   | 185/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.mlp.up_proj.weight]Loading weights:  64%|██████▍   | 186/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.post_attention_layernorm.weight]Loading weights:  64%|██████▍   | 186/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.post_attention_layernorm.weight]Loading weights:  64%|██████▍   | 187/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.self_attn.k_proj.bias]          Loading weights:  64%|██████▍   | 187/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.self_attn.k_proj.bias]Loading weights:  65%|██████▍   | 188/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.self_attn.k_proj.weight]Loading weights:  65%|██████▍   | 188/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.self_attn.k_proj.weight]Loading weights:  65%|██████▌   | 189/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.self_attn.o_proj.weight]Loading weights:  65%|██████▌   | 189/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.self_attn.o_proj.weight]Loading weights:  66%|██████▌   | 190/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.self_attn.q_proj.bias]  Loading weights:  66%|██████▌   | 190/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.self_attn.q_proj.bias]Loading weights:  66%|██████▌   | 191/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.self_attn.q_proj.weight]Loading weights:  66%|██████▌   | 191/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.self_attn.q_proj.weight]Loading weights:  66%|██████▌   | 192/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.self_attn.v_proj.bias]  Loading weights:  66%|██████▌   | 192/290 [00:01<00:00, 249.02it/s, Materializing 
param=model.layers.15.self_attn.v_proj.bias]Loading weights:  67%|██████▋   | 193/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.self_attn.v_proj.weight]Loading weights:  67%|██████▋   | 193/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.15.self_attn.v_proj.weight]Loading weights:  67%|██████▋   | 194/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.16.input_layernorm.weight] Loading weights:  67%|██████▋   | 194/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.16.input_layernorm.weight]Loading weights:  67%|██████▋   | 195/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.16.mlp.down_proj.weight]  Loading weights:  67%|██████▋   | 195/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.16.mlp.down_proj.weight]Loading weights:  68%|██████▊   | 196/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.16.mlp.gate_proj.weight]Loading weights:  68%|██████▊   | 196/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.16.mlp.gate_proj.weight]Loading weights:  68%|██████▊   | 197/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.16.mlp.up_proj.weight]  Loading weights:  68%|██████▊   | 197/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.16.mlp.up_proj.weight]Loading weights:  68%|██████▊   | 198/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.16.post_attention_layernorm.weight]Loading weights:  68%|██████▊   | 198/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.16.post_attention_layernorm.weight]Loading weights:  69%|██████▊   | 199/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.16.self_attn.k_proj.bias]          Loading weights:  69%|██████▊   | 199/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.16.self_attn.k_proj.bias]Loading weights:  69%|██████▉   | 200/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.16.self_attn.k_proj.weight]Loading weights: 
 69%|██████▉   | 200/290 [00:01<00:00, 249.02it/s, Materializing param=model.layers.16.self_attn.k_proj.weight]Loading weights:  69%|██████▉   | 201/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.16.self_attn.k_proj.weight]Loading weights:  69%|██████▉   | 201/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.16.self_attn.o_proj.weight]Loading weights:  69%|██████▉   | 201/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.16.self_attn.o_proj.weight]Loading weights:  70%|██████▉   | 202/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.16.self_attn.q_proj.bias]  Loading weights:  70%|██████▉   | 202/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.16.self_attn.q_proj.bias]Loading weights:  70%|███████   | 203/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.16.self_attn.q_proj.weight]Loading weights:  70%|███████   | 203/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.16.self_attn.q_proj.weight]Loading weights:  70%|███████   | 204/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.16.self_attn.v_proj.bias]  Loading weights:  70%|███████   | 204/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.16.self_attn.v_proj.bias]Loading weights:  71%|███████   | 205/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.16.self_attn.v_proj.weight]Loading weights:  71%|███████   | 205/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.16.self_attn.v_proj.weight]Loading weights:  71%|███████   | 206/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.input_layernorm.weight] Loading weights:  71%|███████   | 206/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.input_layernorm.weight]Loading weights:  71%|███████▏  | 207/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.mlp.down_proj.weight]  Loading weights:  71%|███████▏  | 207/290 [00:01<00:00, 266.25it/s, Materializing 
param=model.layers.17.mlp.down_proj.weight]Loading weights:  72%|███████▏  | 208/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.mlp.gate_proj.weight]Loading weights:  72%|███████▏  | 208/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.mlp.gate_proj.weight]Loading weights:  72%|███████▏  | 209/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.mlp.up_proj.weight]  Loading weights:  72%|███████▏  | 209/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.mlp.up_proj.weight]Loading weights:  72%|███████▏  | 210/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.post_attention_layernorm.weight]Loading weights:  72%|███████▏  | 210/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.post_attention_layernorm.weight]Loading weights:  73%|███████▎  | 211/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.self_attn.k_proj.bias]          Loading weights:  73%|███████▎  | 211/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.self_attn.k_proj.bias]Loading weights:  73%|███████▎  | 212/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.self_attn.k_proj.weight]Loading weights:  73%|███████▎  | 212/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.self_attn.k_proj.weight]Loading weights:  73%|███████▎  | 213/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.self_attn.o_proj.weight]Loading weights:  73%|███████▎  | 213/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.self_attn.o_proj.weight]Loading weights:  74%|███████▍  | 214/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.self_attn.q_proj.bias]  Loading weights:  74%|███████▍  | 214/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.self_attn.q_proj.bias]Loading weights:  74%|███████▍  | 215/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.self_attn.q_proj.weight]Loading 
weights:  74%|███████▍  | 215/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.self_attn.q_proj.weight]Loading weights:  74%|███████▍  | 216/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.self_attn.v_proj.bias]  Loading weights:  74%|███████▍  | 216/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.self_attn.v_proj.bias]Loading weights:  75%|███████▍  | 217/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.self_attn.v_proj.weight]Loading weights:  75%|███████▍  | 217/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.17.self_attn.v_proj.weight]Loading weights:  75%|███████▌  | 218/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.input_layernorm.weight] Loading weights:  75%|███████▌  | 218/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.input_layernorm.weight]Loading weights:  76%|███████▌  | 219/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.mlp.down_proj.weight]  Loading weights:  76%|███████▌  | 219/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.mlp.down_proj.weight]Loading weights:  76%|███████▌  | 220/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.mlp.gate_proj.weight]Loading weights:  76%|███████▌  | 220/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.mlp.gate_proj.weight]Loading weights:  76%|███████▌  | 221/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.mlp.up_proj.weight]  Loading weights:  76%|███████▌  | 221/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.mlp.up_proj.weight]Loading weights:  77%|███████▋  | 222/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.post_attention_layernorm.weight]Loading weights:  77%|███████▋  | 222/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.post_attention_layernorm.weight]Loading weights:  77%|███████▋  | 223/290 [00:01<00:00, 266.25it/s, 
Materializing param=model.layers.18.self_attn.k_proj.bias]          Loading weights:  77%|███████▋  | 223/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.self_attn.k_proj.bias]Loading weights:  77%|███████▋  | 224/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.self_attn.k_proj.weight]Loading weights:  77%|███████▋  | 224/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.self_attn.k_proj.weight]Loading weights:  78%|███████▊  | 225/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.self_attn.o_proj.weight]Loading weights:  78%|███████▊  | 225/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.self_attn.o_proj.weight]Loading weights:  78%|███████▊  | 226/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.self_attn.q_proj.bias]  Loading weights:  78%|███████▊  | 226/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.self_attn.q_proj.bias]Loading weights:  78%|███████▊  | 227/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.self_attn.q_proj.weight]Loading weights:  78%|███████▊  | 227/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.self_attn.q_proj.weight]Loading weights:  79%|███████▊  | 228/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.self_attn.v_proj.bias]  Loading weights:  79%|███████▊  | 228/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.self_attn.v_proj.bias]Loading weights:  79%|███████▉  | 229/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.self_attn.v_proj.weight]Loading weights:  79%|███████▉  | 229/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.18.self_attn.v_proj.weight]Loading weights:  79%|███████▉  | 230/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.19.input_layernorm.weight] Loading weights:  79%|███████▉  | 230/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.19.input_layernorm.weight]
Loading weights:  80%|███████▉  | 231/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.19.mlp.down_proj.weight]  Loading weights:  80%|███████▉  | 231/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.19.mlp.down_proj.weight]Loading weights:  80%|████████  | 232/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.19.mlp.gate_proj.weight]Loading weights:  80%|████████  | 232/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.19.mlp.gate_proj.weight]Loading weights:  80%|████████  | 233/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.19.mlp.up_proj.weight]  Loading weights:  80%|████████  | 233/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.19.mlp.up_proj.weight]Loading weights:  81%|████████  | 234/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.19.post_attention_layernorm.weight]Loading weights:  81%|████████  | 234/290 [00:01<00:00, 266.25it/s, Materializing param=model.layers.19.post_attention_layernorm.weight]Loading weights:  81%|████████  | 235/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.19.post_attention_layernorm.weight]Loading weights:  81%|████████  | 235/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.19.self_attn.k_proj.bias]          Loading weights:  81%|████████  | 235/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.19.self_attn.k_proj.bias]Loading weights:  81%|████████▏ | 236/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.19.self_attn.k_proj.weight]Loading weights:  81%|████████▏ | 236/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.19.self_attn.k_proj.weight]Loading weights:  82%|████████▏ | 237/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.19.self_attn.o_proj.weight]Loading weights:  82%|████████▏ | 237/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.19.self_attn.o_proj.weight]Loading weights:  82%|████████▏ | 238/290 
[00:01<00:00, 287.39it/s, Materializing param=model.layers.19.self_attn.q_proj.bias]  Loading weights:  82%|████████▏ | 238/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.19.self_attn.q_proj.bias]Loading weights:  82%|████████▏ | 239/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.19.self_attn.q_proj.weight]Loading weights:  82%|████████▏ | 239/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.19.self_attn.q_proj.weight]Loading weights:  83%|████████▎ | 240/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.19.self_attn.v_proj.bias]  Loading weights:  83%|████████▎ | 240/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.19.self_attn.v_proj.bias]Loading weights:  83%|████████▎ | 241/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.19.self_attn.v_proj.weight]Loading weights:  83%|████████▎ | 241/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.19.self_attn.v_proj.weight]Loading weights:  83%|████████▎ | 242/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.input_layernorm.weight] Loading weights:  83%|████████▎ | 242/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.input_layernorm.weight]Loading weights:  84%|████████▍ | 243/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.mlp.down_proj.weight]  Loading weights:  84%|████████▍ | 243/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.mlp.down_proj.weight]Loading weights:  84%|████████▍ | 244/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.mlp.gate_proj.weight]Loading weights:  84%|████████▍ | 244/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.mlp.gate_proj.weight]Loading weights:  84%|████████▍ | 245/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.mlp.up_proj.weight]  Loading weights:  84%|████████▍ | 245/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.mlp.up_proj.weight]
Loading weights:  85%|████████▍ | 246/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.post_attention_layernorm.weight]Loading weights:  85%|████████▍ | 246/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.post_attention_layernorm.weight]Loading weights:  85%|████████▌ | 247/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.self_attn.k_proj.bias]          Loading weights:  85%|████████▌ | 247/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.self_attn.k_proj.bias]Loading weights:  86%|████████▌ | 248/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.self_attn.k_proj.weight]Loading weights:  86%|████████▌ | 248/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.self_attn.k_proj.weight]Loading weights:  86%|████████▌ | 249/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.self_attn.o_proj.weight]Loading weights:  86%|████████▌ | 249/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.self_attn.o_proj.weight]Loading weights:  86%|████████▌ | 250/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.self_attn.q_proj.bias]  Loading weights:  86%|████████▌ | 250/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.self_attn.q_proj.bias]Loading weights:  87%|████████▋ | 251/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.self_attn.q_proj.weight]Loading weights:  87%|████████▋ | 251/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.self_attn.q_proj.weight]Loading weights:  87%|████████▋ | 252/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.self_attn.v_proj.bias]  Loading weights:  87%|████████▋ | 252/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.self_attn.v_proj.bias]Loading weights:  87%|████████▋ | 253/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.20.self_attn.v_proj.weight]Loading weights:  87%|████████▋ | 253/290 
[00:01<00:00, 287.39it/s, Materializing param=model.layers.20.self_attn.v_proj.weight]Loading weights:  88%|████████▊ | 254/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.input_layernorm.weight] Loading weights:  88%|████████▊ | 254/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.input_layernorm.weight]Loading weights:  88%|████████▊ | 255/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.mlp.down_proj.weight]  Loading weights:  88%|████████▊ | 255/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.mlp.down_proj.weight]Loading weights:  88%|████████▊ | 256/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.mlp.gate_proj.weight]Loading weights:  88%|████████▊ | 256/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.mlp.gate_proj.weight]Loading weights:  89%|████████▊ | 257/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.mlp.up_proj.weight]  Loading weights:  89%|████████▊ | 257/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.mlp.up_proj.weight]Loading weights:  89%|████████▉ | 258/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.post_attention_layernorm.weight]Loading weights:  89%|████████▉ | 258/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.post_attention_layernorm.weight]Loading weights:  89%|████████▉ | 259/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.self_attn.k_proj.bias]          Loading weights:  89%|████████▉ | 259/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.self_attn.k_proj.bias]Loading weights:  90%|████████▉ | 260/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.self_attn.k_proj.weight]Loading weights:  90%|████████▉ | 260/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.self_attn.k_proj.weight]Loading weights:  90%|█████████ | 261/290 [00:01<00:00, 287.39it/s, Materializing 
param=model.layers.21.self_attn.o_proj.weight]Loading weights:  90%|█████████ | 261/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.self_attn.o_proj.weight]Loading weights:  90%|█████████ | 262/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.self_attn.q_proj.bias]  Loading weights:  90%|█████████ | 262/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.self_attn.q_proj.bias]Loading weights:  91%|█████████ | 263/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.self_attn.q_proj.weight]Loading weights:  91%|█████████ | 263/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.self_attn.q_proj.weight]Loading weights:  91%|█████████ | 264/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.self_attn.v_proj.bias]  Loading weights:  91%|█████████ | 264/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.self_attn.v_proj.bias]Loading weights:  91%|█████████▏| 265/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.self_attn.v_proj.weight]Loading weights:  91%|█████████▏| 265/290 [00:01<00:00, 287.39it/s, Materializing param=model.layers.21.self_attn.v_proj.weight]Loading weights:  92%|█████████▏| 266/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.21.self_attn.v_proj.weight]Loading weights:  92%|█████████▏| 266/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.input_layernorm.weight] Loading weights:  92%|█████████▏| 266/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.input_layernorm.weight]Loading weights:  92%|█████████▏| 267/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.mlp.down_proj.weight]  Loading weights:  92%|█████████▏| 267/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.mlp.down_proj.weight]Loading weights:  92%|█████████▏| 268/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.mlp.gate_proj.weight]Loading weights:  
92%|█████████▏| 268/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.mlp.gate_proj.weight]Loading weights:  93%|█████████▎| 269/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.mlp.up_proj.weight]  Loading weights:  93%|█████████▎| 269/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.mlp.up_proj.weight]Loading weights:  93%|█████████▎| 270/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.post_attention_layernorm.weight]Loading weights:  93%|█████████▎| 270/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.post_attention_layernorm.weight]Loading weights:  93%|█████████▎| 271/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.self_attn.k_proj.bias]          Loading weights:  93%|█████████▎| 271/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.self_attn.k_proj.bias]Loading weights:  94%|█████████▍| 272/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.self_attn.k_proj.weight]Loading weights:  94%|█████████▍| 272/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.self_attn.k_proj.weight]Loading weights:  94%|█████████▍| 273/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.self_attn.o_proj.weight]Loading weights:  94%|█████████▍| 273/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.self_attn.o_proj.weight]Loading weights:  94%|█████████▍| 274/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.self_attn.q_proj.bias]  Loading weights:  94%|█████████▍| 274/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.self_attn.q_proj.bias]Loading weights:  95%|█████████▍| 275/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.self_attn.q_proj.weight]Loading weights:  95%|█████████▍| 275/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.self_attn.q_proj.weight]Loading weights:  95%|█████████▌| 276/290 [00:01<00:00, 273.64it/s, 
Materializing param=model.layers.22.self_attn.v_proj.bias]  Loading weights:  95%|█████████▌| 276/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.self_attn.v_proj.bias]Loading weights:  96%|█████████▌| 277/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.self_attn.v_proj.weight]Loading weights:  96%|█████████▌| 277/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.22.self_attn.v_proj.weight]Loading weights:  96%|█████████▌| 278/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.input_layernorm.weight] Loading weights:  96%|█████████▌| 278/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.input_layernorm.weight]Loading weights:  96%|█████████▌| 279/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.mlp.down_proj.weight]  Loading weights:  96%|█████████▌| 279/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.mlp.down_proj.weight]Loading weights:  97%|█████████▋| 280/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.mlp.gate_proj.weight]Loading weights:  97%|█████████▋| 280/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.mlp.gate_proj.weight]Loading weights:  97%|█████████▋| 281/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.mlp.up_proj.weight]  Loading weights:  97%|█████████▋| 281/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.mlp.up_proj.weight]Loading weights:  97%|█████████▋| 282/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.post_attention_layernorm.weight]Loading weights:  97%|█████████▋| 282/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.post_attention_layernorm.weight]Loading weights:  98%|█████████▊| 283/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.self_attn.k_proj.bias]          Loading weights:  98%|█████████▊| 283/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.self_attn.k_proj.bias]
Loading weights:  98%|█████████▊| 284/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.self_attn.k_proj.weight]Loading weights:  98%|█████████▊| 284/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.self_attn.k_proj.weight]Loading weights:  98%|█████████▊| 285/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.self_attn.o_proj.weight]Loading weights:  98%|█████████▊| 285/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.self_attn.o_proj.weight]Loading weights:  99%|█████████▊| 286/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.self_attn.q_proj.bias]  Loading weights:  99%|█████████▊| 286/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.self_attn.q_proj.bias]Loading weights:  99%|█████████▉| 287/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.self_attn.q_proj.weight]Loading weights:  99%|█████████▉| 287/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.self_attn.q_proj.weight]Loading weights:  99%|█████████▉| 288/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.self_attn.v_proj.bias]  Loading weights:  99%|█████████▉| 288/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.self_attn.v_proj.bias]Loading weights: 100%|█████████▉| 289/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.self_attn.v_proj.weight]Loading weights: 100%|█████████▉| 289/290 [00:01<00:00, 273.64it/s, Materializing param=model.layers.23.self_attn.v_proj.weight]Loading weights: 100%|██████████| 290/290 [00:01<00:00, 273.64it/s, Materializing param=model.norm.weight]                      Loading weights: 100%|██████████| 290/290 [00:01<00:00, 273.64it/s, Materializing param=model.norm.weight]Loading weights: 100%|██████████| 290/290 [00:01<00:00, 197.37it/s, Materializing param=model.norm.weight]
+unsloth/qwen2.5-0.5b-instruct-unsloth-bnb-4bit does not have a padding token! Will use pad_token = <|PAD_TOKEN|>.
+Unsloth 2026.3.3 patched 24 layers with 24 QKV layers, 24 O layers and 24 MLP layers.
+[33mWarning: `huggingface-cli` is deprecated and no longer works. Use `hf` instead.
+[0m
+[90mHint: `hf` is already installed! Use it directly.
+[0m
+[90mHint: Examples:
+  hf auth login
+  hf download unsloth/gemma-4-31B-it-GGUF
+  hf upload my-cool-model . .
+  hf models ls --search "gemma"
+  hf repos ls --format json
+  hf jobs run python:3.12 python -c 'print("Hello!")'
+  hf --help
+[0m
+INFO 04-13 02:20:55 [model.py:531] Resolved architecture: Qwen2ForCausalLM
+INFO 04-13 02:20:55 [model.py:1554] Using max model len 32768
+INFO 04-13 02:20:55 [scheduler.py:231] Chunked prefill is enabled with max_num_batched_tokens=2048.
+INFO 04-13 02:20:55 [vllm.py:747] Asynchronous scheduling is enabled.
+WARNING 04-13 02:20:57 [system_utils.py:152] We must use the `spawn` multiprocessing start method. Overriding VLLM_WORKER_MULTIPROC_METHOD to 'spawn'. See https://docs.vllm.ai/en/latest/usage/troubleshooting.html#python-multiprocessing for more information. Reasons: CUDA is initialized
+Skipping import of cpp extensions due to incompatible torch version 2.10.0+cu128 for torchao version 0.15.0             Please see https://github.com/pytorch/ao/issues/2919 for more info
+/usr/local/lib/python3.12/dist-packages/art/__init__.py:37: UserWarning: WARNING: Unsloth should be imported before [transformers] to ensure all optimizations are applied. Your code may run slower or encounter memory issues without these optimizations.
+
+Please restructure your imports with 'import unsloth' at the top of your file.
+  import unsloth  # noqa: F401
+🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
+🦥 Unsloth Zoo will now patch everything to make training faster!
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:21 [core.py:101] Initializing a V1 LLM engine (v0.17.0+art1) with config: model='Qwen/Qwen2.5-0.5B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-0.5B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=32768, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, data_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, enable_return_routed_experts=False, kv_cache_dtype=auto, device_config=cuda, structured_outputs_config=StructuredOutputsConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_parser='', reasoning_parser_plugin='', enable_in_reasoning=False), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, kv_cache_metrics=False, kv_cache_metrics_sample=0.01, cudagraph_metrics=False, enable_layerwise_nvtx_tracing=False, enable_mfu_metrics=False, enable_mm_processor_stats=False, enable_logging_iteration_details=False), seed=0, served_model_name=Qwen/Qwen2.5-0.5B-Instruct, enable_prefix_caching=True, enable_chunked_prefill=True, pooler_config=None, compilation_config={'level': None, 'mode': <CompilationMode.VLLM_COMPILE: 3>, 'debug_dump_path': None, 'cache_dir': '', 'compile_cache_save_format': 'binary', 'backend': 'inductor', 'custom_ops': ['none'], 'splitting_ops': ['vllm::unified_attention', 'vllm::unified_attention_with_output', 'vllm::unified_mla_attention', 'vllm::unified_mla_attention_with_output', 'vllm::mamba_mixer2', 'vllm::mamba_mixer', 'vllm::short_conv', 'vllm::linear_attention', 'vllm::plamo2_mamba_mixer', 'vllm::gdn_attention_core', 'vllm::kda_attention', 'vllm::sparse_attn_indexer', 'vllm::rocm_aiter_sparse_attn_indexer', 'vllm::unified_kv_cache_update', 
'vllm::unified_mla_kv_cache_update'], 'compile_mm_encoder': False, 'compile_sizes': [], 'compile_ranges_split_points': [2048], 'inductor_compile_config': {'enable_auto_functionalized_v2': False, 'combo_kernels': True, 'benchmark_combo_kernel': True}, 'inductor_passes': {}, 'cudagraph_mode': <CUDAGraphMode.FULL_AND_PIECEWISE: (2, 1)>, 'cudagraph_num_of_warmups': 1, 'cudagraph_capture_sizes': [1, 2, 4, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256], 'cudagraph_copy_inputs': False, 'cudagraph_specialize_lora': True, 'use_inductor_graph_partition': False, 'pass_config': {'fuse_norm_quant': False, 'fuse_act_quant': False, 'fuse_attn_quant': False, 'enable_sp': False, 'fuse_gemm_comms': False, 'fuse_allreduce_rms': False}, 'max_cudagraph_capture_size': 256, 'dynamic_shapes_config': {'type': <DynamicShapesType.BACKED: 'backed'>, 'evaluate_guards': False, 'assume_32_bit_indexing': False}, 'local_cache_dir': None, 'fast_moe_cold_start': True, 'static_all_moe_layers': []}
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:21 [worker_base.py:283] Injected <class 'art.vllm.engine.WorkerExtension'> into <class 'vllm.v1.worker.gpu_worker.Worker'> for extended collective_rpc calls ['run', 'time']
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:21 [parallel_state.py:1393] world_size=1 rank=0 local_rank=0 distributed_init_method=tcp://172.21.0.2:53693 backend=nccl
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:21 [parallel_state.py:1715] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, PCP rank 0, TP rank 0, EP rank N/A, EPLB rank N/A
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:22 [base.py:106] Offloader set to NoopOffloader
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:22 [gpu_model_runner.py:4255] Starting to load model Qwen/Qwen2.5-0.5B-Instruct...
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:23 [cuda.py:405] Using FLASH_ATTN attention backend out of potential backends: ['FLASH_ATTN', 'FLASHINFER', 'TRITON_ATTN', 'FLEX_ATTENTION'].
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:23 [flash_attn.py:587] Using FlashAttention version 2
+(EngineCore_DP0 pid=13597) <frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
+(EngineCore_DP0 pid=13597) <frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:24 [weight_utils.py:601] No model.safetensors.index.json found in remote.
+(EngineCore_DP0 pid=13597) Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]
+(EngineCore_DP0 pid=13597) Loading safetensors checkpoint shards: 100% Completed | 1/1 [00:01<00:00,  1.94s/it]
+(EngineCore_DP0 pid=13597) Loading safetensors checkpoint shards: 100% Completed | 1/1 [00:01<00:00,  1.94s/it]
+(EngineCore_DP0 pid=13597) 
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:26 [default_loader.py:293] Loading weights took 1.94 seconds
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:26 [punica_selector.py:20] Using PunicaWrapperGPU.
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:27 [gpu_model_runner.py:4338] Model loading took 0.96 GiB memory and 3.730793 seconds
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:42 [decorators.py:465] Directly load AOT compilation from path /root/.cache/vllm/torch_compile_cache/torch_aot_compile/19f16ef5be162d523fe85c0ed27f944cf1ccd27d08e2ae363d4b7c12b35022cc/rank_0_0/model
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:43 [backends.py:916] Using cache directory: /root/.cache/vllm/torch_compile_cache/d97828e2e7/rank_0_0/backbone for vLLM's torch.compile
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:43 [backends.py:976] Dynamo bytecode transform time: 2.93 s
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:45 [backends.py:266] Directly load the compiled graph(s) for compile range (1, 2048) from the cache, took 1.415 s
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:45 [monitor.py:35] torch.compile takes 5.21 s in total
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:46 [gpu_worker.py:424] Available KV cache memory: 70.01 GiB
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:46 [kv_cache_utils.py:1314] GPU KV cache size: 6,117,600 tokens
+(EngineCore_DP0 pid=13597) INFO 04-13 02:21:46 [kv_cache_utils.py:1319] Maximum concurrency for 32,768 tokens per request: 186.69x
+(EngineCore_DP0 pid=13597) Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):   0%|          | 0/70 [00:00<?, ?it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):   1%|▏         | 1/70 [00:12<14:38, 12.74s/it](EngineCore_DP0 pid=13597) WARNING 04-13 02:22:01 [utils.py:268] Using default LoRA kernel configs
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):   3%|▎         | 2/70 [00:12<06:03,  5.35s/it]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):   6%|▌         | 4/70 [00:13<02:14,  2.04s/it]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):   9%|▊         | 6/70 [00:13<01:10,  1.10s/it]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  11%|█▏        | 8/70 [00:13<00:42,  1.46it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  14%|█▍        | 10/70 [00:13<00:27,  2.20it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  17%|█▋        | 12/70 [00:13<00:18,  3.13it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  20%|██        | 14/70 [00:13<00:13,  4.27it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  23%|██▎       | 16/70 [00:13<00:09,  5.59it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  26%|██▌       | 18/70 [00:13<00:07,  7.06it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  29%|██▊       | 20/70 [00:14<00:05,  8.59it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  31%|███▏      | 22/70 [00:14<00:04,  9.99it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  34%|███▍      | 24/70 [00:14<00:04, 11.41it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  37%|███▋      | 26/70 [00:14<00:03, 12.69it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  40%|████      | 28/70 [00:14<00:03, 13.76it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  43%|████▎     | 30/70 [00:14<00:02, 14.56it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  46%|████▌     | 32/70 [00:14<00:02, 15.26it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  49%|████▊     | 34/70 [00:14<00:02, 15.86it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  51%|█████▏    | 36/70 [00:14<00:02, 15.83it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  54%|█████▍    | 38/70 [00:15<00:02, 15.92it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  57%|█████▋    | 40/70 [00:15<00:01, 16.33it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  60%|██████    | 42/70 [00:15<00:01, 16.45it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  63%|██████▎   | 44/70 [00:15<00:01, 16.65it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  66%|██████▌   | 46/70 [00:15<00:01, 16.50it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  69%|██████▊   | 48/70 [00:15<00:01, 16.71it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  71%|███████▏  | 50/70 [00:15<00:01, 16.90it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  74%|███████▍  | 52/70 [00:15<00:01, 17.13it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  77%|███████▋  | 54/70 [00:16<00:00, 17.37it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  80%|████████  | 56/70 [00:16<00:00, 17.48it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  83%|████████▎ | 58/70 [00:16<00:00, 16.62it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  86%|████████▌ | 60/70 [00:16<00:00, 16.78it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  89%|████████▊ | 62/70 [00:16<00:00, 16.93it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  91%|█████████▏| 64/70 [00:16<00:00, 17.05it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  94%|█████████▍| 66/70 [00:16<00:00, 17.09it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  97%|█████████▋| 68/70 [00:16<00:00, 17.07it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 100%|██████████| 70/70 [00:17<00:00, 15.27it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 100%|██████████| 70/70 [00:17<00:00,  4.11it/s]
+(EngineCore_DP0 pid=13597) Capturing CUDA graphs (decode, FULL):   0%|          | 0/38 [00:00<?, ?it/s]Capturing CUDA graphs (decode, FULL):   3%|▎         | 1/38 [00:12<07:56, 12.88s/it]Capturing CUDA graphs (decode, FULL):  11%|█         | 4/38 [00:13<01:24,  2.48s/it]Capturing CUDA graphs (decode, FULL):  16%|█▌        | 6/38 [00:13<00:45,  1.42s/it]Capturing CUDA graphs (decode, FULL):  21%|██        | 8/38 [00:13<00:26,  1.11it/s]Capturing CUDA graphs (decode, FULL):  26%|██▋       | 10/38 [00:13<00:16,  1.66it/s]Capturing CUDA graphs (decode, FULL):  32%|███▏      | 12/38 [00:13<00:10,  2.40it/s]Capturing CUDA graphs (decode, FULL):  37%|███▋      | 14/38 [00:13<00:07,  3.34it/s]Capturing CUDA graphs (decode, FULL):  42%|████▏     | 16/38 [00:13<00:04,  4.50it/s]Capturing CUDA graphs (decode, FULL):  47%|████▋     | 18/38 [00:13<00:03,  5.88it/s]Capturing CUDA graphs (decode, FULL):  53%|█████▎    | 20/38 [00:13<00:02,  7.44it/s]Capturing CUDA graphs (decode, FULL):  58%|█████▊    | 22/38 [00:14<00:01,  9.05it/s]Capturing CUDA graphs (decode, FULL):  63%|██████▎   | 24/38 [00:14<00:01, 10.63it/s]Capturing CUDA graphs (decode, FULL):  68%|██████▊   | 26/38 [00:14<00:00, 12.06it/s]Capturing CUDA graphs (decode, FULL):  74%|███████▎  | 28/38 [00:14<00:00, 13.30it/s]Capturing CUDA graphs (decode, FULL):  79%|███████▉  | 30/38 [00:14<00:00, 14.35it/s]Capturing CUDA graphs (decode, FULL):  84%|████████▍ | 32/38 [00:14<00:00, 15.15it/s]Capturing CUDA graphs (decode, FULL):  89%|████████▉ | 34/38 [00:14<00:00, 15.84it/s]Capturing CUDA graphs (decode, FULL):  95%|█████████▍| 36/38 [00:14<00:00, 16.25it/s]Capturing CUDA graphs (decode, FULL): 100%|██████████| 38/38 [00:14<00:00, 16.42it/s]Capturing CUDA graphs (decode, FULL): 100%|██████████| 38/38 [00:14<00:00,  2.54it/s]
+(EngineCore_DP0 pid=13597) INFO 04-13 02:22:21 [gpu_model_runner.py:5360] Graph capturing finished in 33 secs, took 0.65 GiB
+(EngineCore_DP0 pid=13597) INFO 04-13 02:22:34 [core.py:282] init engine (profile, create kv cache, warmup model) took 66.79 seconds
+(EngineCore_DP0 pid=13597) INFO 04-13 02:22:37 [vllm.py:747] Asynchronous scheduling is enabled.
+Starting from step 0
+
+============================================================
+Step 1/50
+============================================================
+step 1:   0%|          | 0/32 [00:00<?, ?it/s]step 1:   3%|▎         | 1/32 [00:01<00:48,  1.57s/it]step 1:   3%|▎         | 1/32 [00:01<00:48,  1.57s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=44]step 1:   6%|▋         | 2/32 [00:01<00:22,  1.33it/s, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=44]step 1:   6%|▋         | 2/32 [00:01<00:22,  1.33it/s, reward=0.5, num_turns=1.5, num_tools=0.5, failed=0, completion_tokens=33]step 1:   9%|▉         | 3/32 [00:01<00:21,  1.33it/s, reward=1.67, num_turns=1.67, num_tools=0.667, failed=0, completion_tokens=29.7]step 1:  12%|█▎        | 4/32 [00:01<00:21,  1.33it/s, reward=2.25, num_turns=1.75, num_tools=0.75, failed=0, completion_tokens=28]   step 1:  16%|█▌        | 5/32 [00:01<00:20,  1.33it/s, reward=1.4, num_turns=1.8, num_tools=0.8, failed=0, completion_tokens=26.9] step 1:  19%|█▉        | 6/32 [00:01<00:19,  1.33it/s, reward=1.5, num_turns=1.83, num_tools=0.833, failed=0, completion_tokens=26.2]step 1:  22%|██▏       | 7/32 [00:01<00:18,  1.33it/s, reward=1.57, num_turns=1.86, num_tools=0.857, failed=0, completion_tokens=25.6]step 1:  25%|██▌       | 8/32 [00:01<00:18,  1.33it/s, reward=1.62, num_turns=1.88, num_tools=0.875, failed=0, completion_tokens=25.2]step 1:  28%|██▊       | 9/32 [00:01<00:17,  1.33it/s, reward=1.67, num_turns=1.89, num_tools=0.889, failed=0, completion_tokens=25]  step 1:  31%|███▏      | 10/32 [00:01<00:16,  1.33it/s, reward=1.3, num_turns=1.9, num_tools=0.9, failed=0, completion_tokens=25.4] step 1:  34%|███▍      | 11/32 [00:01<00:02,  9.95it/s, reward=1.3, num_turns=1.9, num_tools=0.9, failed=0, completion_tokens=25.4]step 1:  34%|███▍      | 11/32 [00:01<00:02,  9.95it/s, reward=1.36, num_turns=1.91, num_tools=0.909, failed=0, completion_tokens=26.2]step 1:  38%|███▊      | 12/32 [00:01<00:02,  9.95it/s, reward=1.33, num_turns=1.92, num_tools=0.917, failed=0, completion_tokens=26.7]step 1:  41%|████      | 13/32 
[00:01<00:01,  9.95it/s, reward=1.08, num_turns=1.92, num_tools=0.923, failed=0, completion_tokens=27.2]step 1:  44%|████▍     | 14/32 [00:01<00:01,  9.95it/s, reward=1.12, num_turns=1.93, num_tools=0.929, failed=0, completion_tokens=27.4]step 1:  47%|████▋     | 15/32 [00:01<00:01, 13.46it/s, reward=1.12, num_turns=1.93, num_tools=0.929, failed=0, completion_tokens=27.4]step 1:  47%|████▋     | 15/32 [00:01<00:01, 13.46it/s, reward=0.911, num_turns=1.93, num_tools=0.933, failed=0, completion_tokens=28.2]step 1:  50%|█████     | 16/32 [00:01<00:01, 13.46it/s, reward=0.729, num_turns=1.94, num_tools=0.938, failed=0, completion_tokens=28.9]step 1:  53%|█████▎    | 17/32 [00:02<00:01, 13.46it/s, reward=0.922, num_turns=1.94, num_tools=0.941, failed=0, completion_tokens=28.5]step 1:  56%|█████▋    | 18/32 [00:02<00:01, 13.46it/s, reward=0.759, num_turns=1.94, num_tools=0.944, failed=0, completion_tokens=29.8]step 1:  59%|█████▉    | 19/32 [00:02<00:00, 16.97it/s, reward=0.759, num_turns=1.94, num_tools=0.944, failed=0, completion_tokens=29.8]step 1:  59%|█████▉    | 19/32 [00:02<00:00, 16.97it/s, reward=0.772, num_turns=1.95, num_tools=0.947, failed=0, completion_tokens=29.6]step 1:  62%|██████▎   | 20/32 [00:02<00:00, 16.97it/s, reward=0.783, num_turns=1.95, num_tools=0.95, failed=0, completion_tokens=30.7] step 1:  66%|██████▌   | 21/32 [00:02<00:00, 16.97it/s, reward=0.651, num_turns=1.95, num_tools=0.952, failed=0, completion_tokens=31.9]step 1:  69%|██████▉   | 22/32 [00:02<00:00, 16.97it/s, reward=0.53, num_turns=1.95, num_tools=0.955, failed=0, completion_tokens=32.5] step 1:  72%|███████▏  | 23/32 [00:02<00:00, 17.29it/s, reward=0.53, num_turns=1.95, num_tools=0.955, failed=0, completion_tokens=32.5]step 1:  72%|███████▏  | 23/32 [00:02<00:00, 17.29it/s, reward=0.42, num_turns=1.96, num_tools=0.957, failed=0, completion_tokens=34.3]step 1:  75%|███████▌  | 24/32 [00:02<00:00, 17.29it/s, reward=0.319, num_turns=1.96, num_tools=0.958, failed=0, 
completion_tokens=36.3]step 1:  78%|███████▊  | 25/32 [00:02<00:00, 17.29it/s, reward=0.373, num_turns=1.96, num_tools=0.96, failed=0, completion_tokens=36.2] step 1:  81%|████████▏ | 26/32 [00:02<00:00, 15.03it/s, reward=0.373, num_turns=1.96, num_tools=0.96, failed=0, completion_tokens=36.2]step 1:  81%|████████▏ | 26/32 [00:02<00:00, 15.03it/s, reward=0.282, num_turns=1.96, num_tools=0.962, failed=0, completion_tokens=38.5]step 1:  84%|████████▍ | 27/32 [00:03<00:00, 15.03it/s, reward=0.16, num_turns=1.93, num_tools=0.926, failed=0, completion_tokens=50.6] step 1:  88%|████████▊ | 28/32 [00:03<00:00, 15.03it/s, reward=0.0833, num_turns=1.93, num_tools=0.929, failed=0, completion_tokens=56.6]step 1:  91%|█████████ | 29/32 [00:03<00:00,  6.02it/s, reward=0.0833, num_turns=1.93, num_tools=0.929, failed=0, completion_tokens=56.6]step 1:  91%|█████████ | 29/32 [00:03<00:00,  6.02it/s, reward=-0.023, num_turns=1.9, num_tools=0.897, failed=0, completion_tokens=72.3] step 1:  94%|█████████▍| 30/32 [00:04<00:00,  6.02it/s, reward=-0.0889, num_turns=1.9, num_tools=0.9, failed=0, completion_tokens=78.7] step 1:  97%|█████████▋| 31/32 [00:04<00:00,  6.05it/s, reward=-0.0889, num_turns=1.9, num_tools=0.9, failed=0, completion_tokens=78.7]step 1:  97%|█████████▋| 31/32 [00:04<00:00,  6.05it/s, reward=-0.151, num_turns=1.9, num_tools=0.903, failed=0, completion_tokens=84.9]step 1: 100%|██████████| 32/32 [00:04<00:00,  6.05it/s, reward=-0.208, num_turns=1.91, num_tools=0.906, failed=0, completion_tokens=90.8]step 1: 100%|██████████| 32/32 [00:04<00:00,  7.54it/s, reward=-0.208, num_turns=1.91, num_tools=0.906, failed=0, completion_tokens=90.8]
+  group 0: mean=-1.25 std=1.639 min=-3.0 max=+2.0 | What is the distance from Earth to the Sun in km i
+  group 1: mean=-0.71 std=1.679 min=-2.0 max=+1.7 | What is the population of India divided by its are
+  group 2: mean=-1.88 std=1.166 min=-3.0 max=+1.0 | How old was Einstein in 2020?
+  group 3: mean=+3.00 std=1.000 min=+2.0 max=+4.0 | What is 567 times 18?
+  Avg reward: -0.208 | Avg tools/rollout: 0.9 | groups with variance: 4/4
+"./.art/rl-tool-use/models/qwen-0.5b-tool-agent/history.jsonl" not found
+Packed 32 trajectories into 4 sequences of length 2048
+train:   0%|          | 0/4 [00:00<?, ?it/s]The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None}.
+==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
+   \\   /|    Num examples = 10,000,000 | Num Epochs = 3 | Total steps = 30,000,000
+O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 1
+\        /    Data Parallel GPUs = 1 | Total batch size (2 x 1 x 1) = 2
+ "-____-"     Trainable parameters = 4,399,104 of 498,431,872 (0.88% trained)
+train:  25%|██▌       | 1/4 [00:10<00:30, 10.23s/it]train:  25%|██▌       | 1/4 [00:10<00:30, 10.23s/it, loss/train=-1.08, loss/grad_norm=0.826, loss/learning_rate=5e-5, loss/entropy=1.5]train:  50%|█████     | 2/4 [00:10<00:08,  4.43s/it, loss/train=-1.08, loss/grad_norm=0.826, loss/learning_rate=5e-5, loss/entropy=1.5]train:  50%|█████     | 2/4 [00:10<00:08,  4.43s/it, loss/train=-0.388, loss/grad_norm=2.59, loss/learning_rate=5e-5, loss/entropy=2.01]train:  75%|███████▌  | 3/4 [00:10<00:02,  2.57s/it, loss/train=-0.388, loss/grad_norm=2.59, loss/learning_rate=5e-5, loss/entropy=2.01]train:  75%|███████▌  | 3/4 [00:10<00:02,  2.57s/it, loss/train=-1.22, loss/grad_norm=0.875, loss/learning_rate=5e-5, loss/entropy=1.48]train: 100%|██████████| 4/4 [00:11<00:00,  1.70s/it, loss/train=-1.22, loss/grad_norm=0.875, loss/learning_rate=5e-5, loss/entropy=1.48]train: 100%|██████████| 4/4 [00:11<00:00,  1.70s/it, loss/train=-0.368, loss/grad_norm=1.93, loss/learning_rate=5e-5, loss/entropy=1.27](APIServer pid=12946) Adapters before cleanup: ['default']
+(APIServer pid=12946) Keeping active adapter(s): ['default']
+(APIServer pid=12946) Adapters after cleanup: ['default']
+train: 100%|██████████| 4/4 [00:39<00:00,  9.89s/it, loss/train=-0.368, loss/grad_norm=1.93, loss/learning_rate=5e-5, loss/entropy=1.27]
+
+============================================================
+Step 2/50
+============================================================
+step 2:   0%|          | 0/32 [00:00<?, ?it/s]step 2:   3%|▎         | 1/32 [00:01<00:34,  1.12s/it]step 2:   3%|▎         | 1/32 [00:01<00:34,  1.12s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=36]step 2:   6%|▋         | 2/32 [00:01<00:33,  1.12s/it, reward=-2.5, num_turns=1.5, num_tools=0.5, failed=0, completion_tokens=27.8]step 2:   9%|▉         | 3/32 [00:01<00:32,  1.12s/it, reward=-1, num_turns=1.67, num_tools=0.667, failed=0, completion_tokens=25.5]step 2:  12%|█▎        | 4/32 [00:01<00:31,  1.12s/it, reward=-0.25, num_turns=1.75, num_tools=0.75, failed=0, completion_tokens=24.4]step 2:  16%|█▌        | 5/32 [00:01<00:30,  1.12s/it, reward=0.2, num_turns=1.8, num_tools=0.8, failed=0, completion_tokens=23.7]    step 2:  19%|█▉        | 6/32 [00:01<00:04,  6.33it/s, reward=0.2, num_turns=1.8, num_tools=0.8, failed=0, completion_tokens=23.7]step 2:  19%|█▉        | 6/32 [00:01<00:04,  6.33it/s, reward=0.5, num_turns=1.83, num_tools=0.833, failed=0, completion_tokens=23.2]step 2:  22%|██▏       | 7/32 [00:01<00:03,  6.33it/s, reward=0.714, num_turns=1.86, num_tools=0.857, failed=0, completion_tokens=22.9]step 2:  25%|██▌       | 8/32 [00:01<00:03,  6.33it/s, reward=0.875, num_turns=1.88, num_tools=0.875, failed=0, completion_tokens=22.7]step 2:  28%|██▊       | 9/32 [00:01<00:03,  6.33it/s, reward=1, num_turns=1.89, num_tools=0.889, failed=0, completion_tokens=22.5]    step 2:  31%|███▏      | 10/32 [00:01<00:03,  6.33it/s, reward=1.3, num_turns=1.9, num_tools=0.9, failed=0, completion_tokens=23]  step 2:  34%|███▍      | 11/32 [00:01<00:03,  6.33it/s, reward=1.55, num_turns=1.91, num_tools=0.909, failed=0, completion_tokens=23.4]step 2:  38%|███▊      | 12/32 [00:01<00:03,  6.33it/s, reward=1.75, num_turns=1.92, num_tools=0.917, failed=0, completion_tokens=23.8]step 2:  41%|████      | 13/32 [00:01<00:03,  6.33it/s, reward=1.92, num_turns=1.92, num_tools=0.923, failed=0, completion_tokens=24.2]step 2:  44%|████▍     
| 14/32 [00:01<00:02,  6.33it/s, reward=2.07, num_turns=1.93, num_tools=0.929, failed=0, completion_tokens=24.5]step 2:  47%|████▋     | 15/32 [00:01<00:02,  6.33it/s, reward=2.2, num_turns=1.93, num_tools=0.933, failed=0, completion_tokens=24.7] step 2:  50%|█████     | 16/32 [00:01<00:02,  6.33it/s, reward=2.31, num_turns=1.94, num_tools=0.938, failed=0, completion_tokens=24.9]step 2:  53%|█████▎    | 17/32 [00:01<00:02,  6.33it/s, reward=2.06, num_turns=1.94, num_tools=0.941, failed=0, completion_tokens=24.6]step 2:  56%|█████▋    | 18/32 [00:01<00:02,  6.33it/s, reward=1.83, num_turns=1.94, num_tools=0.944, failed=0, completion_tokens=25.3]step 2:  59%|█████▉    | 19/32 [00:01<00:02,  6.33it/s, reward=1.63, num_turns=1.95, num_tools=0.947, failed=0, completion_tokens=25.7]step 2:  62%|██████▎   | 20/32 [00:01<00:00, 24.63it/s, reward=1.63, num_turns=1.95, num_tools=0.947, failed=0, completion_tokens=25.7]step 2:  62%|██████▎   | 20/32 [00:01<00:00, 24.63it/s, reward=1.45, num_turns=1.95, num_tools=0.95, failed=0, completion_tokens=26]   step 2:  66%|██████▌   | 21/32 [00:01<00:00, 24.63it/s, reward=1.29, num_turns=1.95, num_tools=0.952, failed=0, completion_tokens=26.3]step 2:  69%|██████▉   | 22/32 [00:01<00:00, 24.63it/s, reward=1.41, num_turns=1.95, num_tools=0.955, failed=0, completion_tokens=26.2]step 2:  72%|███████▏  | 23/32 [00:01<00:00, 24.63it/s, reward=1.52, num_turns=1.96, num_tools=0.957, failed=0, completion_tokens=26.2]step 2:  75%|███████▌  | 24/32 [00:01<00:00, 24.63it/s, reward=1.38, num_turns=1.96, num_tools=0.958, failed=0, completion_tokens=26.1]step 2:  78%|███████▊  | 25/32 [00:01<00:00, 24.63it/s, reward=1.4, num_turns=1.96, num_tools=0.96, failed=0, completion_tokens=25.9]  step 2:  81%|████████▏ | 26/32 [00:01<00:00, 24.63it/s, reward=1.5, num_turns=1.96, num_tools=0.962, failed=0, completion_tokens=26] step 2:  84%|████████▍ | 27/32 [00:01<00:00, 24.63it/s, reward=1.59, num_turns=1.96, num_tools=0.963, failed=0, 
completion_tokens=26.1]step 2:  88%|████████▊ | 28/32 [00:01<00:00, 33.31it/s, reward=1.59, num_turns=1.96, num_tools=0.963, failed=0, completion_tokens=26.1]step 2:  88%|████████▊ | 28/32 [00:01<00:00, 33.31it/s, reward=1.68, num_turns=1.96, num_tools=0.964, failed=0, completion_tokens=26.1]step 2:  91%|█████████ | 29/32 [00:01<00:00, 33.31it/s, reward=1.76, num_turns=1.97, num_tools=0.966, failed=0, completion_tokens=26.2]step 2:  94%|█████████▍| 30/32 [00:01<00:00, 33.31it/s, reward=1.83, num_turns=1.97, num_tools=0.967, failed=0, completion_tokens=26.3]step 2:  97%|█████████▋| 31/32 [00:01<00:00, 33.31it/s, reward=1.9, num_turns=1.97, num_tools=0.968, failed=0, completion_tokens=26.3] step 2: 100%|██████████| 32/32 [00:01<00:00, 33.31it/s, reward=1.97, num_turns=1.97, num_tools=0.969, failed=0, completion_tokens=26.4]step 2: 100%|██████████| 32/32 [00:01<00:00, 22.07it/s, reward=1.97, num_turns=1.97, num_tools=0.969, failed=0, completion_tokens=26.4]
+  group 0: mean=+2.00 std=0.000 min=+2.0 max=+2.0 | What is 115 minus 94?
+  group 1: mean=+4.00 std=0.000 min=+4.0 max=+4.0 | Convert 33 kg to lbs.
+  group 2: mean=+4.00 std=0.000 min=+4.0 max=+4.0 | Convert 11 kg to lbs.
+  group 3: mean=-2.12 std=0.331 min=-3.0 max=-2.0 | What is the boiling point of water?
+  Avg reward: 1.969 | Avg tools/rollout: 1.0 | groups with variance: 1/4
+No "val/reward" metric found in history
+Deleted checkpoint ./.art/rl-tool-use/models/qwen-0.5b-tool-agent/checkpoints/0000
+Packed 8 trajectories into 1 sequences of length 2048
+train:   0%|          | 0/1 [00:00<?, ?it/s]train: 100%|██████████| 1/1 [00:01<00:00,  1.86s/it]train: 100%|██████████| 1/1 [00:01<00:00,  1.86s/it, loss/train=-0.0999, loss/grad_norm=3, loss/learning_rate=5e-5, loss/entropy=0.884](APIServer pid=12946) Adapters before cleanup: ['default']
+(APIServer pid=12946) Keeping active adapter(s): ['default']
+(APIServer pid=12946) Adapters after cleanup: ['default']
+train: 100%|██████████| 1/1 [00:30<00:00, 30.06s/it, loss/train=-0.0999, loss/grad_norm=3, loss/learning_rate=5e-5, loss/entropy=0.884]
+
+============================================================
+Step 3/50
+============================================================
+step 3:   0%|          | 0/32 [00:00<?, ?it/s]step 3:   3%|▎         | 1/32 [00:01<00:34,  1.13s/it]step 3:   3%|▎         | 1/32 [00:01<00:34,  1.13s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=22]step 3:   6%|▋         | 2/32 [00:01<00:18,  1.64it/s, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=22]step 3:   6%|▋         | 2/32 [00:01<00:18,  1.64it/s, reward=-0.5, num_turns=1.5, num_tools=0.5, failed=0, completion_tokens=21.8]step 3:   9%|▉         | 3/32 [00:01<00:17,  1.64it/s, reward=0.333, num_turns=1.67, num_tools=0.667, failed=0, completion_tokens=21.7]step 3:  12%|█▎        | 4/32 [00:01<00:17,  1.64it/s, reward=-0.25, num_turns=1.75, num_tools=0.75, failed=0, completion_tokens=22.2] step 3:  16%|█▌        | 5/32 [00:01<00:16,  1.64it/s, reward=-0.8, num_turns=1.6, num_tools=0.6, failed=0, completion_tokens=31.4]   step 3:  19%|█▉        | 6/32 [00:01<00:15,  1.64it/s, reward=-1, num_turns=1.67, num_tools=0.667, failed=0, completion_tokens=30.8]step 3:  22%|██▏       | 7/32 [00:01<00:15,  1.64it/s, reward=-0.286, num_turns=1.71, num_tools=0.714, failed=0, completion_tokens=30.4]step 3:  25%|██▌       | 8/32 [00:01<00:14,  1.64it/s, reward=0.25, num_turns=1.75, num_tools=0.75, failed=0, completion_tokens=30]     step 3:  28%|██▊       | 9/32 [00:01<00:14,  1.64it/s, reward=0.667, num_turns=1.78, num_tools=0.778, failed=0, completion_tokens=29.7]step 3:  31%|███▏      | 10/32 [00:01<00:13,  1.64it/s, reward=1, num_turns=1.8, num_tools=0.8, failed=0, completion_tokens=29.5]      step 3:  34%|███▍      | 11/32 [00:01<00:12,  1.64it/s, reward=1.21, num_turns=1.82, num_tools=0.818, failed=0, completion_tokens=29.4]step 3:  38%|███▊      | 12/32 [00:01<00:12,  1.64it/s, reward=1.44, num_turns=1.83, num_tools=0.833, failed=0, completion_tokens=29.2]step 3:  41%|████      | 13/32 [00:01<00:11,  1.64it/s, reward=1.64, num_turns=1.85, num_tools=0.846, failed=0, completion_tokens=29.1]step 3:  44%|████▍    
 | 14/32 [00:01<00:01, 15.53it/s, reward=1.64, num_turns=1.85, num_tools=0.846, failed=0, completion_tokens=29.1]step 3:  44%|████▍     | 14/32 [00:01<00:01, 15.53it/s, reward=1.81, num_turns=1.86, num_tools=0.857, failed=0, completion_tokens=29.2]step 3:  47%|████▋     | 15/32 [00:01<00:01, 15.53it/s, reward=1.82, num_turns=1.87, num_tools=0.867, failed=0, completion_tokens=28.7]step 3:  50%|█████     | 16/32 [00:01<00:01, 15.53it/s, reward=1.83, num_turns=1.88, num_tools=0.875, failed=0, completion_tokens=28.2]step 3:  53%|█████▎    | 17/32 [00:01<00:00, 15.53it/s, reward=1.84, num_turns=1.88, num_tools=0.882, failed=0, completion_tokens=27.9]step 3:  56%|█████▋    | 18/32 [00:01<00:00, 15.53it/s, reward=1.85, num_turns=1.89, num_tools=0.889, failed=0, completion_tokens=27.5]step 3:  59%|█████▉    | 19/32 [00:01<00:00, 15.53it/s, reward=1.86, num_turns=1.89, num_tools=0.895, failed=0, completion_tokens=27.2]step 3:  62%|██████▎   | 20/32 [00:01<00:00, 15.53it/s, reward=1.67, num_turns=1.9, num_tools=0.9, failed=0, completion_tokens=27.7]   step 3:  66%|██████▌   | 21/32 [00:01<00:00, 15.53it/s, reward=1.49, num_turns=1.9, num_tools=0.905, failed=0, completion_tokens=28.3]step 3:  69%|██████▉   | 22/32 [00:01<00:00, 15.53it/s, reward=1.33, num_turns=1.91, num_tools=0.909, failed=0, completion_tokens=28.8]step 3:  72%|███████▏  | 23/32 [00:01<00:00, 15.53it/s, reward=1.19, num_turns=1.91, num_tools=0.913, failed=0, completion_tokens=29.4]step 3:  75%|███████▌  | 24/32 [00:01<00:00, 27.08it/s, reward=1.19, num_turns=1.91, num_tools=0.913, failed=0, completion_tokens=29.4]step 3:  75%|███████▌  | 24/32 [00:01<00:00, 27.08it/s, reward=1.17, num_turns=1.92, num_tools=0.917, failed=0, completion_tokens=30.1]step 3:  78%|███████▊  | 25/32 [00:01<00:00, 27.08it/s, reward=1.04, num_turns=1.92, num_tools=0.92, failed=0, completion_tokens=30]   step 3:  81%|████████▏ | 26/32 [00:01<00:00, 27.08it/s, reward=1.03, num_turns=1.92, num_tools=0.923, failed=0, 
completion_tokens=30.8]step 3:  84%|████████▍ | 27/32 [00:01<00:00, 27.08it/s, reward=0.914, num_turns=1.93, num_tools=0.926, failed=0, completion_tokens=31.2]step 3:  88%|████████▊ | 28/32 [00:01<00:00, 27.08it/s, reward=0.929, num_turns=1.93, num_tools=0.929, failed=0, completion_tokens=32.7]step 3:  91%|█████████ | 29/32 [00:01<00:00, 27.08it/s, reward=0.954, num_turns=1.97, num_tools=0.966, failed=0, completion_tokens=33.1]step 3:  94%|█████████▍| 30/32 [00:01<00:00, 27.08it/s, reward=0.944, num_turns=1.97, num_tools=0.967, failed=0, completion_tokens=33.6]step 3:  97%|█████████▋| 31/32 [00:02<00:00, 15.58it/s, reward=0.944, num_turns=1.97, num_tools=0.967, failed=0, completion_tokens=33.6]step 3:  97%|█████████▋| 31/32 [00:02<00:00, 15.58it/s, reward=0.946, num_turns=1.97, num_tools=0.968, failed=0, completion_tokens=35.9]step 3: 100%|██████████| 32/32 [00:02<00:00, 15.58it/s, reward=0.854, num_turns=1.97, num_tools=0.969, failed=0, completion_tokens=40.7]step 3: 100%|██████████| 32/32 [00:02<00:00, 11.47it/s, reward=0.854, num_turns=1.97, num_tools=0.969, failed=0, completion_tokens=40.7]
+  group 0: mean=+1.50 std=1.323 min=-2.0 max=+2.0 | What is 995 minus 50?
+  group 1: mean=-0.88 std=1.462 min=-2.0 max=+1.3 | What is Germany's population density in people per
+  group 2: mean=+3.92 std=0.220 min=+3.3 max=+4.0 | Convert 5 kg to lbs.
+  group 3: mean=-1.12 std=1.715 min=-3.0 max=+1.7 | What is India's population density in people per s
+  Avg reward: 0.854 | Avg tools/rollout: 1.0 | groups with variance: 4/4
+No "val/reward" metric found in history
+Deleted checkpoint ./.art/rl-tool-use/models/qwen-0.5b-tool-agent/checkpoints/0001
+Packed 32 trajectories into 3 sequences of length 2048
+train:   0%|          | 0/3 [00:00<?, ?it/s]train:  33%|███▎      | 1/3 [00:02<00:04,  2.03s/it]train:  33%|███▎      | 1/3 [00:02<00:04,  2.03s/it, loss/train=-0.323, loss/grad_norm=0.562, loss/learning_rate=5e-5, loss/entropy=0.441]train:  67%|██████▋   | 2/3 [00:02<00:01,  1.03s/it, loss/train=-0.323, loss/grad_norm=0.562, loss/learning_rate=5e-5, loss/entropy=0.441]train:  67%|██████▋   | 2/3 [00:02<00:01,  1.03s/it, loss/train=-0.0952, loss/grad_norm=3.2, loss/learning_rate=5e-5, loss/entropy=1.27]  train: 100%|██████████| 3/3 [00:02<00:00,  1.42it/s, loss/train=-0.0952, loss/grad_norm=3.2, loss/learning_rate=5e-5, loss/entropy=1.27]train: 100%|██████████| 3/3 [00:02<00:00,  1.42it/s, loss/train=0.106, loss/grad_norm=1.33, loss/learning_rate=5e-5, loss/entropy=0.686](APIServer pid=12946) Adapters before cleanup: ['default']
+(APIServer pid=12946) Keeping active adapter(s): ['default']
+(APIServer pid=12946) Adapters after cleanup: ['default']
+train: 100%|██████████| 3/3 [00:30<00:00, 10.24s/it, loss/train=0.106, loss/grad_norm=1.33, loss/learning_rate=5e-5, loss/entropy=0.686]
+
+============================================================
+Step 4/50
+============================================================
+step 4:   0%|          | 0/32 [00:00<?, ?it/s]step 4:   3%|▎         | 1/32 [00:01<00:38,  1.25s/it]step 4:   3%|▎         | 1/32 [00:01<00:38,  1.25s/it, reward=4, num_turns=2, num_tools=1, failed=0, completion_tokens=21.5]step 4:   6%|▋         | 2/32 [00:01<00:37,  1.25s/it, reward=4, num_turns=2, num_tools=1, failed=0, completion_tokens=21.5]step 4:   9%|▉         | 3/32 [00:01<00:36,  1.25s/it, reward=4, num_turns=2, num_tools=1, failed=0, completion_tokens=21.7]step 4:  12%|█▎        | 4/32 [00:01<00:34,  1.25s/it, reward=3.5, num_turns=2, num_tools=1, failed=0, completion_tokens=21.6]step 4:  16%|█▌        | 5/32 [00:01<00:33,  1.25s/it, reward=3.2, num_turns=2, num_tools=1, failed=0, completion_tokens=21.6]step 4:  19%|█▉        | 6/32 [00:01<00:32,  1.25s/it, reward=3, num_turns=2, num_tools=1, failed=0, completion_tokens=21.6]  step 4:  22%|██▏       | 7/32 [00:01<00:31,  1.25s/it, reward=2.86, num_turns=2, num_tools=1, failed=0, completion_tokens=21.6]step 4:  25%|██▌       | 8/32 [00:01<00:29,  1.25s/it, reward=2.75, num_turns=2, num_tools=1, failed=0, completion_tokens=21.6]step 4:  28%|██▊       | 9/32 [00:01<00:28,  1.25s/it, reward=2.67, num_turns=2, num_tools=1, failed=0, completion_tokens=21.6]step 4:  31%|███▏      | 10/32 [00:01<00:27,  1.25s/it, reward=2.6, num_turns=2, num_tools=1, failed=0, completion_tokens=21.6]step 4:  34%|███▍      | 11/32 [00:01<00:26,  1.25s/it, reward=2.55, num_turns=2, num_tools=1, failed=0, completion_tokens=21.5]step 4:  38%|███▊      | 12/32 [00:01<00:24,  1.25s/it, reward=2.17, num_turns=2, num_tools=1, failed=0, completion_tokens=21.7]step 4:  41%|████      | 13/32 [00:01<00:23,  1.25s/it, reward=2.12, num_turns=2, num_tools=1, failed=0, completion_tokens=21.9]step 4:  44%|████▍     | 14/32 [00:01<00:01,  9.43it/s, reward=2.12, num_turns=2, num_tools=1, failed=0, completion_tokens=21.9]step 4:  44%|████▍     | 14/32 [00:01<00:01,  9.43it/s, reward=1.82, num_turns=2, num_tools=1, failed=0, 
completion_tokens=22.4]step 4:  47%|████▋     | 15/32 [00:01<00:01,  9.43it/s, reward=1.9, num_turns=2, num_tools=1, failed=0, completion_tokens=22.6] step 4:  50%|█████     | 16/32 [00:01<00:01,  9.43it/s, reward=1.97, num_turns=2, num_tools=1, failed=0, completion_tokens=22.8]step 4:  53%|█████▎    | 17/32 [00:01<00:01,  9.43it/s, reward=2.03, num_turns=2, num_tools=1, failed=0, completion_tokens=23.1]step 4:  56%|█████▋    | 18/32 [00:01<00:01,  9.43it/s, reward=2.14, num_turns=2, num_tools=1, failed=0, completion_tokens=22.9]step 4:  59%|█████▉    | 19/32 [00:01<00:01,  9.43it/s, reward=2.24, num_turns=2, num_tools=1, failed=0, completion_tokens=22.8]step 4:  62%|██████▎   | 20/32 [00:01<00:01,  9.43it/s, reward=2.15, num_turns=2, num_tools=1, failed=0, completion_tokens=23]  step 4:  66%|██████▌   | 21/32 [00:01<00:01,  9.43it/s, reward=2.07, num_turns=2, num_tools=1, failed=0, completion_tokens=23.2]step 4:  69%|██████▉   | 22/32 [00:01<00:01,  9.43it/s, reward=2, num_turns=2, num_tools=1, failed=0, completion_tokens=23.5]   step 4:  72%|███████▏  | 23/32 [00:01<00:00,  9.43it/s, reward=1.93, num_turns=2, num_tools=1, failed=0, completion_tokens=23.8]step 4:  75%|███████▌  | 24/32 [00:01<00:00,  9.43it/s, reward=1.88, num_turns=2, num_tools=1, failed=0, completion_tokens=24.1]step 4:  78%|███████▊  | 25/32 [00:01<00:00,  9.43it/s, reward=1.82, num_turns=2, num_tools=1, failed=0, completion_tokens=24.5]step 4:  81%|████████▏ | 26/32 [00:01<00:00,  9.43it/s, reward=1.63, num_turns=1.96, num_tools=0.962, failed=0, completion_tokens=28.7]step 4:  84%|████████▍ | 27/32 [00:01<00:00,  9.43it/s, reward=1.61, num_turns=1.96, num_tools=0.963, failed=0, completion_tokens=28.7]step 4:  88%|████████▊ | 28/32 [00:01<00:00,  9.43it/s, reward=1.48, num_turns=1.96, num_tools=0.964, failed=0, completion_tokens=29]  step 4:  91%|█████████ | 29/32 [00:01<00:00,  9.43it/s, reward=1.36, num_turns=1.97, num_tools=0.966, failed=0, completion_tokens=29.3]step 4:  
94%|█████████▍| 30/32 [00:01<00:00,  9.43it/s, reward=1.22, num_turns=1.93, num_tools=0.933, failed=0, completion_tokens=32.1]step 4:  97%|█████████▋| 31/32 [00:01<00:00,  9.43it/s, reward=1.21, num_turns=1.94, num_tools=0.935, failed=0, completion_tokens=33]  step 4: 100%|██████████| 32/32 [00:02<00:00, 22.73it/s, reward=1.21, num_turns=1.94, num_tools=0.935, failed=0, completion_tokens=33]step 4: 100%|██████████| 32/32 [00:02<00:00, 22.73it/s, reward=1.19, num_turns=1.94, num_tools=0.938, failed=0, completion_tokens=35.7]step 4: 100%|██████████| 32/32 [00:02<00:00, 15.81it/s, reward=1.19, num_turns=1.94, num_tools=0.938, failed=0, completion_tokens=35.7]
+  group 0: mean=-1.04 std=1.531 min=-3.0 max=+1.0 | What is Germany's population density in people per
+  group 1: mean=+3.62 std=0.484 min=+3.0 max=+4.0 | What's the weather like in London?
+  group 2: mean=+0.19 std=1.248 min=-3.0 max=+1.5 | What is the temperature in Mumbai in Fahrenheit?
+  group 3: mean=+2.00 std=0.000 min=+2.0 max=+2.0 | What is 543 plus 96?
+  Avg reward: 1.193 | Avg tools/rollout: 0.9 | groups with variance: 3/4
+No "val/reward" metric found in history
+Deleted checkpoint ./.art/rl-tool-use/models/qwen-0.5b-tool-agent/checkpoints/0002
+Packed 24 trajectories into 2 sequences of length 2048
+train:   0%|          | 0/2 [00:00<?, ?it/s]train:  50%|█████     | 1/2 [00:01<00:01,  1.97s/it]train:  50%|█████     | 1/2 [00:01<00:01,  1.97s/it, loss/train=0.0658, loss/grad_norm=1.35, loss/learning_rate=5e-5, loss/entropy=0.857]train: 100%|██████████| 2/2 [00:02<00:00,  1.02it/s, loss/train=0.0658, loss/grad_norm=1.35, loss/learning_rate=5e-5, loss/entropy=0.857]train: 100%|██████████| 2/2 [00:02<00:00,  1.02it/s, loss/train=-0.501, loss/grad_norm=1.91, loss/learning_rate=5e-5, loss/entropy=0.515](APIServer pid=12946) Adapters before cleanup: ['default']
+(APIServer pid=12946) Keeping active adapter(s): ['default']
+(APIServer pid=12946) Adapters after cleanup: ['default']
+train: 100%|██████████| 2/2 [00:30<00:00, 15.18s/it, loss/train=-0.501, loss/grad_norm=1.91, loss/learning_rate=5e-5, loss/entropy=0.515]
+
+============================================================
+Step 5/50
+============================================================
+step 5:   0%|          | 0/32 [00:00<?, ?it/s]step 5:   3%|▎         | 1/32 [00:01<00:35,  1.15s/it]step 5:   3%|▎         | 1/32 [00:01<00:35,  1.15s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=28]step 5:   6%|▋         | 2/32 [00:01<00:16,  1.82it/s, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=28]step 5:   6%|▋         | 2/32 [00:01<00:16,  1.82it/s, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=34]step 5:   9%|▉         | 3/32 [00:01<00:15,  1.82it/s, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=37]step 5:  12%|█▎        | 4/32 [00:01<00:15,  1.82it/s, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=38.8]step 5:  16%|█▌        | 5/32 [00:01<00:05,  5.27it/s, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=38.8]step 5:  16%|█▌        | 5/32 [00:01<00:05,  5.27it/s, reward=-2.8, num_turns=1.2, num_tools=0.2, failed=0, completion_tokens=36.2]step 5:  19%|█▉        | 6/32 [00:01<00:04,  5.27it/s, reward=-2.83, num_turns=1.17, num_tools=0.167, failed=0, completion_tokens=41.5]step 5:  22%|██▏       | 7/32 [00:01<00:04,  5.27it/s, reward=-2.71, num_turns=1.29, num_tools=0.286, failed=0, completion_tokens=39.4]step 5:  25%|██▌       | 8/32 [00:01<00:04,  5.27it/s, reward=-2.62, num_turns=1.38, num_tools=0.375, failed=0, completion_tokens=37.9]step 5:  28%|██▊       | 9/32 [00:01<00:04,  5.27it/s, reward=-2.56, num_turns=1.44, num_tools=0.444, failed=0, completion_tokens=36.6]step 5:  31%|███▏      | 10/32 [00:01<00:04,  5.27it/s, reward=-2.5, num_turns=1.5, num_tools=0.5, failed=0, completion_tokens=35.8]   step 5:  34%|███▍      | 11/32 [00:01<00:03,  5.27it/s, reward=-2.45, num_turns=1.55, num_tools=0.545, failed=0, completion_tokens=35.5]step 5:  38%|███▊      | 12/32 [00:01<00:01, 14.50it/s, reward=-2.45, num_turns=1.55, num_tools=0.545, failed=0, completion_tokens=35.5]step 5:  38%|███▊      | 12/32 [00:01<00:01, 14.50it/s, 
reward=-2.42, num_turns=1.58, num_tools=0.583, failed=0, completion_tokens=35.7]step 5:  41%|████      | 13/32 [00:01<00:01, 14.50it/s, reward=-2.38, num_turns=1.62, num_tools=0.615, failed=0, completion_tokens=35.8]step 5:  44%|████▍     | 14/32 [00:01<00:01, 14.50it/s, reward=-2.36, num_turns=1.64, num_tools=0.643, failed=0, completion_tokens=36.1]step 5:  47%|████▋     | 15/32 [00:01<00:01, 14.50it/s, reward=-2.33, num_turns=1.67, num_tools=0.667, failed=0, completion_tokens=35.5]step 5:  50%|█████     | 16/32 [00:01<00:01, 14.50it/s, reward=-2.31, num_turns=1.69, num_tools=0.688, failed=0, completion_tokens=34.7]step 5:  53%|█████▎    | 17/32 [00:01<00:01, 14.50it/s, reward=-2.29, num_turns=1.71, num_tools=0.706, failed=0, completion_tokens=35.3]step 5:  56%|█████▋    | 18/32 [00:01<00:00, 21.49it/s, reward=-2.29, num_turns=1.71, num_tools=0.706, failed=0, completion_tokens=35.3]step 5:  56%|█████▋    | 18/32 [00:01<00:00, 21.49it/s, reward=-2.28, num_turns=1.72, num_tools=0.722, failed=0, completion_tokens=35.9]step 5:  59%|█████▉    | 19/32 [00:01<00:00, 21.49it/s, reward=-2.05, num_turns=1.74, num_tools=0.737, failed=0, completion_tokens=35.4]step 5:  62%|██████▎   | 20/32 [00:01<00:00, 21.49it/s, reward=-2.05, num_turns=1.75, num_tools=0.75, failed=0, completion_tokens=36.2] step 5:  66%|██████▌   | 21/32 [00:01<00:00, 21.49it/s, reward=-2.05, num_turns=1.76, num_tools=0.762, failed=0, completion_tokens=37.2]step 5:  69%|██████▉   | 22/32 [00:01<00:00, 24.26it/s, reward=-2.05, num_turns=1.76, num_tools=0.762, failed=0, completion_tokens=37.2]step 5:  69%|██████▉   | 22/32 [00:01<00:00, 24.26it/s, reward=-2.05, num_turns=1.77, num_tools=0.773, failed=0, completion_tokens=38.1]step 5:  72%|███████▏  | 23/32 [00:01<00:00, 24.26it/s, reward=-2.04, num_turns=1.78, num_tools=0.783, failed=0, completion_tokens=37.7]step 5:  75%|███████▌  | 24/32 [00:01<00:00, 24.26it/s, reward=-2.04, num_turns=1.79, num_tools=0.792, failed=0, completion_tokens=38.2]
step 5:  78%|███████▊  | 25/32 [00:01<00:00, 24.26it/s, reward=-2.04, num_turns=1.8, num_tools=0.8, failed=0, completion_tokens=37.8]   step 5:  81%|████████▏ | 26/32 [00:01<00:00, 23.28it/s, reward=-2.04, num_turns=1.8, num_tools=0.8, failed=0, completion_tokens=37.8]step 5:  81%|████████▏ | 26/32 [00:01<00:00, 23.28it/s, reward=-2.04, num_turns=1.81, num_tools=0.808, failed=0, completion_tokens=38.3]step 5:  84%|████████▍ | 27/32 [00:02<00:00, 23.28it/s, reward=-2.04, num_turns=1.81, num_tools=0.815, failed=0, completion_tokens=40.4]step 5:  88%|████████▊ | 28/32 [00:02<00:00, 23.28it/s, reward=-2.07, num_turns=1.79, num_tools=0.786, failed=0, completion_tokens=47.9]step 5:  91%|█████████ | 29/32 [00:03<00:00, 23.28it/s, reward=-2.1, num_turns=1.76, num_tools=0.759, failed=0, completion_tokens=63.7] step 5:  94%|█████████▍| 30/32 [00:03<00:00,  7.07it/s, reward=-2.1, num_turns=1.76, num_tools=0.759, failed=0, completion_tokens=63.7]step 5:  94%|█████████▍| 30/32 [00:03<00:00,  7.07it/s, reward=-2.1, num_turns=1.77, num_tools=0.8, failed=0, completion_tokens=70]    step 5:  97%|█████████▋| 31/32 [00:03<00:00,  7.07it/s, reward=-2.1, num_turns=1.77, num_tools=0.806, failed=0, completion_tokens=76.4]step 5: 100%|██████████| 32/32 [00:03<00:00,  7.07it/s, reward=-2.09, num_turns=1.78, num_tools=0.812, failed=0, completion_tokens=82.7]step 5: 100%|██████████| 32/32 [00:03<00:00,  8.72it/s, reward=-2.09, num_turns=1.78, num_tools=0.812, failed=0, completion_tokens=82.7]
+  group 0: mean=-2.00 std=1.581 min=-3.0 max=+2.0 | What is the tallest mountain?
+  group 1: mean=-2.00 std=0.000 min=-2.0 max=-2.0 | Which is hotter right now, Tokyo or Mumbai?
+  group 2: mean=-2.25 std=0.433 min=-3.0 max=-2.0 | What is the population of France divided by its ar
+  group 3: mean=-2.12 std=0.331 min=-3.0 max=-2.0 | What is the GDP of France?
+  Avg reward: -2.094 | Avg tools/rollout: 0.8 | groups with variance: 3/4
+No "val/reward" metric found in history
+Deleted checkpoint ./.art/rl-tool-use/models/qwen-0.5b-tool-agent/checkpoints/0003
+Packed 21 trajectories into 2 sequences of length 2048
+train:   0%|          | 0/2 [00:00<?, ?it/s]train:  50%|█████     | 1/2 [00:02<00:02,  2.12s/it]train:  50%|█████     | 1/2 [00:02<00:02,  2.12s/it, loss/train=-1.41, loss/grad_norm=1.82, loss/learning_rate=5e-5, loss/entropy=1.69]train: 100%|██████████| 2/2 [00:02<00:00,  1.09s/it, loss/train=-1.41, loss/grad_norm=1.82, loss/learning_rate=5e-5, loss/entropy=1.69]train: 100%|██████████| 2/2 [00:02<00:00,  1.09s/it, loss/train=-0.442, loss/grad_norm=7.41, loss/learning_rate=5e-5, loss/entropy=1.25](APIServer pid=12946) Adapters before cleanup: ['default']
+(APIServer pid=12946) Keeping active adapter(s): ['default']
+(APIServer pid=12946) Adapters after cleanup: ['default']
+train: 100%|██████████| 2/2 [00:30<00:00, 15.43s/it, loss/train=-0.442, loss/grad_norm=7.41, loss/learning_rate=5e-5, loss/entropy=1.25]
+  Running validation...
+validation:   0%|          | 0/400 [00:00<?, ?it/s]validation:   0%|          | 1/400 [00:10<1:12:15, 10.87s/it]validation:   0%|          | 1/400 [00:10<1:12:15, 10.87s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=44]validation:   0%|          | 2/400 [00:11<31:36,  4.77s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=44]  validation:   0%|          | 2/400 [00:11<31:36,  4.77s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=37]validation:   1%|          | 3/400 [00:11<31:31,  4.77s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=49.3]validation:   1%|          | 4/400 [00:11<31:27,  4.77s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=54]  validation:   1%|▏         | 5/400 [00:11<31:22,  4.77s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=46.2]validation:   2%|▏         | 6/400 [00:11<07:32,  1.15s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=46.2]validation:   2%|▏         | 6/400 [00:11<07:32,  1.15s/it, reward=-2.83, num_turns=1.17, num_tools=0.167, failed=0, completion_tokens=42.2]validation:   2%|▏         | 7/400 [00:11<06:10,  1.06it/s, reward=-2.83, num_turns=1.17, num_tools=0.167, failed=0, completion_tokens=42.2]validation:   2%|▏         | 7/400 [00:11<06:10,  1.06it/s, reward=-2.86, num_turns=1.14, num_tools=0.143, failed=0, completion_tokens=42.6]validation:   2%|▏         | 8/400 [00:11<06:09,  1.06it/s, reward=-2.88, num_turns=1.12, num_tools=0.125, failed=0, completion_tokens=48.2]validation:   2%|▏         | 9/400 [00:12<04:04,  1.60it/s, reward=-2.88, num_turns=1.12, num_tools=0.125, failed=0, completion_tokens=48.2]validation:   2%|▏         | 9/400 [00:12<04:04,  1.60it/s, reward=-2.89, num_turns=1.11, num_tools=0.111, failed=0, completion_tokens=45.3]validation:   2%|▎         | 10/400 [00:12<04:03,  1.60it/s, reward=-2.9, num_turns=1.1, num_tools=0.1, failed=0, 
completion_tokens=43.3]   validation:   3%|▎         | 11/400 [00:12<02:46,  2.34it/s, reward=-2.9, num_turns=1.1, num_tools=0.1, failed=0, completion_tokens=43.3]validation:   3%|▎         | 11/400 [00:12<02:46,  2.34it/s, reward=-2.91, num_turns=1.09, num_tools=0.0909, failed=0, completion_tokens=42]validation:   3%|▎         | 12/400 [00:12<02:45,  2.34it/s, reward=-2.92, num_turns=1.08, num_tools=0.0833, failed=0, completion_tokens=40]validation:   3%|▎         | 13/400 [00:12<02:45,  2.34it/s, reward=-2.38, num_turns=1.15, num_tools=0.154, failed=0, completion_tokens=39.1]validation:   4%|▎         | 14/400 [00:12<02:45,  2.34it/s, reward=-1.93, num_turns=1.21, num_tools=0.214, failed=0, completion_tokens=38.3]validation:   4%|▍         | 15/400 [00:12<02:44,  2.34it/s, reward=-2, num_turns=1.2, num_tools=0.2, failed=0, completion_tokens=37.3]      validation:   4%|▍         | 16/400 [00:12<02:44,  2.34it/s, reward=-2.06, num_turns=1.19, num_tools=0.188, failed=0, completion_tokens=38.4]validation:   4%|▍         | 17/400 [00:12<02:43,  2.34it/s, reward=-2.06, num_turns=1.24, num_tools=0.235, failed=0, completion_tokens=38]  validation:   4%|▍         | 18/400 [00:12<02:43,  2.34it/s, reward=-2.06, num_turns=1.28, num_tools=0.278, failed=0, completion_tokens=37.4]validation:   5%|▍         | 19/400 [00:12<02:42,  2.34it/s, reward=-2.05, num_turns=1.32, num_tools=0.316, failed=0, completion_tokens=37.1]validation:   5%|▌         | 20/400 [00:12<02:42,  2.34it/s, reward=-2.1, num_turns=1.3, num_tools=0.3, failed=0, completion_tokens=39.4]    validation:   5%|▌         | 21/400 [00:12<02:42,  2.34it/s, reward=-1.93, num_turns=1.33, num_tools=0.333, failed=0, completion_tokens=39.5]validation:   6%|▌         | 22/400 [00:12<02:41,  2.34it/s, reward=-1.98, num_turns=1.32, num_tools=0.318, failed=0, completion_tokens=48.6]validation:   6%|▌         | 23/400 [00:12<02:41,  2.34it/s, reward=-2.02, num_turns=1.3, num_tools=0.304, failed=0, 
completion_tokens=53.1] validation:   6%|▌         | 24/400 [00:12<02:40,  2.34it/s, reward=-1.8, num_turns=1.33, num_tools=0.333, failed=0, completion_tokens=52.1]validation:   6%|▋         | 25/400 [00:12<02:40,  2.34it/s, reward=-1.81, num_turns=1.36, num_tools=0.36, failed=0, completion_tokens=52.2]validation:   6%|▋         | 26/400 [00:12<02:39,  2.34it/s, reward=-1.81, num_turns=1.38, num_tools=0.385, failed=0, completion_tokens=52.5]validation:   7%|▋         | 27/400 [00:12<00:40,  9.10it/s, reward=-1.81, num_turns=1.38, num_tools=0.385, failed=0, completion_tokens=52.5]validation:   7%|▋         | 27/400 [00:12<00:40,  9.10it/s, reward=-1.64, num_turns=1.41, num_tools=0.407, failed=0, completion_tokens=51.5]validation:   7%|▋         | 28/400 [00:12<00:40,  9.10it/s, reward=-1.65, num_turns=1.43, num_tools=0.429, failed=0, completion_tokens=50.4]validation:   7%|▋         | 29/400 [00:12<00:40,  9.10it/s, reward=-1.57, num_turns=1.45, num_tools=0.448, failed=0, completion_tokens=49.6]validation:   8%|▊         | 30/400 [00:12<00:40,  9.10it/s, reward=-1.51, num_turns=1.47, num_tools=0.467, failed=0, completion_tokens=48.9]validation:   8%|▊         | 31/400 [00:12<00:40,  9.10it/s, reward=-1.52, num_turns=1.48, num_tools=0.484, failed=0, completion_tokens=48.1]validation:   8%|▊         | 32/400 [00:12<00:40,  9.10it/s, reward=-1.57, num_turns=1.47, num_tools=0.469, failed=0, completion_tokens=47.4][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:   8%|▊         | 33/400 [00:12<00:32, 11.43it/s, reward=-1.57, num_turns=1.47, num_tools=0.469, failed=0, completion_tokens=47.4]validation:   8%|▊         | 33/400 [00:12<00:32, 11.43it/s, reward=-1.61, num_turns=1.45, num_tools=0.455, failed=0.0303, completion_tokens=47.4]validation:   8%|▊         | 34/400 [00:12<00:32, 11.43it/s, reward=-1.65, num_turns=1.44, num_tools=0.441, failed=0.0588, completion_tokens=47.4]validation:   9%|▉         | 35/400 [00:12<00:31, 11.43it/s, reward=-1.69, num_turns=1.43, num_tools=0.429, failed=0.0857, completion_tokens=47.4]validation:   9%|▉         | 36/400 [00:12<00:31, 11.43it/s, reward=-1.73, num_turns=1.42, num_tools=0.417, failed=0.111, completion_tokens=47.4] validation:   9%|▉         | 37/400 [00:12<00:31, 11.43it/s, reward=-1.76, num_turns=1.41, num_tools=0.405, failed=0.135, completion_tokens=47.4]validation:  10%|▉         | 38/400 [00:12<00:31, 11.43it/s, reward=-1.79, num_turns=1.39, num_tools=0.395, failed=0.158, completion_tokens=47.4]validation:  10%|▉         | 39/400 [00:12<00:31, 11.43it/s, reward=-1.82, num_turns=1.38, num_tools=0.385, failed=0.179, completion_tokens=47.4]validation:  10%|█         | 40/400 [00:12<00:31, 11.43it/s, reward=-1.85, num_turns=1.38, num_tools=0.375, failed=0.2, completion_tokens=47.4]  validation:  10%|█         | 41/400 [00:12<00:31, 11.43it/s, reward=-1.88, num_turns=1.37, num_tools=0.366, failed=0.22, completion_tokens=47.4]validation:  10%|█         | 42/400 [00:12<00:31, 11.43it/s, reward=-1.91, num_turns=1.36, num_tools=0.357, failed=0.238, completion_tokens=47.4]validation:  11%|█         | 43/400 [00:12<00:31, 11.43it/s, reward=-1.93, num_turns=1.35, num_tools=0.349, failed=0.256, completion_tokens=47.4]validation:  11%|█         | 44/400 [00:12<00:31, 11.43it/s, reward=-1.96, num_turns=1.34, num_tools=0.341, failed=0.273, completion_tokens=47.4]validation:  11%|█▏        | 45/400 [00:12<00:31, 11.43it/s, reward=-1.98, num_turns=1.33, 
num_tools=0.333, failed=0.289, completion_tokens=47.4]validation:  12%|█▏        | 46/400 [00:12<00:30, 11.43it/s, reward=-2, num_turns=1.33, num_tools=0.326, failed=0.304, completion_tokens=47.4]   [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  12%|█▏        | 47/400 [00:13<00:18, 19.01it/s, reward=-2, num_turns=1.33, num_tools=0.326, failed=0.304, completion_tokens=47.4]validation:  12%|█▏        | 47/400 [00:13<00:18, 19.01it/s, reward=-2.02, num_turns=1.32, num_tools=0.319, failed=0.319, completion_tokens=47.4]validation:  12%|█▏        | 48/400 [00:13<00:18, 19.01it/s, reward=-2.05, num_turns=1.31, num_tools=0.312, failed=0.333, completion_tokens=47.4]validation:  12%|█▏        | 49/400 [00:13<00:18, 19.01it/s, reward=-2.06, num_turns=1.31, num_tools=0.306, failed=0.347, completion_tokens=47.4]validation:  12%|█▎        | 50/400 [00:13<00:18, 19.01it/s, reward=-2.08, num_turns=1.3, num_tools=0.3, failed=0.36, completion_tokens=47.4]    validation:  13%|█▎        | 51/400 [00:13<00:18, 19.01it/s, reward=-2.1, num_turns=1.29, num_tools=0.294, failed=0.373, completion_tokens=47.4]validation:  13%|█▎        | 52/400 [00:13<00:18, 19.01it/s, reward=-2.12, num_turns=1.29, num_tools=0.288, failed=0.385, completion_tokens=47.4]validation:  13%|█▎        | 53/400 [00:13<00:18, 19.01it/s, reward=-2.14, num_turns=1.28, num_tools=0.283, failed=0.396, completion_tokens=47.4]validation:  14%|█▎        | 54/400 [00:13<00:18, 19.01it/s, reward=-2.15, num_turns=1.28, num_tools=0.278, failed=0.407, completion_tokens=47.4]validation:  14%|█▍        | 55/400 [00:13<00:18, 19.01it/s, reward=-2.17, num_turns=1.27, num_tools=0.273, failed=0.418, completion_tokens=47.4]validation:  14%|█▍        | 56/400 [00:13<00:18, 19.01it/s, reward=-2.18, num_turns=1.27, num_tools=0.268, failed=0.429, completion_tokens=47.4]validation:  14%|█▍        | 57/400 [00:13<00:18, 19.01it/s, reward=-2.2, num_turns=1.26, num_tools=0.263, failed=0.439, completion_tokens=47.4] validation:  14%|█▍        | 58/400 [00:13<00:17, 19.01it/s, reward=-2.21, num_turns=1.26, num_tools=0.259, failed=0.448, completion_tokens=47.4]validation:  15%|█▍        | 59/400 [00:13<00:17, 19.01it/s, reward=-2.22, num_turns=1.25, 
num_tools=0.254, failed=0.458, completion_tokens=47.4]validation:  15%|█▌        | 60/400 [00:13<00:17, 19.01it/s, reward=-2.24, num_turns=1.25, num_tools=0.25, failed=0.467, completion_tokens=47.4] validation:  15%|█▌        | 61/400 [00:13<00:17, 19.01it/s, reward=-2.25, num_turns=1.25, num_tools=0.246, failed=0.475, completion_tokens=47.4]validation:  16%|█▌        | 62/400 [00:13<00:17, 19.01it/s, reward=-2.26, num_turns=1.24, num_tools=0.242, failed=0.484, completion_tokens=47.4]validation:  16%|█▌        | 63/400 [00:13<00:17, 19.01it/s, reward=-2.27, num_turns=1.24, num_tools=0.238, failed=0.492, completion_tokens=47.4]validation:  16%|█▌        | 64/400 [00:13<00:17, 19.01it/s, reward=-2.28, num_turns=1.23, num_tools=0.234, failed=0.5, completion_tokens=47.4]  validation:  16%|█▋        | 65/400 [00:13<00:17, 19.01it/s, reward=-2.29, num_turns=1.23, num_tools=0.231, failed=0.508, completion_tokens=47.4]validation:  16%|█▋        | 66/400 [00:13<00:17, 19.01it/s, reward=-2.31, num_turns=1.23, num_tools=0.227, failed=0.515, completion_tokens=47.4]validation:  17%|█▋        | 67/400 [00:13<00:17, 19.01it/s, reward=-2.32, num_turns=1.22, num_tools=0.224, failed=0.522, completion_tokens=47.4]validation:  17%|█▋        | 68/400 [00:13<00:17, 19.01it/s, reward=-2.33, num_turns=1.22, num_tools=0.221, failed=0.529, completion_tokens=47.4]validation:  17%|█▋        | 69/400 [00:13<00:17, 19.01it/s, reward=-2.34, num_turns=1.22, num_tools=0.217, failed=0.536, completion_tokens=47.4]validation:  18%|█▊        | 70/400 [00:13<00:17, 19.01it/s, reward=-2.35, num_turns=1.21, num_tools=0.214, failed=0.543, completion_tokens=47.4]validation:  18%|█▊        | 71/400 [00:13<00:17, 19.01it/s, reward=-2.35, num_turns=1.21, num_tools=0.211, failed=0.549, completion_tokens=47.4]validation:  18%|█▊        | 72/400 [00:13<00:17, 19.01it/s, reward=-2.36, num_turns=1.21, num_tools=0.208, failed=0.556, completion_tokens=47.4]validation:  18%|█▊        | 73/400 
[00:13<00:17, 19.01it/s, reward=-2.37, num_turns=1.21, num_tools=0.205, failed=0.562, completion_tokens=47.4][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  18%|█▊        | 74/400 [00:13<00:17, 19.01it/s, reward=-2.38, num_turns=1.2, num_tools=0.203, failed=0.568, completion_tokens=47.4] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  19%|█▉        | 75/400 [00:13<00:08, 39.70it/s, reward=-2.38, num_turns=1.2, num_tools=0.203, failed=0.568, completion_tokens=47.4]validation:  19%|█▉        | 75/400 [00:13<00:08, 39.70it/s, reward=-2.39, num_turns=1.2, num_tools=0.2, failed=0.573, completion_tokens=47.4]  validation:  19%|█▉        | 76/400 [00:13<00:08, 39.70it/s, reward=-2.4, num_turns=1.2, num_tools=0.197, failed=0.579, completion_tokens=47.4][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  19%|█▉        | 77/400 [00:13<00:08, 39.70it/s, reward=-2.4, num_turns=1.19, num_tools=0.195, failed=0.584, completion_tokens=47.4]validation:  20%|█▉        | 78/400 [00:13<00:08, 39.70it/s, reward=-2.41, num_turns=1.19, num_tools=0.192, failed=0.59, completion_tokens=47.4]validation:  20%|█▉        | 79/400 [00:13<00:08, 39.70it/s, reward=-2.42, num_turns=1.19, num_tools=0.19, failed=0.595, completion_tokens=47.4]validation:  20%|██        | 80/400 [00:13<00:08, 39.70it/s, reward=-2.43, num_turns=1.19, num_tools=0.188, failed=0.6, completion_tokens=47.4] validation:  20%|██        | 81/400 [00:13<00:08, 39.70it/s, reward=-2.43, num_turns=1.19, num_tools=0.185, failed=0.605, completion_tokens=47.4]validation:  20%|██        | 82/400 [00:13<00:08, 39.70it/s, reward=-2.44, num_turns=1.18, num_tools=0.183, failed=0.61, completion_tokens=47.4] validation:  21%|██        | 83/400 [00:13<00:07, 39.70it/s, reward=-2.45, num_turns=1.18, num_tools=0.181, failed=0.614, completion_tokens=47.4]validation:  21%|██        | 84/400 [00:13<00:07, 39.70it/s, reward=-2.45, num_turns=1.18, num_tools=0.179, failed=0.619, completion_tokens=47.4]validation:  21%|██▏       | 85/400 [00:13<00:07, 39.70it/s, reward=-2.46, num_turns=1.18, num_tools=0.176, failed=0.624, completion_tokens=47.4]validation:  22%|██▏       | 86/400 [00:13<00:07, 39.70it/s, reward=-2.47, num_turns=1.17, num_tools=0.174, failed=0.628, completion_tokens=47.4]validation:  22%|██▏       | 87/400 [00:13<00:06, 44.77it/s, reward=-2.47, num_turns=1.17, num_tools=0.174, failed=0.628, completion_tokens=47.4]validation:  22%|██▏       | 87/400 [00:13<00:06, 44.77it/s, reward=-2.46, num_turns=1.18, num_tools=0.184, failed=0.621, completion_tokens=46.6]validation:  22%|██▏       | 88/400 [00:13<00:06, 44.77it/s, reward=-2.46, num_turns=1.19, num_tools=0.193, failed=0.614, completion_tokens=45.9][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 
'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  22%|██▏       | 89/400 [00:13<00:06, 44.77it/s, reward=-2.46, num_turns=1.19, num_tools=0.191, failed=0.618, completion_tokens=45.9][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  22%|██▎       | 90/400 [00:13<00:06, 44.77it/s, reward=-2.47, num_turns=1.19, num_tools=0.189, failed=0.622, completion_tokens=45.9][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  23%|██▎       | 91/400 [00:13<00:06, 44.77it/s, reward=-2.47, num_turns=1.19, num_tools=0.187, failed=0.626, completion_tokens=45.9]validation:  23%|██▎       | 92/400 [00:13<00:06, 44.77it/s, reward=-2.48, num_turns=1.18, num_tools=0.185, failed=0.63, completion_tokens=45.9] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  23%|██▎       | 93/400 [00:13<00:06, 44.77it/s, reward=-2.49, num_turns=1.18, num_tools=0.183, failed=0.634, completion_tokens=45.9][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  24%|██▎       | 94/400 [00:13<00:06, 44.77it/s, reward=-2.49, num_turns=1.18, num_tools=0.181, failed=0.638, completion_tokens=45.9][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  24%|██▍       | 95/400 [00:13<00:06, 44.77it/s, reward=-2.5, num_turns=1.18, num_tools=0.179, failed=0.642, completion_tokens=45.9] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  24%|██▍       | 96/400 [00:13<00:06, 44.77it/s, reward=-2.5, num_turns=1.18, num_tools=0.177, failed=0.646, completion_tokens=45.9][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  24%|██▍       | 97/400 [00:13<00:06, 44.77it/s, reward=-2.51, num_turns=1.18, num_tools=0.175, failed=0.649, completion_tokens=45.9]validation:  24%|██▍       | 98/400 [00:13<00:06, 44.77it/s, reward=-2.51, num_turns=1.17, num_tools=0.173, failed=0.653, completion_tokens=45.9][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  25%|██▍       | 99/400 [00:13<00:06, 44.77it/s, reward=-2.52, num_turns=1.17, num_tools=0.172, failed=0.657, completion_tokens=45.9][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  25%|██▌       | 100/400 [00:13<00:06, 46.21it/s, reward=-2.52, num_turns=1.17, num_tools=0.172, failed=0.657, completion_tokens=45.9]validation:  25%|██▌       | 100/400 [00:13<00:06, 46.21it/s, reward=-2.52, num_turns=1.17, num_tools=0.17, failed=0.66, completion_tokens=45.9]  validation:  25%|██▌       | 101/400 [00:13<00:06, 46.21it/s, reward=-2.53, num_turns=1.17, num_tools=0.168, failed=0.663, completion_tokens=45.9]validation:  26%|██▌       | 102/400 [00:13<00:06, 46.21it/s, reward=-2.53, num_turns=1.17, num_tools=0.167, failed=0.667, completion_tokens=45.9]validation:  26%|██▌       | 103/400 [00:13<00:06, 46.21it/s, reward=-2.54, num_turns=1.17, num_tools=0.165, failed=0.67, completion_tokens=45.9] validation:  26%|██▌       | 104/400 [00:13<00:06, 46.21it/s, reward=-2.54, num_turns=1.16, num_tools=0.163, failed=0.673, completion_tokens=45.9]validation:  26%|██▋       | 105/400 [00:13<00:06, 46.21it/s, reward=-2.54, num_turns=1.16, num_tools=0.162, failed=0.676, completion_tokens=45.9]validation:  26%|██▋       | 106/400 [00:13<00:06, 46.21it/s, reward=-2.55, num_turns=1.16, num_tools=0.16, failed=0.679, completion_tokens=45.9] validation:  27%|██▋       | 107/400 [00:13<00:06, 46.21it/s, reward=-2.54, num_turns=1.17, num_tools=0.168, failed=0.673, completion_tokens=45.3]validation:  27%|██▋       | 108/400 [00:13<00:06, 46.21it/s, reward=-2.54, num_turns=1.18, num_tools=0.176, failed=0.667, completion_tokens=45]  validation:  27%|██▋       | 109/400 [00:13<00:06, 46.21it/s, reward=-2.53, num_turns=1.18, num_tools=0.183, failed=0.661, completion_tokens=44.7]validation:  28%|██▊       | 110/400 [00:13<00:06, 46.21it/s, reward=-2.53, num_turns=1.19, num_tools=0.191, failed=0.655, completion_tokens=44.3][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  28%|██▊       | 111/400 [00:13<00:06, 46.21it/s, reward=-2.53, num_turns=1.19, num_tools=0.189, failed=0.658, completion_tokens=44.3]validation:  28%|██▊       | 112/400 [00:13<00:06, 46.21it/s, reward=-2.54, num_turns=1.19, num_tools=0.188, failed=0.661, completion_tokens=44.3]validation:  28%|██▊       | 113/400 [00:13<00:06, 46.21it/s, reward=-2.54, num_turns=1.19, num_tools=0.186, failed=0.664, completion_tokens=44.3]validation:  28%|██▊       | 114/400 [00:13<00:06, 46.21it/s, reward=-2.55, num_turns=1.18, num_tools=0.184, failed=0.667, completion_tokens=44.3]validation:  29%|██▉       | 115/400 [00:13<00:06, 46.21it/s, reward=-2.55, num_turns=1.18, num_tools=0.183, failed=0.67, completion_tokens=44.3] validation:  29%|██▉       | 116/400 [00:13<00:06, 46.21it/s, reward=-2.55, num_turns=1.18, num_tools=0.181, failed=0.672, completion_tokens=44.3]validation:  29%|██▉       | 117/400 [00:13<00:06, 46.21it/s, reward=-2.56, num_turns=1.18, num_tools=0.179, failed=0.675, completion_tokens=44.3]validation:  30%|██▉       | 118/400 [00:13<00:06, 46.21it/s, reward=-2.56, num_turns=1.18, num_tools=0.178, failed=0.678, completion_tokens=44.3]validation:  30%|██▉       | 119/400 [00:13<00:06, 46.21it/s, reward=-2.56, num_turns=1.18, num_tools=0.176, failed=0.681, completion_tokens=44.3]validation:  30%|███       | 120/400 [00:13<00:06, 46.21it/s, reward=-2.57, num_turns=1.18, num_tools=0.175, failed=0.683, completion_tokens=44.3]validation:  30%|███       | 121/400 [00:13<00:06, 46.21it/s, reward=-2.57, num_turns=1.17, num_tools=0.174, failed=0.686, completion_tokens=44.3]validation:  30%|███       | 122/400 [00:13<00:06, 46.21it/s, reward=-2.58, num_turns=1.17, num_tools=0.172, failed=0.689, completion_tokens=44.3]validation:  31%|███       | 123/400 [00:13<00:05, 46.21it/s, reward=-2.52, num_turns=1.18, num_tools=0.179, failed=0.683, completion_tokens=43.8][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already 
borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  31%|███       | 124/400 [00:13<00:04, 67.91it/s, reward=-2.52, num_turns=1.18, num_tools=0.179, failed=0.683, completion_tokens=43.8]validation:  31%|███       | 124/400 [00:13<00:04, 67.91it/s, reward=-2.52, num_turns=1.19, num_tools=0.185, failed=0.677, completion_tokens=44]  validation:  31%|███▏      | 125/400 [00:13<00:04, 67.91it/s, reward=-2.47, num_turns=1.19, num_tools=0.192, failed=0.672, completion_tokens=43.4]validation:  32%|███▏      | 126/400 [00:13<00:04, 67.91it/s, reward=-2.46, num_turns=1.2, num_tools=0.198, failed=0.667, completion_tokens=43]   validation:  32%|███▏      | 127/400 [00:13<00:04, 67.91it/s, reward=-2.47, num_turns=1.2, num_tools=0.197, failed=0.669, completion_tokens=43][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  32%|███▏      | 128/400 [00:13<00:04, 67.91it/s, reward=-2.47, num_turns=1.2, num_tools=0.195, failed=0.672, completion_tokens=43]validation:  32%|███▏      | 129/400 [00:13<00:03, 67.91it/s, reward=-2.47, num_turns=1.19, num_tools=0.194, failed=0.674, completion_tokens=43]validation:  32%|███▎      | 130/400 [00:13<00:03, 67.91it/s, reward=-2.48, num_turns=1.2, num_tools=0.2, failed=0.677, completion_tokens=43]   validation:  33%|███▎      | 131/400 [00:13<00:03, 67.91it/s, reward=-2.48, num_turns=1.21, num_tools=0.206, failed=0.679, completion_tokens=42.8]validation:  33%|███▎      | 132/400 [00:13<00:03, 67.91it/s, reward=-2.49, num_turns=1.21, num_tools=0.212, failed=0.682, completion_tokens=42.6]validation:  33%|███▎      | 133/400 [00:13<00:03, 67.91it/s, reward=-2.49, num_turns=1.22, num_tools=0.218, failed=0.684, completion_tokens=42.4]validation:  34%|███▎      | 134/400 [00:13<00:03, 67.91it/s, reward=-2.44, num_turns=1.22, num_tools=0.224, failed=0.679, completion_tokens=42.1]validation:  34%|███▍      | 135/400 [00:13<00:03, 67.91it/s, reward=-2.45, num_turns=1.23, num_tools=0.23, failed=0.681, completion_tokens=42]   validation:  34%|███▍      | 136/400 [00:13<00:03, 67.91it/s, reward=-2.45, num_turns=1.24, num_tools=0.235, failed=0.684, completion_tokens=41.9]validation:  34%|███▍      | 137/400 [00:13<00:03, 67.91it/s, reward=-2.45, num_turns=1.24, num_tools=0.241, failed=0.686, completion_tokens=41.9]validation:  34%|███▍      | 138/400 [00:13<00:03, 67.91it/s, reward=-2.45, num_turns=1.25, num_tools=0.246, failed=0.681, completion_tokens=41.7]validation:  35%|███▍      | 139/400 [00:13<00:03, 67.91it/s, reward=-2.4, num_turns=1.25, num_tools=0.252, failed=0.676, completion_tokens=41.4] validation:  35%|███▌      | 140/400 [00:13<00:03, 67.91it/s, reward=-2.41, num_turns=1.25, num_tools=0.25, failed=0.671, completion_tokens=41.9]validation:  35%|███▌      | 141/400 [00:13<00:03, 67.91it/s, reward=-2.41, num_turns=1.26, 
num_tools=0.255, failed=0.667, completion_tokens=41.8]validation:  36%|███▌      | 142/400 [00:13<00:03, 67.91it/s, reward=-2.4, num_turns=1.26, num_tools=0.261, failed=0.662, completion_tokens=41.6] validation:  36%|███▌      | 143/400 [00:13<00:03, 67.91it/s, reward=-2.36, num_turns=1.27, num_tools=0.266, failed=0.657, completion_tokens=41.3]validation:  36%|███▌      | 144/400 [00:13<00:03, 67.91it/s, reward=-2.31, num_turns=1.27, num_tools=0.271, failed=0.653, completion_tokens=41.1]validation:  36%|███▋      | 145/400 [00:13<00:03, 67.91it/s, reward=-2.27, num_turns=1.28, num_tools=0.276, failed=0.648, completion_tokens=40.9]validation:  36%|███▋      | 146/400 [00:13<00:03, 67.91it/s, reward=-2.23, num_turns=1.28, num_tools=0.281, failed=0.644, completion_tokens=40.6]validation:  37%|███▋      | 147/400 [00:13<00:03, 67.91it/s, reward=-2.23, num_turns=1.29, num_tools=0.286, failed=0.639, completion_tokens=40.4]validation:  37%|███▋      | 148/400 [00:13<00:03, 67.91it/s, reward=-2.19, num_turns=1.29, num_tools=0.291, failed=0.635, completion_tokens=40.2]validation:  37%|███▋      | 149/400 [00:13<00:03, 67.91it/s, reward=-2.16, num_turns=1.3, num_tools=0.295, failed=0.631, completion_tokens=39.9] validation:  38%|███▊      | 150/400 [00:13<00:03, 67.91it/s, reward=-2.13, num_turns=1.3, num_tools=0.3, failed=0.627, completion_tokens=39.6]  validation:  38%|███▊      | 151/400 [00:13<00:03, 67.91it/s, reward=-2.1, num_turns=1.3, num_tools=0.305, failed=0.623, completion_tokens=39.4]validation:  38%|███▊      | 152/400 [00:13<00:03, 67.91it/s, reward=-2.1, num_turns=1.31, num_tools=0.309, failed=0.618, completion_tokens=39.2]validation:  38%|███▊      | 153/400 [00:13<00:03, 67.91it/s, reward=-2.1, num_turns=1.31, num_tools=0.314, failed=0.614, completion_tokens=39.1]validation:  38%|███▊      | 154/400 [00:13<00:03, 67.91it/s, reward=-2.1, num_turns=1.32, num_tools=0.318, failed=0.61, completion_tokens=38.9] validation:  39%|███▉      | 155/400 
[00:13<00:03, 67.91it/s, reward=-2.08, num_turns=1.32, num_tools=0.323, failed=0.606, completion_tokens=38.8]validation:  39%|███▉      | 156/400 [00:13<00:03, 67.91it/s, reward=-2.04, num_turns=1.33, num_tools=0.327, failed=0.603, completion_tokens=38.6]validation:  39%|███▉      | 157/400 [00:13<00:03, 67.91it/s, reward=-2, num_turns=1.33, num_tools=0.331, failed=0.599, completion_tokens=38.5]   validation:  40%|███▉      | 158/400 [00:13<00:03, 67.91it/s, reward=-1.96, num_turns=1.34, num_tools=0.335, failed=0.595, completion_tokens=38.3]validation:  40%|███▉      | 159/400 [00:13<00:03, 67.91it/s, reward=-1.92, num_turns=1.34, num_tools=0.34, failed=0.591, completion_tokens=38.2] validation:  40%|████      | 160/400 [00:13<00:03, 67.91it/s, reward=-1.89, num_turns=1.34, num_tools=0.344, failed=0.588, completion_tokens=38.1]validation:  40%|████      | 161/400 [00:13<00:03, 67.91it/s, reward=-1.85, num_turns=1.35, num_tools=0.348, failed=0.584, completion_tokens=38]  [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  40%|████      | 162/400 [00:13<00:02, 113.59it/s, reward=-1.85, num_turns=1.35, num_tools=0.348, failed=0.584, completion_tokens=38]validation:  40%|████      | 162/400 [00:13<00:02, 113.59it/s, reward=-1.86, num_turns=1.35, num_tools=0.346, failed=0.586, completion_tokens=38]validation:  41%|████      | 163/400 [00:13<00:02, 113.59it/s, reward=-1.87, num_turns=1.34, num_tools=0.344, failed=0.589, completion_tokens=38]validation:  41%|████      | 164/400 [00:13<00:02, 113.59it/s, reward=-1.88, num_turns=1.34, num_tools=0.341, failed=0.591, completion_tokens=38]validation:  41%|████▏     | 165/400 [00:13<00:02, 113.59it/s, reward=-1.88, num_turns=1.34, num_tools=0.339, failed=0.594, completion_tokens=38]validation:  42%|████▏     | 166/400 [00:13<00:02, 113.59it/s, reward=-1.89, num_turns=1.34, num_tools=0.337, failed=0.596, completion_tokens=38]validation:  42%|████▏     | 167/400 [00:13<00:02, 113.59it/s, reward=-1.9, num_turns=1.34, num_tools=0.335, failed=0.599, completion_tokens=38] validation:  42%|████▏     | 168/400 [00:13<00:02, 113.59it/s, reward=-1.9, num_turns=1.33, num_tools=0.333, failed=0.601, completion_tokens=38]validation:  42%|████▏     | 169/400 [00:13<00:02, 113.59it/s, reward=-1.91, num_turns=1.33, num_tools=0.331, failed=0.604, completion_tokens=38]validation:  42%|████▎     | 170/400 [00:13<00:02, 113.59it/s, reward=-1.91, num_turns=1.34, num_tools=0.335, failed=0.6, completion_tokens=38.5]validation:  43%|████▎     | 171/400 [00:13<00:02, 113.59it/s, reward=-1.92, num_turns=1.34, num_tools=0.339, failed=0.602, completion_tokens=38.2]validation:  43%|████▎     | 172/400 [00:13<00:02, 113.59it/s, reward=-1.92, num_turns=1.34, num_tools=0.343, failed=0.605, completion_tokens=38]  validation:  43%|████▎     | 173/400 [00:13<00:01, 113.59it/s, reward=-1.93, num_turns=1.35, num_tools=0.347, failed=0.607, completion_tokens=37.8]validation:  44%|████▎     | 174/400 [00:13<00:01, 113.59it/s, reward=-1.93, num_turns=1.35, 
num_tools=0.351, failed=0.609, completion_tokens=37.6]validation:  44%|████▍     | 175/400 [00:13<00:01, 113.59it/s, reward=-1.94, num_turns=1.35, num_tools=0.354, failed=0.611, completion_tokens=37.4]validation:  44%|████▍     | 176/400 [00:13<00:01, 113.59it/s, reward=-1.92, num_turns=1.36, num_tools=0.358, failed=0.608, completion_tokens=37.3]validation:  44%|████▍     | 177/400 [00:13<00:01, 113.59it/s, reward=-1.9, num_turns=1.36, num_tools=0.362, failed=0.605, completion_tokens=37.1] validation:  44%|████▍     | 178/400 [00:13<00:01, 113.59it/s, reward=-1.9, num_turns=1.37, num_tools=0.365, failed=0.607, completion_tokens=37]  validation:  45%|████▍     | 179/400 [00:13<00:01, 113.59it/s, reward=-1.9, num_turns=1.37, num_tools=0.369, failed=0.603, completion_tokens=36.9]validation:  45%|████▌     | 180/400 [00:13<00:01, 113.59it/s, reward=-1.9, num_turns=1.37, num_tools=0.372, failed=0.6, completion_tokens=36.8]  validation:  45%|████▌     | 181/400 [00:14<00:01, 113.59it/s, reward=-1.9, num_turns=1.38, num_tools=0.376, failed=0.597, completion_tokens=37.6]validation:  46%|████▌     | 182/400 [00:14<00:01, 113.59it/s, reward=-1.9, num_turns=1.38, num_tools=0.379, failed=0.593, completion_tokens=37.5][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  46%|████▌     | 183/400 [00:14<00:01, 113.59it/s, reward=-1.89, num_turns=1.38, num_tools=0.383, failed=0.59, completion_tokens=37.4]validation:  46%|████▌     | 184/400 [00:14<00:01, 113.59it/s, reward=-1.88, num_turns=1.39, num_tools=0.386, failed=0.587, completion_tokens=37.3][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  46%|████▋     | 185/400 [00:14<00:01, 113.59it/s, reward=-1.88, num_turns=1.39, num_tools=0.389, failed=0.589, completion_tokens=37.1][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  46%|████▋     | 186/400 [00:14<00:01, 113.59it/s, reward=-1.89, num_turns=1.39, num_tools=0.392, failed=0.591, completion_tokens=37]  validation:  47%|████▋     | 187/400 [00:14<00:01, 113.59it/s, reward=-1.9, num_turns=1.4, num_tools=0.396, failed=0.594, completion_tokens=36.9]validation:  47%|████▋     | 188/400 [00:14<00:01, 113.59it/s, reward=-1.9, num_turns=1.4, num_tools=0.399, failed=0.596, completion_tokens=36.8]validation:  47%|████▋     | 189/400 [00:14<00:01, 113.59it/s, reward=-1.91, num_turns=1.4, num_tools=0.402, failed=0.598, completion_tokens=36.6]validation:  48%|████▊     | 190/400 [00:14<00:01, 113.59it/s, reward=-1.91, num_turns=1.41, num_tools=0.405, failed=0.6, completion_tokens=36.5] validation:  48%|████▊     | 191/400 [00:14<00:01, 113.59it/s, reward=-1.92, num_turns=1.41, num_tools=0.408, failed=0.602, completion_tokens=36.4]validation:  48%|████▊     | 192/400 [00:14<00:01, 113.59it/s, reward=-1.92, num_turns=1.41, num_tools=0.411, failed=0.604, completion_tokens=36.2]validation:  48%|████▊     | 193/400 [00:14<00:01, 113.59it/s, reward=-1.93, num_turns=1.41, num_tools=0.415, failed=0.606, completion_tokens=36.1]validation:  48%|████▊     | 194/400 [00:14<00:01, 113.59it/s, reward=-1.9, num_turns=1.42, num_tools=0.418, failed=0.603, completion_tokens=36.1] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  49%|████▉     | 195/400 [00:14<00:01, 150.42it/s, reward=-1.9, num_turns=1.42, num_tools=0.418, failed=0.603, completion_tokens=36.1]validation:  49%|████▉     | 195/400 [00:14<00:01, 150.42it/s, reward=-1.91, num_turns=1.42, num_tools=0.415, failed=0.605, completion_tokens=36.1][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  49%|████▉     | 196/400 [00:14<00:01, 150.42it/s, reward=-1.92, num_turns=1.42, num_tools=0.418, failed=0.607, completion_tokens=36]  [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  49%|████▉     | 197/400 [00:14<00:01, 150.42it/s, reward=-1.92, num_turns=1.42, num_tools=0.421, failed=0.609, completion_tokens=35.9][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  50%|████▉     | 198/400 [00:14<00:01, 150.42it/s, reward=-1.93, num_turns=1.42, num_tools=0.424, failed=0.611, completion_tokens=35.7][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  50%|████▉     | 199/400 [00:14<00:01, 150.42it/s, reward=-1.93, num_turns=1.43, num_tools=0.427, failed=0.613, completion_tokens=35.6][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  50%|█████     | 200/400 [00:14<00:01, 150.42it/s, reward=-1.94, num_turns=1.43, num_tools=0.43, failed=0.615, completion_tokens=35.4] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  50%|█████     | 201/400 [00:14<00:01, 150.42it/s, reward=-1.94, num_turns=1.43, num_tools=0.433, failed=0.617, completion_tokens=35.3][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  50%|█████     | 202/400 [00:14<00:01, 150.42it/s, reward=-1.95, num_turns=1.44, num_tools=0.436, failed=0.619, completion_tokens=35.2]validation:  51%|█████     | 203/400 [00:14<00:01, 150.42it/s, reward=-1.93, num_turns=1.44, num_tools=0.438, failed=0.616, completion_tokens=35.3]validation:  51%|█████     | 204/400 [00:14<00:01, 150.42it/s, reward=-1.91, num_turns=1.44, num_tools=0.441, failed=0.613, completion_tokens=35.1]validation:  51%|█████▏    | 205/400 [00:14<00:01, 150.42it/s, reward=-1.91, num_turns=1.44, num_tools=0.444, failed=0.61, completion_tokens=35.2] validation:  52%|█████▏    | 206/400 [00:14<00:01, 150.42it/s, reward=-1.91, num_turns=1.45, num_tools=0.447, failed=0.607, completion_tokens=35] validation:  52%|█████▏    | 207/400 [00:14<00:01, 150.42it/s, reward=-1.91, num_turns=1.45, num_tools=0.449, failed=0.604, completion_tokens=35.1]validation:  52%|█████▏    | 208/400 [00:14<00:01, 150.42it/s, reward=-1.9, num_turns=1.45, num_tools=0.452, failed=0.601, completion_tokens=35]   validation:  52%|█████▏    | 209/400 [00:14<00:01, 150.42it/s, reward=-1.9, num_turns=1.45, num_tools=0.455, failed=0.598, completion_tokens=35]validation:  52%|█████▎    | 210/400 [00:14<00:01, 150.42it/s, reward=-1.9, num_turns=1.46, num_tools=0.457, failed=0.595, completion_tokens=35]validation:  53%|█████▎    | 211/400 [00:14<00:01, 150.42it/s, reward=-1.89, num_turns=1.46, num_tools=0.46, failed=0.592, completion_tokens=35]validation:  53%|█████▎    | 212/400 [00:14<00:01, 150.42it/s, reward=-1.89, num_turns=1.46, num_tools=0.462, failed=0.59, completion_tokens=35]validation:  53%|█████▎    | 213/400 [00:14<00:01, 150.42it/s, reward=-1.89, num_turns=1.46, num_tools=0.465, failed=0.587, completion_tokens=35][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  54%|█████▎    | 214/400 [00:14<00:01, 150.42it/s, reward=-1.89, num_turns=1.46, num_tools=0.463, failed=0.589, completion_tokens=35]validation:  54%|█████▍    | 215/400 [00:14<00:01, 150.42it/s, reward=-1.9, num_turns=1.47, num_tools=0.465, failed=0.591, completion_tokens=34.9]validation:  54%|█████▍    | 216/400 [00:14<00:01, 150.42it/s, reward=-1.9, num_turns=1.47, num_tools=0.468, failed=0.593, completion_tokens=34.8]validation:  54%|█████▍    | 217/400 [00:14<00:01, 150.42it/s, reward=-1.91, num_turns=1.47, num_tools=0.47, failed=0.594, completion_tokens=34.7]validation:  55%|█████▍    | 218/400 [00:14<00:01, 155.23it/s, reward=-1.91, num_turns=1.47, num_tools=0.47, failed=0.594, completion_tokens=34.7]validation:  55%|█████▍    | 218/400 [00:14<00:01, 155.23it/s, reward=-1.91, num_turns=1.47, num_tools=0.472, failed=0.596, completion_tokens=34.6]validation:  55%|█████▍    | 219/400 [00:14<00:01, 155.23it/s, reward=-1.92, num_turns=1.47, num_tools=0.475, failed=0.598, completion_tokens=34.6]validation:  55%|█████▌    | 220/400 [00:14<00:01, 155.23it/s, reward=-1.92, num_turns=1.48, num_tools=0.477, failed=0.6, completion_tokens=34.5]  validation:  55%|█████▌    | 221/400 [00:14<00:01, 155.23it/s, reward=-1.93, num_turns=1.48, num_tools=0.48, failed=0.602, completion_tokens=34.4]validation:  56%|█████▌    | 222/400 [00:14<00:01, 155.23it/s, reward=-1.93, num_turns=1.48, num_tools=0.482, failed=0.604, completion_tokens=34.3]validation:  56%|█████▌    | 223/400 [00:14<00:01, 155.23it/s, reward=-1.94, num_turns=1.48, num_tools=0.48, failed=0.605, completion_tokens=34.3] validation:  56%|█████▌    | 224/400 [00:14<00:01, 155.23it/s, reward=-1.94, num_turns=1.48, num_tools=0.482, failed=0.603, completion_tokens=34.3]validation:  56%|█████▋    | 225/400 [00:14<00:01, 155.23it/s, reward=-1.94, num_turns=1.48, num_tools=0.484, failed=0.6, completion_tokens=34.3]  validation:  56%|█████▋    | 226/400 [00:14<00:01, 155.23it/s, reward=-1.92, 
num_turns=1.49, num_tools=0.487, failed=0.597, completion_tokens=34.2]validation:  57%|█████▋    | 227/400 [00:14<00:01, 155.23it/s, reward=-1.91, num_turns=1.49, num_tools=0.489, failed=0.595, completion_tokens=34.3]validation:  57%|█████▋    | 228/400 [00:14<00:01, 155.23it/s, reward=-1.88, num_turns=1.49, num_tools=0.491, failed=0.592, completion_tokens=34.3]validation:  57%|█████▋    | 229/400 [00:14<00:01, 155.23it/s, reward=-1.86, num_turns=1.49, num_tools=0.493, failed=0.59, completion_tokens=34.2] validation:  57%|█████▊    | 230/400 [00:14<00:01, 155.23it/s, reward=-1.83, num_turns=1.5, num_tools=0.496, failed=0.587, completion_tokens=34.2][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  58%|█████▊    | 231/400 [00:14<00:01, 155.23it/s, reward=-1.82, num_turns=1.5, num_tools=0.498, failed=0.584, completion_tokens=34.1]validation:  58%|█████▊    | 232/400 [00:14<00:01, 155.23it/s, reward=-1.82, num_turns=1.5, num_tools=0.5, failed=0.586, completion_tokens=34.1]  validation:  58%|█████▊    | 233/400 [00:14<00:01, 155.23it/s, reward=-1.83, num_turns=1.5, num_tools=0.502, failed=0.588, completion_tokens=34]validation:  58%|█████▊    | 234/400 [00:14<00:01, 155.23it/s, reward=-1.83, num_turns=1.5, num_tools=0.504, failed=0.59, completion_tokens=33.9]validation:  59%|█████▉    | 235/400 [00:14<00:01, 155.23it/s, reward=-1.82, num_turns=1.51, num_tools=0.506, failed=0.587, completion_tokens=34.2]validation:  59%|█████▉    | 236/400 [00:14<00:01, 155.23it/s, reward=-1.82, num_turns=1.51, num_tools=0.508, failed=0.585, completion_tokens=34.1]validation:  59%|█████▉    | 237/400 [00:14<00:01, 155.23it/s, reward=-1.79, num_turns=1.51, num_tools=0.511, failed=0.582, completion_tokens=34.1]validation:  60%|█████▉    | 238/400 [00:14<00:01, 155.23it/s, reward=-1.77, num_turns=1.51, num_tools=0.513, failed=0.58, completion_tokens=34]   validation:  60%|█████▉    | 239/400 [00:14<00:00, 163.47it/s, reward=-1.77, num_turns=1.51, num_tools=0.513, failed=0.58, completion_tokens=34]validation:  60%|█████▉    | 239/400 [00:14<00:00, 163.47it/s, reward=-1.76, num_turns=1.51, num_tools=0.515, failed=0.577, completion_tokens=34.1]validation:  60%|██████    | 240/400 [00:14<00:00, 163.47it/s, reward=-1.76, num_turns=1.52, num_tools=0.517, failed=0.575, completion_tokens=34.1]validation:  60%|██████    | 241/400 [00:14<00:00, 163.47it/s, reward=-1.76, num_turns=1.52, num_tools=0.519, failed=0.573, completion_tokens=34]  validation:  60%|██████    | 242/400 [00:14<00:00, 163.47it/s, reward=-1.76, num_turns=1.52, num_tools=0.521, failed=0.57, completion_tokens=34] validation:  61%|██████    | 243/400 [00:14<00:00, 163.47it/s, reward=-1.75, 
num_turns=1.52, num_tools=0.523, failed=0.568, completion_tokens=34.1]validation:  61%|██████    | 244/400 [00:14<00:00, 163.47it/s, reward=-1.75, num_turns=1.52, num_tools=0.525, failed=0.566, completion_tokens=34.2]validation:  61%|██████▏   | 245/400 [00:14<00:00, 163.47it/s, reward=-1.75, num_turns=1.53, num_tools=0.527, failed=0.563, completion_tokens=34.1]validation:  62%|██████▏   | 246/400 [00:14<00:00, 163.47it/s, reward=-1.75, num_turns=1.53, num_tools=0.528, failed=0.561, completion_tokens=34.2]validation:  62%|██████▏   | 247/400 [00:14<00:00, 163.47it/s, reward=-1.75, num_turns=1.53, num_tools=0.53, failed=0.559, completion_tokens=34.2] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  62%|██████▏   | 248/400 [00:14<00:00, 163.47it/s, reward=-1.76, num_turns=1.53, num_tools=0.532, failed=0.56, completion_tokens=34.3]validation:  62%|██████▏   | 249/400 [00:50<00:00, 163.47it/s, reward=-1.76, num_turns=1.53, num_tools=0.534, failed=0.562, completion_tokens=34.3]validation:  62%|██████▏   | 249/400 [00:50<00:00, 163.47it/s, reward=-1.76, num_turns=1.53, num_tools=0.534, failed=0.562, completion_tokens=34.3]validation:  62%|██████▎   | 250/400 [00:50<01:21,  1.85it/s, reward=-1.76, num_turns=1.53, num_tools=0.534, failed=0.562, completion_tokens=34.3] validation:  62%|██████▎   | 250/400 [00:50<01:21,  1.85it/s, reward=-1.77, num_turns=1.54, num_tools=0.536, failed=0.564, completion_tokens=34.2]validation:  63%|██████▎   | 251/400 [00:50<01:19,  1.88it/s, reward=-1.77, num_turns=1.54, num_tools=0.536, failed=0.564, completion_tokens=34.2]validation:  63%|██████▎   | 251/400 [00:50<01:19,  1.88it/s, reward=-1.77, num_turns=1.54, num_tools=0.538, failed=0.562, completion_tokens=34.2]validation:  63%|██████▎   | 252/400 [00:50<01:18,  1.88it/s, reward=-1.77, num_turns=1.54, num_tools=0.54, failed=0.56, completion_tokens=34.2]  <string>:1: SyntaxWarning: 'float' object is not callable; perhaps you missed a comma?
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  63%|██████▎   | 253/400 [00:50<01:18,  1.88it/s, reward=-1.77, num_turns=1.54, num_tools=0.542, failed=0.557, completion_tokens=34.4]validation:  64%|██████▎   | 254/400 [00:50<01:17,  1.88it/s, reward=-1.77, num_turns=1.54, num_tools=0.543, failed=0.555, completion_tokens=34.5]validation:  64%|██████▍   | 255/400 [00:50<01:17,  1.88it/s, reward=-1.77, num_turns=1.55, num_tools=0.545, failed=0.553, completion_tokens=34.7]validation:  64%|██████▍   | 256/400 [00:50<01:16,  1.88it/s, reward=-1.78, num_turns=1.55, num_tools=0.547, failed=0.555, completion_tokens=34.6]validation:  64%|██████▍   | 257/400 [00:50<01:16,  1.88it/s, reward=-1.78, num_turns=1.55, num_tools=0.549, failed=0.553, completion_tokens=34.7]validation:  64%|██████▍   | 258/400 [00:50<01:15,  1.88it/s, reward=-1.78, num_turns=1.55, num_tools=0.55, failed=0.55, completion_tokens=34.8]  validation:  65%|██████▍   | 259/400 [00:50<01:15,  1.88it/s, reward=-1.78, num_turns=1.55, num_tools=0.552, failed=0.548, completion_tokens=35]validation:  65%|██████▌   | 260/400 [00:50<01:14,  1.88it/s, reward=-1.78, num_turns=1.55, num_tools=0.554, failed=0.546, completion_tokens=35.1]validation:  65%|██████▌   | 261/400 [00:50<01:14,  1.88it/s, reward=-1.78, num_turns=1.56, num_tools=0.556, failed=0.544, completion_tokens=35.1]validation:  66%|██████▌   | 262/400 [00:50<01:13,  1.88it/s, reward=-1.78, num_turns=1.56, num_tools=0.557, failed=0.542, completion_tokens=35.1]validation:  66%|██████▌   | 263/400 [00:50<01:13,  1.88it/s, reward=-1.77, num_turns=1.56, num_tools=0.559, failed=0.54, completion_tokens=35.3] validation:  66%|██████▌   | 264/400 [00:50<01:12,  1.88it/s, reward=-1.76, num_turns=1.56, num_tools=0.561, failed=0.538, completion_tokens=35.3]validation:  66%|██████▋   | 265/400 [00:50<01:11,  1.88it/s, reward=-1.75, num_turns=1.56, num_tools=0.562, failed=0.536, completion_tokens=35.3]validation:  66%|██████▋   | 266/400 [00:50<00:51,  2.62it/s, reward=-1.75, 
num_turns=1.56, num_tools=0.562, failed=0.536, completion_tokens=35.3]validation:  66%|██████▋   | 266/400 [00:50<00:51,  2.62it/s, reward=-1.73, num_turns=1.56, num_tools=0.564, failed=0.534, completion_tokens=35.3]validation:  67%|██████▋   | 267/400 [00:50<00:50,  2.62it/s, reward=-1.72, num_turns=1.57, num_tools=0.566, failed=0.532, completion_tokens=35.3]validation:  67%|██████▋   | 268/400 [00:50<00:50,  2.62it/s, reward=-1.71, num_turns=1.57, num_tools=0.567, failed=0.53, completion_tokens=35.4] validation:  67%|██████▋   | 269/400 [00:50<00:49,  2.62it/s, reward=-1.7, num_turns=1.57, num_tools=0.569, failed=0.528, completion_tokens=35.7]validation:  68%|██████▊   | 270/400 [00:50<00:49,  2.62it/s, reward=-1.69, num_turns=1.57, num_tools=0.57, failed=0.526, completion_tokens=36.2]validation:  68%|██████▊   | 271/400 [00:50<00:49,  2.62it/s, reward=-1.68, num_turns=1.57, num_tools=0.572, failed=0.524, completion_tokens=36.3]validation:  68%|██████▊   | 272/400 [00:50<00:48,  2.62it/s, reward=-1.68, num_turns=1.57, num_tools=0.574, failed=0.522, completion_tokens=36.3]validation:  68%|██████▊   | 273/400 [00:50<00:48,  2.62it/s, reward=-1.66, num_turns=1.58, num_tools=0.575, failed=0.52, completion_tokens=36.4] validation:  68%|██████▊   | 274/400 [00:50<00:48,  2.62it/s, reward=-1.65, num_turns=1.58, num_tools=0.577, failed=0.518, completion_tokens=36.7]validation:  69%|██████▉   | 275/400 [00:50<00:47,  2.62it/s, reward=-1.63, num_turns=1.58, num_tools=0.582, failed=0.516, completion_tokens=36.8]validation:  69%|██████▉   | 276/400 [00:50<00:47,  2.62it/s, reward=-1.62, num_turns=1.58, num_tools=0.583, failed=0.514, completion_tokens=36.8]validation:  69%|██████▉   | 277/400 [00:50<00:46,  2.62it/s, reward=-1.61, num_turns=1.58, num_tools=0.585, failed=0.513, completion_tokens=36.9]validation:  70%|██████▉   | 278/400 [00:50<00:46,  2.62it/s, reward=-1.61, num_turns=1.59, num_tools=0.586, failed=0.511, completion_tokens=37.1]validation:  
70%|██████▉   | 279/400 [00:50<00:46,  2.62it/s, reward=-1.61, num_turns=1.59, num_tools=0.588, failed=0.509, completion_tokens=37.4]validation:  70%|███████   | 280/400 [00:50<00:45,  2.62it/s, reward=-1.61, num_turns=1.59, num_tools=0.589, failed=0.507, completion_tokens=37.7]validation:  70%|███████   | 281/400 [00:51<00:32,  3.71it/s, reward=-1.61, num_turns=1.59, num_tools=0.589, failed=0.507, completion_tokens=37.7]validation:  70%|███████   | 281/400 [00:51<00:32,  3.71it/s, reward=-1.62, num_turns=1.59, num_tools=0.591, failed=0.505, completion_tokens=37.7]validation:  70%|███████   | 282/400 [00:51<00:31,  3.71it/s, reward=-1.62, num_turns=1.59, num_tools=0.592, failed=0.504, completion_tokens=37.7]validation:  71%|███████   | 283/400 [00:51<00:31,  3.71it/s, reward=-1.62, num_turns=1.59, num_tools=0.594, failed=0.502, completion_tokens=37.7]validation:  71%|███████   | 284/400 [00:51<00:31,  3.71it/s, reward=-1.62, num_turns=1.6, num_tools=0.595, failed=0.5, completion_tokens=37.8]   validation:  71%|███████▏  | 285/400 [00:51<00:31,  3.71it/s, reward=-1.61, num_turns=1.6, num_tools=0.596, failed=0.498, completion_tokens=37.8]validation:  72%|███████▏  | 286/400 [00:51<00:30,  3.71it/s, reward=-1.59, num_turns=1.6, num_tools=0.598, failed=0.497, completion_tokens=37.7]validation:  72%|███████▏  | 287/400 [00:51<00:30,  3.71it/s, reward=-1.57, num_turns=1.6, num_tools=0.599, failed=0.495, completion_tokens=37.7]validation:  72%|███████▏  | 288/400 [00:51<00:30,  3.71it/s, reward=-1.55, num_turns=1.6, num_tools=0.601, failed=0.493, completion_tokens=37.6]validation:  72%|███████▏  | 289/400 [00:51<00:29,  3.71it/s, reward=-1.54, num_turns=1.6, num_tools=0.602, failed=0.491, completion_tokens=37.6]validation:  72%|███████▎  | 290/400 [00:51<00:29,  3.71it/s, reward=-1.53, num_turns=1.6, num_tools=0.603, failed=0.49, completion_tokens=37.8] validation:  73%|███████▎  | 291/400 [00:51<00:22,  4.74it/s, reward=-1.53, num_turns=1.6, num_tools=0.603, 
failed=0.49, completion_tokens=37.8]validation:  73%|███████▎  | 291/400 [00:51<00:22,  4.74it/s, reward=-1.53, num_turns=1.6, num_tools=0.605, failed=0.488, completion_tokens=37.8]validation:  73%|███████▎  | 292/400 [00:51<00:22,  4.74it/s, reward=-1.52, num_turns=1.61, num_tools=0.606, failed=0.486, completion_tokens=38] validation:  73%|███████▎  | 293/400 [00:51<00:22,  4.74it/s, reward=-1.52, num_turns=1.61, num_tools=0.608, failed=0.485, completion_tokens=38]validation:  74%|███████▎  | 294/400 [00:51<00:22,  4.74it/s, reward=-1.51, num_turns=1.61, num_tools=0.609, failed=0.483, completion_tokens=37.9]validation:  74%|███████▍  | 295/400 [00:51<00:22,  4.74it/s, reward=-1.5, num_turns=1.61, num_tools=0.61, failed=0.481, completion_tokens=37.8]  validation:  74%|███████▍  | 296/400 [00:51<00:21,  4.74it/s, reward=-1.49, num_turns=1.61, num_tools=0.611, failed=0.48, completion_tokens=37.8]validation:  74%|███████▍  | 297/400 [00:51<00:21,  4.74it/s, reward=-1.49, num_turns=1.61, num_tools=0.613, failed=0.478, completion_tokens=37.7]validation:  74%|███████▍  | 298/400 [00:51<00:21,  4.74it/s, reward=-1.49, num_turns=1.61, num_tools=0.614, failed=0.477, completion_tokens=37.7]validation:  75%|███████▍  | 299/400 [00:51<00:21,  4.74it/s, reward=-1.49, num_turns=1.62, num_tools=0.615, failed=0.475, completion_tokens=37.7]validation:  75%|███████▌  | 300/400 [00:51<00:21,  4.74it/s, reward=-1.48, num_turns=1.62, num_tools=0.617, failed=0.473, completion_tokens=37.7]validation:  75%|███████▌  | 301/400 [00:51<00:20,  4.74it/s, reward=-1.48, num_turns=1.62, num_tools=0.618, failed=0.472, completion_tokens=38]  validation:  76%|███████▌  | 302/400 [00:51<00:20,  4.74it/s, reward=-1.47, num_turns=1.62, num_tools=0.619, failed=0.47, completion_tokens=38] validation:  76%|███████▌  | 303/400 [00:51<00:20,  4.74it/s, reward=-1.45, num_turns=1.62, num_tools=0.62, failed=0.469, completion_tokens=37.9]validation:  76%|███████▌  | 304/400 [00:51<00:20,  
4.74it/s, reward=-1.43, num_turns=1.62, num_tools=0.622, failed=0.467, completion_tokens=37.9]validation:  76%|███████▋  | 305/400 [00:51<00:20,  4.74it/s, reward=-1.41, num_turns=1.62, num_tools=0.623, failed=0.466, completion_tokens=37.8]validation:  76%|███████▋  | 306/400 [00:51<00:19,  4.74it/s, reward=-1.4, num_turns=1.62, num_tools=0.624, failed=0.464, completion_tokens=37.8] validation:  77%|███████▋  | 307/400 [00:51<00:19,  4.74it/s, reward=-1.4, num_turns=1.63, num_tools=0.625, failed=0.463, completion_tokens=37.8]validation:  77%|███████▋  | 308/400 [00:51<00:19,  4.74it/s, reward=-1.4, num_turns=1.63, num_tools=0.627, failed=0.461, completion_tokens=37.8]validation:  77%|███████▋  | 309/400 [00:51<00:19,  4.74it/s, reward=-1.4, num_turns=1.63, num_tools=0.628, failed=0.46, completion_tokens=37.8] validation:  78%|███████▊  | 310/400 [00:51<00:18,  4.74it/s, reward=-1.41, num_turns=1.63, num_tools=0.626, failed=0.458, completion_tokens=38.2]validation:  78%|███████▊  | 311/400 [00:51<00:18,  4.74it/s, reward=-1.41, num_turns=1.63, num_tools=0.627, failed=0.457, completion_tokens=38.3]validation:  78%|███████▊  | 312/400 [00:51<00:18,  4.74it/s, reward=-1.41, num_turns=1.63, num_tools=0.628, failed=0.455, completion_tokens=38.4]validation:  78%|███████▊  | 313/400 [00:51<00:18,  4.74it/s, reward=-1.41, num_turns=1.63, num_tools=0.629, failed=0.454, completion_tokens=38.8]validation:  78%|███████▊  | 314/400 [00:51<00:18,  4.74it/s, reward=-1.42, num_turns=1.63, num_tools=0.627, failed=0.452, completion_tokens=39.1]validation:  79%|███████▉  | 315/400 [00:51<00:17,  4.74it/s, reward=-1.42, num_turns=1.63, num_tools=0.629, failed=0.451, completion_tokens=39.1]validation:  79%|███████▉  | 316/400 [00:51<00:17,  4.74it/s, reward=-1.42, num_turns=1.63, num_tools=0.63, failed=0.449, completion_tokens=39.1] validation:  79%|███████▉  | 317/400 [00:51<00:17,  4.74it/s, reward=-1.42, num_turns=1.63, num_tools=0.631, failed=0.448, 
completion_tokens=39.1]validation:  80%|███████▉  | 318/400 [00:51<00:17,  4.74it/s, reward=-1.43, num_turns=1.63, num_tools=0.632, failed=0.447, completion_tokens=39.2]validation:  80%|███████▉  | 319/400 [00:51<00:17,  4.74it/s, reward=-1.43, num_turns=1.63, num_tools=0.633, failed=0.445, completion_tokens=39.2]validation:  80%|████████  | 320/400 [00:51<00:16,  4.74it/s, reward=-1.43, num_turns=1.63, num_tools=0.634, failed=0.444, completion_tokens=39.2]validation:  80%|████████  | 321/400 [00:51<00:16,  4.74it/s, reward=-1.42, num_turns=1.64, num_tools=0.636, failed=0.442, completion_tokens=39.1]validation:  80%|████████  | 322/400 [00:51<00:16,  4.74it/s, reward=-1.41, num_turns=1.64, num_tools=0.637, failed=0.441, completion_tokens=39.1]validation:  81%|████████  | 323/400 [00:51<00:16,  4.74it/s, reward=-1.4, num_turns=1.64, num_tools=0.638, failed=0.44, completion_tokens=39]    validation:  81%|████████  | 324/400 [00:51<00:16,  4.74it/s, reward=-1.4, num_turns=1.64, num_tools=0.639, failed=0.438, completion_tokens=38.9]validation:  81%|████████▏ | 325/400 [00:51<00:15,  4.74it/s, reward=-1.4, num_turns=1.64, num_tools=0.64, failed=0.437, completion_tokens=38.9] validation:  82%|████████▏ | 326/400 [00:51<00:15,  4.74it/s, reward=-1.4, num_turns=1.64, num_tools=0.641, failed=0.436, completion_tokens=38.9]validation:  82%|████████▏ | 327/400 [00:51<00:15,  4.74it/s, reward=-1.41, num_turns=1.64, num_tools=0.642, failed=0.434, completion_tokens=38.9]validation:  82%|████████▏ | 328/400 [00:51<00:15,  4.74it/s, reward=-1.41, num_turns=1.64, num_tools=0.643, failed=0.433, completion_tokens=39]  validation:  82%|████████▏ | 329/400 [00:51<00:14,  4.74it/s, reward=-1.41, num_turns=1.64, num_tools=0.644, failed=0.432, completion_tokens=39]validation:  82%|████████▎ | 330/400 [00:51<00:14,  4.74it/s, reward=-1.41, num_turns=1.65, num_tools=0.645, failed=0.43, completion_tokens=39] validation:  83%|████████▎ | 331/400 [00:51<00:14,  4.74it/s, 
reward=-1.41, num_turns=1.65, num_tools=0.647, failed=0.429, completion_tokens=39]validation:  83%|████████▎ | 332/400 [00:51<00:14,  4.74it/s, reward=-1.41, num_turns=1.65, num_tools=0.648, failed=0.428, completion_tokens=38.9]validation:  83%|████████▎ | 333/400 [00:51<00:14,  4.74it/s, reward=-1.4, num_turns=1.65, num_tools=0.649, failed=0.426, completion_tokens=38.9] validation:  84%|████████▎ | 334/400 [00:51<00:13,  4.74it/s, reward=-1.41, num_turns=1.65, num_tools=0.647, failed=0.425, completion_tokens=39] validation:  84%|████████▍ | 335/400 [00:51<00:13,  4.74it/s, reward=-1.4, num_turns=1.65, num_tools=0.648, failed=0.424, completion_tokens=39.4]validation:  84%|████████▍ | 336/400 [00:51<00:13,  4.74it/s, reward=-1.39, num_turns=1.65, num_tools=0.649, failed=0.423, completion_tokens=39.4]validation:  84%|████████▍ | 337/400 [00:51<00:13,  4.74it/s, reward=-1.39, num_turns=1.65, num_tools=0.65, failed=0.421, completion_tokens=39.4] validation:  84%|████████▍ | 338/400 [00:51<00:13,  4.74it/s, reward=-1.39, num_turns=1.65, num_tools=0.651, failed=0.42, completion_tokens=39.4]validation:  85%|████████▍ | 339/400 [00:51<00:12,  4.74it/s, reward=-1.39, num_turns=1.65, num_tools=0.652, failed=0.419, completion_tokens=39.4]validation:  85%|████████▌ | 340/400 [00:51<00:12,  4.74it/s, reward=-1.39, num_turns=1.65, num_tools=0.65, failed=0.418, completion_tokens=40.3] validation:  85%|████████▌ | 341/400 [00:51<00:12,  4.74it/s, reward=-1.4, num_turns=1.65, num_tools=0.651, failed=0.416, completion_tokens=40.4]validation:  86%|████████▌ | 342/400 [00:51<00:12,  4.74it/s, reward=-1.4, num_turns=1.65, num_tools=0.652, failed=0.415, completion_tokens=40.5]validation:  86%|████████▌ | 343/400 [00:51<00:12,  4.74it/s, reward=-1.39, num_turns=1.65, num_tools=0.653, failed=0.414, completion_tokens=40.6]validation:  86%|████████▌ | 344/400 [00:51<00:11,  4.74it/s, reward=-1.39, num_turns=1.65, num_tools=0.654, failed=0.413, completion_tokens=41]  
validation:  86%|████████▋ | 345/400 [00:51<00:11,  4.74it/s, reward=-1.39, num_turns=1.66, num_tools=0.655, failed=0.412, completion_tokens=41.1]validation:  86%|████████▋ | 346/400 [00:51<00:11,  4.74it/s, reward=-1.39, num_turns=1.66, num_tools=0.656, failed=0.41, completion_tokens=41.1] validation:  87%|████████▋ | 347/400 [00:51<00:11,  4.74it/s, reward=-1.4, num_turns=1.65, num_tools=0.654, failed=0.409, completion_tokens=41.8]validation:  87%|████████▋ | 348/400 [00:51<00:10,  4.74it/s, reward=-1.39, num_turns=1.66, num_tools=0.655, failed=0.408, completion_tokens=42.1]validation:  87%|████████▋ | 349/400 [00:51<00:10,  4.74it/s, reward=-1.39, num_turns=1.66, num_tools=0.656, failed=0.407, completion_tokens=43.2]validation:  88%|████████▊ | 350/400 [00:51<00:10,  4.74it/s, reward=-1.4, num_turns=1.65, num_tools=0.654, failed=0.406, completion_tokens=44.4] validation:  88%|████████▊ | 351/400 [00:51<00:10,  4.74it/s, reward=-1.4, num_turns=1.65, num_tools=0.652, failed=0.405, completion_tokens=45.4]validation:  88%|████████▊ | 352/400 [00:51<00:10,  4.74it/s, reward=-1.41, num_turns=1.65, num_tools=0.651, failed=0.403, completion_tokens=47.3]validation:  88%|████████▊ | 353/400 [00:51<00:09,  4.74it/s, reward=-1.41, num_turns=1.65, num_tools=0.652, failed=0.402, completion_tokens=47.8]validation:  88%|████████▊ | 354/400 [00:51<00:09,  4.74it/s, reward=-1.41, num_turns=1.65, num_tools=0.65, failed=0.401, completion_tokens=49.6] validation:  89%|████████▉ | 355/400 [00:51<00:09,  4.74it/s, reward=-1.42, num_turns=1.65, num_tools=0.648, failed=0.4, completion_tokens=51.4] validation:  89%|████████▉ | 356/400 [00:51<00:09,  4.74it/s, reward=-1.42, num_turns=1.65, num_tools=0.646, failed=0.399, completion_tokens=53.2]validation:  89%|████████▉ | 357/400 [00:51<00:09,  4.74it/s, reward=-1.43, num_turns=1.64, num_tools=0.644, failed=0.398, completion_tokens=54.4]validation:  90%|████████▉ | 358/400 [00:51<00:08,  4.74it/s, reward=-1.43, num_turns=1.65, 
num_tools=0.645, failed=0.397, completion_tokens=54.7]validation:  90%|████████▉ | 359/400 [00:51<00:08,  4.74it/s, reward=-1.42, num_turns=1.65, num_tools=0.646, failed=0.396, completion_tokens=55.1]validation:  90%|█████████ | 360/400 [00:51<00:08,  4.74it/s, reward=-1.43, num_turns=1.64, num_tools=0.644, failed=0.394, completion_tokens=56.4]validation:  90%|█████████ | 361/400 [00:51<00:08,  4.74it/s, reward=-1.43, num_turns=1.64, num_tools=0.643, failed=0.393, completion_tokens=58.2]validation:  90%|█████████ | 362/400 [00:51<00:08,  4.74it/s, reward=-1.44, num_turns=1.64, num_tools=0.641, failed=0.392, completion_tokens=59.9]validation:  91%|█████████ | 363/400 [00:51<00:02, 14.97it/s, reward=-1.44, num_turns=1.64, num_tools=0.641, failed=0.392, completion_tokens=59.9]validation:  91%|█████████ | 363/400 [00:51<00:02, 14.97it/s, reward=-1.44, num_turns=1.64, num_tools=0.642, failed=0.391, completion_tokens=59.8]validation:  91%|█████████ | 364/400 [00:51<00:02, 14.97it/s, reward=-1.43, num_turns=1.64, num_tools=0.643, failed=0.39, completion_tokens=59.6] validation:  91%|█████████▏| 365/400 [00:51<00:02, 14.97it/s, reward=-1.42, num_turns=1.64, num_tools=0.644, failed=0.389, completion_tokens=59.5]validation:  92%|█████████▏| 366/400 [00:51<00:02, 14.97it/s, reward=-1.42, num_turns=1.64, num_tools=0.645, failed=0.388, completion_tokens=59.4]validation:  92%|█████████▏| 367/400 [00:51<00:02, 14.97it/s, reward=-1.41, num_turns=1.65, num_tools=0.646, failed=0.387, completion_tokens=59.3]validation:  92%|█████████▏| 368/400 [00:51<00:02, 14.97it/s, reward=-1.4, num_turns=1.65, num_tools=0.647, failed=0.386, completion_tokens=59.1] validation:  92%|█████████▏| 369/400 [00:51<00:02, 14.97it/s, reward=-1.39, num_turns=1.65, num_tools=0.648, failed=0.385, completion_tokens=59] validation:  92%|█████████▎| 370/400 [00:51<00:02, 14.97it/s, reward=-1.4, num_turns=1.65, num_tools=0.649, failed=0.384, completion_tokens=58.9]validation:  93%|█████████▎| 
371/400 [00:51<00:01, 14.97it/s, reward=-1.38, num_turns=1.65, num_tools=0.65, failed=0.383, completion_tokens=58.7]validation:  93%|█████████▎| 372/400 [00:51<00:01, 14.97it/s, reward=-1.37, num_turns=1.65, num_tools=0.651, failed=0.382, completion_tokens=58.6]validation:  93%|█████████▎| 373/400 [00:51<00:01, 14.97it/s, reward=-1.36, num_turns=1.65, num_tools=0.651, failed=0.381, completion_tokens=58.5]validation:  94%|█████████▎| 374/400 [00:51<00:01, 14.97it/s, reward=-1.35, num_turns=1.65, num_tools=0.652, failed=0.38, completion_tokens=58.3] validation:  94%|█████████▍| 375/400 [00:51<00:01, 14.97it/s, reward=-1.34, num_turns=1.65, num_tools=0.653, failed=0.379, completion_tokens=58.2]validation:  94%|█████████▍| 376/400 [00:51<00:01, 14.97it/s, reward=-1.33, num_turns=1.65, num_tools=0.654, failed=0.378, completion_tokens=58.1]validation:  94%|█████████▍| 377/400 [00:51<00:01, 14.97it/s, reward=-1.33, num_turns=1.66, num_tools=0.655, failed=0.377, completion_tokens=58]  validation:  94%|█████████▍| 378/400 [00:51<00:01, 14.97it/s, reward=-1.31, num_turns=1.66, num_tools=0.656, failed=0.376, completion_tokens=57.9]validation:  95%|█████████▍| 379/400 [00:51<00:01, 14.97it/s, reward=-1.32, num_turns=1.66, num_tools=0.657, failed=0.375, completion_tokens=57.8]validation:  95%|█████████▌| 380/400 [00:51<00:01, 14.97it/s, reward=-1.3, num_turns=1.66, num_tools=0.658, failed=0.374, completion_tokens=57.7] validation:  95%|█████████▌| 381/400 [00:51<00:01, 14.97it/s, reward=-1.31, num_turns=1.66, num_tools=0.659, failed=0.373, completion_tokens=57.6]validation:  96%|█████████▌| 382/400 [00:51<00:01, 14.97it/s, reward=-1.31, num_turns=1.66, num_tools=0.66, failed=0.372, completion_tokens=57.5] validation:  96%|█████████▌| 383/400 [00:51<00:01, 14.97it/s, reward=-1.31, num_turns=1.66, num_tools=0.661, failed=0.371, completion_tokens=57.4]validation:  96%|█████████▌| 384/400 [00:51<00:01, 14.97it/s, reward=-1.3, num_turns=1.66, num_tools=0.661, 
failed=0.37, completion_tokens=57.2]  validation:  96%|█████████▋| 385/400 [00:51<00:01, 14.97it/s, reward=-1.3, num_turns=1.66, num_tools=0.662, failed=0.369, completion_tokens=57.1]validation:  96%|█████████▋| 386/400 [00:51<00:00, 19.34it/s, reward=-1.3, num_turns=1.66, num_tools=0.662, failed=0.369, completion_tokens=57.1]validation:  96%|█████████▋| 386/400 [00:51<00:00, 19.34it/s, reward=-1.3, num_turns=1.66, num_tools=0.663, failed=0.368, completion_tokens=57.1]validation:  97%|█████████▋| 387/400 [00:51<00:00, 19.34it/s, reward=-1.3, num_turns=1.66, num_tools=0.664, failed=0.367, completion_tokens=57]  validation:  97%|█████████▋| 388/400 [00:51<00:00, 19.34it/s, reward=-1.3, num_turns=1.66, num_tools=0.665, failed=0.366, completion_tokens=56.9]validation:  97%|█████████▋| 389/400 [00:51<00:00, 19.34it/s, reward=-1.29, num_turns=1.67, num_tools=0.666, failed=0.365, completion_tokens=56.8]validation:  98%|█████████▊| 390/400 [00:51<00:00, 19.34it/s, reward=-1.29, num_turns=1.67, num_tools=0.667, failed=0.364, completion_tokens=56.8]validation:  98%|█████████▊| 391/400 [00:51<00:00, 19.34it/s, reward=-1.29, num_turns=1.67, num_tools=0.668, failed=0.363, completion_tokens=56.7]validation:  98%|█████████▊| 392/400 [00:51<00:00, 19.34it/s, reward=-1.3, num_turns=1.67, num_tools=0.668, failed=0.362, completion_tokens=56.7] validation:  98%|█████████▊| 393/400 [00:51<00:00, 19.34it/s, reward=-1.29, num_turns=1.67, num_tools=0.672, failed=0.361, completion_tokens=56.8]validation:  98%|█████████▊| 394/400 [00:51<00:00, 19.34it/s, reward=-1.29, num_turns=1.67, num_tools=0.673, failed=0.36, completion_tokens=56.8] validation:  99%|█████████▉| 395/400 [00:52<00:00, 19.34it/s, reward=-1.29, num_turns=1.67, num_tools=0.673, failed=0.359, completion_tokens=56.8]validation:  99%|█████████▉| 396/400 [00:52<00:00, 19.34it/s, reward=-1.3, num_turns=1.67, num_tools=0.674, failed=0.359, completion_tokens=56.9] validation:  99%|█████████▉| 397/400 [00:52<00:00, 
19.34it/s, reward=-1.3, num_turns=1.67, num_tools=0.675, failed=0.358, completion_tokens=57.1]validation: 100%|█████████▉| 398/400 [00:52<00:00, 19.34it/s, reward=-1.29, num_turns=1.67, num_tools=0.676, failed=0.357, completion_tokens=57.2]validation: 100%|█████████▉| 399/400 [00:52<00:00, 19.34it/s, reward=-1.29, num_turns=1.67, num_tools=0.677, failed=0.356, completion_tokens=57.7]validation: 100%|██████████| 400/400 [00:52<00:00, 19.34it/s, reward=-1.3, num_turns=1.68, num_tools=0.677, failed=0.355, completion_tokens=58.1] validation: 100%|██████████| 400/400 [00:52<00:00,  7.55it/s, reward=-1.3, num_turns=1.68, num_tools=0.677, failed=0.355, completion_tokens=58.1]
+  Val avg reward: -1.295
+
+============================================================
+Step 6/50
+============================================================
+step 6:   0%|          | 0/32 [00:00<?, ?it/s]step 6:   3%|▎         | 1/32 [00:01<00:40,  1.29s/it]step 6:   3%|▎         | 1/32 [00:01<00:40,  1.29s/it, reward=2.5, num_turns=2, num_tools=1, failed=0, completion_tokens=17.5]step 6:   6%|▋         | 2/32 [00:01<00:38,  1.29s/it, reward=0.25, num_turns=2, num_tools=1, failed=0, completion_tokens=20] step 6:   9%|▉         | 3/32 [00:01<00:37,  1.29s/it, reward=-0.5, num_turns=2, num_tools=1, failed=0, completion_tokens=20.2]step 6:  12%|█▎        | 4/32 [00:01<00:36,  1.29s/it, reward=-0.125, num_turns=2, num_tools=1, failed=0, completion_tokens=21.1]step 6:  16%|█▌        | 5/32 [00:01<00:34,  1.29s/it, reward=0.5, num_turns=2, num_tools=1, failed=0, completion_tokens=22.2]   step 6:  19%|█▉        | 6/32 [00:01<00:33,  1.29s/it, reward=0.833, num_turns=2, num_tools=1, failed=0, completion_tokens=22.9]step 6:  22%|██▏       | 7/32 [00:01<00:32,  1.29s/it, reward=1.14, num_turns=2, num_tools=1, failed=0, completion_tokens=23.6] step 6:  25%|██▌       | 8/32 [00:01<00:03,  7.66it/s, reward=1.14, num_turns=2, num_tools=1, failed=0, completion_tokens=23.6]step 6:  25%|██▌       | 8/32 [00:01<00:03,  7.66it/s, reward=1.5, num_turns=2, num_tools=1, failed=0, completion_tokens=23.4] step 6:  28%|██▊       | 9/32 [00:01<00:03,  7.66it/s, reward=1.78, num_turns=2, num_tools=1, failed=0, completion_tokens=23.2]step 6:  31%|███▏      | 10/32 [00:01<00:02,  7.66it/s, reward=2, num_turns=2, num_tools=1, failed=0, completion_tokens=23.1]  step 6:  34%|███▍      | 11/32 [00:01<00:02,  7.66it/s, reward=2.18, num_turns=2, num_tools=1, failed=0, completion_tokens=23.1]step 6:  38%|███▊      | 12/32 [00:01<00:02,  7.66it/s, reward=2.33, num_turns=2, num_tools=1, failed=0, completion_tokens=23.1]step 6:  41%|████      | 13/32 [00:01<00:02,  7.66it/s, reward=2.46, num_turns=2, num_tools=1, failed=0, completion_tokens=23.2]step 6:  44%|████▍     | 14/32 [00:01<00:02,  7.66it/s, reward=2.46, num_turns=2, num_tools=1, 
failed=0, completion_tokens=23.8]step 6:  47%|████▋     | 15/32 [00:01<00:02,  7.66it/s, reward=2.17, num_turns=2, num_tools=1, failed=0, completion_tokens=24.3]step 6:  50%|█████     | 16/32 [00:01<00:02,  7.66it/s, reward=2.19, num_turns=2, num_tools=1, failed=0, completion_tokens=24.9]step 6:  53%|█████▎    | 17/32 [00:01<00:01,  7.66it/s, reward=2.21, num_turns=2, num_tools=1, failed=0, completion_tokens=25.6]step 6:  56%|█████▋    | 18/32 [00:01<00:01,  7.66it/s, reward=1.97, num_turns=2, num_tools=1, failed=0, completion_tokens=26.3]step 6:  59%|█████▉    | 19/32 [00:01<00:00, 20.30it/s, reward=1.97, num_turns=2, num_tools=1, failed=0, completion_tokens=26.3]step 6:  59%|█████▉    | 19/32 [00:01<00:00, 20.30it/s, reward=1.76, num_turns=2, num_tools=1, failed=0, completion_tokens=26.9]step 6:  62%|██████▎   | 20/32 [00:01<00:00, 20.30it/s, reward=1.52, num_turns=1.95, num_tools=0.95, failed=0, completion_tokens=30.8]step 6:  66%|██████▌   | 21/32 [00:01<00:00, 20.30it/s, reward=1.36, num_turns=1.95, num_tools=0.952, failed=0, completion_tokens=31.5]step 6:  69%|██████▉   | 22/32 [00:01<00:00, 20.30it/s, reward=1.41, num_turns=1.95, num_tools=0.955, failed=0, completion_tokens=32.2]step 6:  72%|███████▏  | 23/32 [00:01<00:00, 20.30it/s, reward=1.26, num_turns=1.96, num_tools=0.957, failed=0, completion_tokens=32.9]step 6:  75%|███████▌  | 24/32 [00:01<00:00, 20.30it/s, reward=1.31, num_turns=1.96, num_tools=0.958, failed=0, completion_tokens=32.7]step 6:  78%|███████▊  | 25/32 [00:01<00:00, 20.30it/s, reward=1.18, num_turns=1.96, num_tools=0.96, failed=0, completion_tokens=32.9] step 6:  81%|████████▏ | 26/32 [00:01<00:00, 20.30it/s, reward=1.06, num_turns=1.96, num_tools=0.962, failed=0, completion_tokens=34.2]step 6:  84%|████████▍ | 27/32 [00:01<00:00, 20.12it/s, reward=1.06, num_turns=1.96, num_tools=0.962, failed=0, completion_tokens=34.2]step 6:  84%|████████▍ | 27/32 [00:01<00:00, 20.12it/s, reward=1.08, num_turns=1.96, num_tools=0.963, 
failed=0, completion_tokens=36]  step 6:  88%|████████▊ | 28/32 [00:02<00:00, 20.12it/s, reward=0.97, num_turns=1.96, num_tools=0.964, failed=0, completion_tokens=38.6]step 6:  91%|█████████ | 29/32 [00:02<00:00, 20.12it/s, reward=0.868, num_turns=1.97, num_tools=0.966, failed=0, completion_tokens=43.2]step 6:  94%|█████████▍| 30/32 [00:02<00:00, 20.12it/s, reward=0.739, num_turns=1.93, num_tools=0.933, failed=0, completion_tokens=54.4]step 6:  97%|█████████▋| 31/32 [00:02<00:00, 20.12it/s, reward=0.618, num_turns=1.9, num_tools=0.903, failed=0, completion_tokens=64]   step 6: 100%|██████████| 32/32 [00:03<00:00, 20.12it/s, reward=0.505, num_turns=1.88, num_tools=0.875, failed=0, completion_tokens=78]step 6: 100%|██████████| 32/32 [00:03<00:00,  9.77it/s, reward=0.505, num_turns=1.88, num_tools=0.875, failed=0, completion_tokens=78]
+  group 0: mean=+1.94 std=1.488 min=-2.0 max=+2.5 | Which country has a larger population, France or I
+  group 1: mean=-1.79 std=1.374 min=-3.0 max=+1.7 | What is the population of Germany divided by its a
+  group 2: mean=-1.88 std=1.166 min=-3.0 max=+1.0 | What is the population of Japan divided by its are
+  group 3: mean=+3.75 std=0.433 min=+3.0 max=+4.0 | What's the weather like in Dubai?
+  Avg reward: 0.505 | Avg tools/rollout: 0.9 | groups with variance: 4/4
+Deleted checkpoint ./.art/rl-tool-use/models/qwen-0.5b-tool-agent/checkpoints/0004
+Packed 32 trajectories into 4 sequences of length 2048
+train:   0%|          | 0/4 [00:00<?, ?it/s]train:  25%|██▌       | 1/4 [00:02<00:06,  2.13s/it]train:  25%|██▌       | 1/4 [00:02<00:06,  2.13s/it, loss/train=0.0339, loss/grad_norm=4.55, loss/learning_rate=5e-5, loss/entropy=0.78]train:  50%|█████     | 2/4 [00:02<00:02,  1.08s/it, loss/train=0.0339, loss/grad_norm=4.55, loss/learning_rate=5e-5, loss/entropy=0.78]train:  50%|█████     | 2/4 [00:02<00:02,  1.08s/it, loss/train=-0.308, loss/grad_norm=2.06, loss/learning_rate=5e-5, loss/entropy=1.54]train:  75%|███████▌  | 3/4 [00:02<00:00,  1.34it/s, loss/train=-0.308, loss/grad_norm=2.06, loss/learning_rate=5e-5, loss/entropy=1.54]train:  75%|███████▌  | 3/4 [00:02<00:00,  1.34it/s, loss/train=0.463, loss/grad_norm=2.5, loss/learning_rate=5e-5, loss/entropy=1.18]  train: 100%|██████████| 4/4 [00:03<00:00,  1.71it/s, loss/train=0.463, loss/grad_norm=2.5, loss/learning_rate=5e-5, loss/entropy=1.18]train: 100%|██████████| 4/4 [00:03<00:00,  1.71it/s, loss/train=-0.826, loss/grad_norm=0.638, loss/learning_rate=5e-5, loss/entropy=0.878](APIServer pid=12946) Adapters before cleanup: ['default']
+(APIServer pid=12946) Keeping active adapter(s): ['default']
+(APIServer pid=12946) Adapters after cleanup: ['default']
+train: 100%|██████████| 4/4 [00:31<00:00,  7.79s/it, loss/train=-0.826, loss/grad_norm=0.638, loss/learning_rate=5e-5, loss/entropy=0.878]
+
+============================================================
+Step 7/50
+============================================================
+step 7:   0%|          | 0/32 [00:00<?, ?it/s]step 7:   3%|▎         | 1/32 [00:01<00:37,  1.20s/it]step 7:   3%|▎         | 1/32 [00:01<00:37,  1.20s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=34]step 7:   6%|▋         | 2/32 [00:01<00:35,  1.20s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=38.5]step 7:   9%|▉         | 3/32 [00:01<00:10,  2.70it/s, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=38.5]step 7:   9%|▉         | 3/32 [00:01<00:10,  2.70it/s, reward=-1.17, num_turns=1.33, num_tools=0.333, failed=0, completion_tokens=30.3]step 7:  12%|█▎        | 4/32 [00:01<00:10,  2.70it/s, reward=-1.62, num_turns=1.25, num_tools=0.25, failed=0, completion_tokens=38]   step 7:  16%|█▌        | 5/32 [00:01<00:09,  2.70it/s, reward=-0.8, num_turns=1.4, num_tools=0.4, failed=0, completion_tokens=34]   step 7:  19%|█▉        | 6/32 [00:01<00:09,  2.70it/s, reward=-1, num_turns=1.5, num_tools=0.5, failed=0, completion_tokens=31.9]step 7:  22%|██▏       | 7/32 [00:01<00:09,  2.70it/s, reward=-0.5, num_turns=1.57, num_tools=0.571, failed=0, completion_tokens=30.6]step 7:  25%|██▌       | 8/32 [00:01<00:08,  2.70it/s, reward=-0.125, num_turns=1.62, num_tools=0.625, failed=0, completion_tokens=29.9]step 7:  28%|██▊       | 9/32 [00:01<00:02,  9.80it/s, reward=-0.125, num_turns=1.62, num_tools=0.625, failed=0, completion_tokens=29.9]step 7:  28%|██▊       | 9/32 [00:01<00:02,  9.80it/s, reward=-0.333, num_turns=1.67, num_tools=0.667, failed=0, completion_tokens=29.5]step 7:  31%|███▏      | 10/32 [00:01<00:02,  9.80it/s, reward=-0.5, num_turns=1.7, num_tools=0.7, failed=0, completion_tokens=29.3]    step 7:  34%|███▍      | 11/32 [00:01<00:02,  9.80it/s, reward=-0.227, num_turns=1.73, num_tools=0.727, failed=0, completion_tokens=29.1]step 7:  38%|███▊      | 12/32 [00:01<00:02,  9.80it/s, reward=-0.375, num_turns=1.75, num_tools=0.75, failed=0, completion_tokens=29.2] step 7:  41%|████      | 
13/32 [00:01<00:01,  9.80it/s, reward=-0.154, num_turns=1.77, num_tools=0.769, failed=0, completion_tokens=29.3]step 7:  44%|████▍     | 14/32 [00:01<00:01,  9.80it/s, reward=0.0357, num_turns=1.79, num_tools=0.786, failed=0, completion_tokens=29.5]step 7:  47%|████▋     | 15/32 [00:01<00:01,  9.80it/s, reward=-0.1, num_turns=1.8, num_tools=0.8, failed=0, completion_tokens=30.1]     step 7:  50%|█████     | 16/32 [00:01<00:01,  9.80it/s, reward=-0.219, num_turns=1.81, num_tools=0.812, failed=0, completion_tokens=30.3]step 7:  53%|█████▎    | 17/32 [00:01<00:01,  9.80it/s, reward=-0.324, num_turns=1.82, num_tools=0.824, failed=0, completion_tokens=30.6]step 7:  56%|█████▋    | 18/32 [00:01<00:00, 20.22it/s, reward=-0.324, num_turns=1.82, num_tools=0.824, failed=0, completion_tokens=30.6]step 7:  56%|█████▋    | 18/32 [00:01<00:00, 20.22it/s, reward=-0.167, num_turns=1.83, num_tools=0.833, failed=0, completion_tokens=31.1]step 7:  59%|█████▉    | 19/32 [00:01<00:00, 20.22it/s, reward=-0.263, num_turns=1.84, num_tools=0.842, failed=0, completion_tokens=31.8]step 7:  62%|██████▎   | 20/32 [00:01<00:00, 20.22it/s, reward=-0.35, num_turns=1.85, num_tools=0.85, failed=0, completion_tokens=32.4]  step 7:  66%|██████▌   | 21/32 [00:01<00:00, 20.22it/s, reward=-0.429, num_turns=1.86, num_tools=0.857, failed=0, completion_tokens=33.1]step 7:  69%|██████▉   | 22/32 [00:01<00:00, 20.22it/s, reward=-0.341, num_turns=1.86, num_tools=0.864, failed=0, completion_tokens=33.8]step 7:  72%|███████▏  | 23/32 [00:01<00:00, 20.22it/s, reward=-0.217, num_turns=1.87, num_tools=0.87, failed=0, completion_tokens=33.7] step 7:  75%|███████▌  | 24/32 [00:01<00:00, 25.32it/s, reward=-0.217, num_turns=1.87, num_tools=0.87, failed=0, completion_tokens=33.7]step 7:  75%|███████▌  | 24/32 [00:01<00:00, 25.32it/s, reward=-0.125, num_turns=1.88, num_tools=0.875, failed=0, completion_tokens=34.9]step 7:  78%|███████▊  | 25/32 [00:01<00:00, 25.32it/s, reward=-0.02, num_turns=1.88, 
num_tools=0.88, failed=0, completion_tokens=36]    step 7:  81%|████████▏ | 26/32 [00:01<00:00, 25.32it/s, reward=0.0192, num_turns=1.88, num_tools=0.885, failed=0, completion_tokens=37.1]step 7:  84%|████████▍ | 27/32 [00:01<00:00, 25.32it/s, reward=0.111, num_turns=1.89, num_tools=0.889, failed=0, completion_tokens=36.8] step 7:  88%|████████▊ | 28/32 [00:01<00:00, 25.32it/s, reward=0.0357, num_turns=1.89, num_tools=0.893, failed=0, completion_tokens=36.7]step 7:  91%|█████████ | 29/32 [00:02<00:00, 22.16it/s, reward=0.0357, num_turns=1.89, num_tools=0.893, failed=0, completion_tokens=36.7]step 7:  91%|█████████ | 29/32 [00:02<00:00, 22.16it/s, reward=-0.0345, num_turns=1.9, num_tools=0.897, failed=0, completion_tokens=38.4]step 7:  94%|█████████▍| 30/32 [00:02<00:00, 22.16it/s, reward=-0.133, num_turns=1.87, num_tools=0.867, failed=0, completion_tokens=48.6]step 7:  97%|█████████▋| 31/32 [00:02<00:00, 22.16it/s, reward=-0.226, num_turns=1.84, num_tools=0.839, failed=0, completion_tokens=58.2]step 7: 100%|██████████| 32/32 [00:02<00:00, 22.16it/s, reward=-0.141, num_turns=1.84, num_tools=0.844, failed=0, completion_tokens=61.6]step 7: 100%|██████████| 32/32 [00:02<00:00, 11.90it/s, reward=-0.141, num_turns=1.84, num_tools=0.844, failed=0, completion_tokens=61.6]
+  group 0: mean=+1.12 std=2.382 min=-3.0 max=+2.5 | Which country has a larger population, Japan or Br
+  group 1: mean=-0.94 std=1.944 min=-3.0 max=+2.0 | What is the distance from Earth to the Sun in km i
+  group 2: mean=+1.38 std=1.949 min=-2.0 max=+2.5 | Which country has a larger population, France or B
+  group 3: mean=-2.12 std=0.331 min=-3.0 max=-2.0 | What is the GDP of Germany?
+  Avg reward: -0.141 | Avg tools/rollout: 0.8 | groups with variance: 4/4
+Packed 32 trajectories into 3 sequences of length 2048
+train:   0%|          | 0/3 [00:00<?, ?it/s]train:  33%|███▎      | 1/3 [00:02<00:04,  2.11s/it]train:  33%|███▎      | 1/3 [00:02<00:04,  2.11s/it, loss/train=-0.371, loss/grad_norm=3.5, loss/learning_rate=5e-5, loss/entropy=1.56]train:  67%|██████▋   | 2/3 [00:02<00:01,  1.07s/it, loss/train=-0.371, loss/grad_norm=3.5, loss/learning_rate=5e-5, loss/entropy=1.56]train:  67%|██████▋   | 2/3 [00:02<00:01,  1.07s/it, loss/train=0.0586, loss/grad_norm=0.753, loss/learning_rate=5e-5, loss/entropy=0.649]train: 100%|██████████| 3/3 [00:02<00:00,  1.34it/s, loss/train=0.0586, loss/grad_norm=0.753, loss/learning_rate=5e-5, loss/entropy=0.649]train: 100%|██████████| 3/3 [00:02<00:00,  1.34it/s, loss/train=-0.702, loss/grad_norm=2.84, loss/learning_rate=5e-5, loss/entropy=1.2]   (APIServer pid=12946) Adapters before cleanup: ['default']
+(APIServer pid=12946) Keeping active adapter(s): ['default']
+(APIServer pid=12946) Adapters after cleanup: ['default']
+train: 100%|██████████| 3/3 [00:30<00:00, 10.26s/it, loss/train=-0.702, loss/grad_norm=2.84, loss/learning_rate=5e-5, loss/entropy=1.2]
+
+============================================================
+Step 8/50
+============================================================
+step 8:   0%|          | 0/32 [00:00<?, ?it/s]step 8:   3%|▎         | 1/32 [00:00<00:30,  1.00it/s]step 8:   3%|▎         | 1/32 [00:00<00:30,  1.00it/s, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=1]step 8:   6%|▋         | 2/32 [00:01<00:16,  1.81it/s, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=1]step 8:   6%|▋         | 2/32 [00:01<00:16,  1.81it/s, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=19.5]step 8:   9%|▉         | 3/32 [00:01<00:16,  1.81it/s, reward=-1.33, num_turns=1.33, num_tools=0.333, failed=0, completion_tokens=19.3]step 8:  12%|█▎        | 4/32 [00:01<00:15,  1.81it/s, reward=-0.5, num_turns=1.5, num_tools=0.5, failed=0, completion_tokens=19.4]    step 8:  16%|█▌        | 5/32 [00:01<00:04,  5.45it/s, reward=-0.5, num_turns=1.5, num_tools=0.5, failed=0, completion_tokens=19.4]step 8:  16%|█▌        | 5/32 [00:01<00:04,  5.45it/s, reward=-0.1, num_turns=1.6, num_tools=0.6, failed=0, completion_tokens=19.7]step 8:  19%|█▉        | 6/32 [00:01<00:04,  5.45it/s, reward=0.167, num_turns=1.67, num_tools=0.667, failed=0, completion_tokens=20.2]step 8:  22%|██▏       | 7/32 [00:01<00:04,  5.45it/s, reward=0.429, num_turns=1.71, num_tools=0.714, failed=0, completion_tokens=21]  step 8:  25%|██▌       | 8/32 [00:01<00:04,  5.45it/s, reward=0.625, num_turns=1.75, num_tools=0.75, failed=0, completion_tokens=21.3]step 8:  28%|██▊       | 9/32 [00:01<00:04,  5.45it/s, reward=0.778, num_turns=1.78, num_tools=0.778, failed=0, completion_tokens=21.6]step 8:  31%|███▏      | 10/32 [00:01<00:04,  5.45it/s, reward=0.5, num_turns=1.8, num_tools=0.8, failed=0, completion_tokens=22.1]    step 8:  34%|███▍      | 11/32 [00:01<00:03,  5.45it/s, reward=0.5, num_turns=1.82, num_tools=0.818, failed=0, completion_tokens=22.5]step 8:  38%|███▊      | 12/32 [00:01<00:03,  5.45it/s, reward=0.292, num_turns=1.83, num_tools=0.833, failed=0, completion_tokens=23]step 8:  41%|████      | 13/32 
[00:01<00:03,  5.45it/s, reward=0.115, num_turns=1.85, num_tools=0.846, failed=0, completion_tokens=23.4]step 8:  44%|████▍     | 14/32 [00:01<00:03,  5.45it/s, reward=-0.107, num_turns=1.79, num_tools=0.786, failed=0, completion_tokens=27.2]step 8:  47%|████▋     | 15/32 [00:01<00:03,  5.45it/s, reward=-0.233, num_turns=1.8, num_tools=0.8, failed=0, completion_tokens=27.3]   step 8:  50%|█████     | 16/32 [00:01<00:00, 21.15it/s, reward=-0.233, num_turns=1.8, num_tools=0.8, failed=0, completion_tokens=27.3]step 8:  50%|█████     | 16/32 [00:01<00:00, 21.15it/s, reward=-0.344, num_turns=1.81, num_tools=0.812, failed=0, completion_tokens=27]step 8:  53%|█████▎    | 17/32 [00:01<00:00, 21.15it/s, reward=-0.441, num_turns=1.82, num_tools=0.824, failed=0, completion_tokens=27.6]step 8:  56%|█████▋    | 18/32 [00:01<00:00, 21.15it/s, reward=-0.528, num_turns=1.83, num_tools=0.833, failed=0, completion_tokens=28.2]step 8:  59%|█████▉    | 19/32 [00:01<00:00, 21.15it/s, reward=-0.474, num_turns=1.84, num_tools=0.842, failed=0, completion_tokens=28]  step 8:  62%|██████▎   | 20/32 [00:01<00:00, 21.15it/s, reward=-0.425, num_turns=1.85, num_tools=0.85, failed=0, completion_tokens=27.9]step 8:  66%|██████▌   | 21/32 [00:01<00:00, 21.15it/s, reward=-0.381, num_turns=1.86, num_tools=0.857, failed=0, completion_tokens=27.9]step 8:  69%|██████▉   | 22/32 [00:01<00:00, 21.15it/s, reward=-0.341, num_turns=1.86, num_tools=0.864, failed=0, completion_tokens=27.9]step 8:  72%|███████▏  | 23/32 [00:01<00:00, 21.15it/s, reward=-0.413, num_turns=1.87, num_tools=0.87, failed=0, completion_tokens=28.5] step 8:  75%|███████▌  | 24/32 [00:01<00:00, 29.53it/s, reward=-0.413, num_turns=1.87, num_tools=0.87, failed=0, completion_tokens=28.5]step 8:  75%|███████▌  | 24/32 [00:01<00:00, 29.53it/s, reward=-0.479, num_turns=1.88, num_tools=0.875, failed=0, completion_tokens=29.2]step 8:  78%|███████▊  | 25/32 [00:01<00:00, 29.53it/s, reward=-0.54, num_turns=1.88, num_tools=0.88, 
failed=0, completion_tokens=30.1]  step 8:  81%|████████▏ | 26/32 [00:01<00:00, 29.53it/s, reward=-0.596, num_turns=1.88, num_tools=0.885, failed=0, completion_tokens=30.2]step 8:  84%|████████▍ | 27/32 [00:01<00:00, 29.53it/s, reward=-0.648, num_turns=1.89, num_tools=0.889, failed=0, completion_tokens=31.4]step 8:  88%|████████▊ | 28/32 [00:01<00:00, 29.53it/s, reward=-0.696, num_turns=1.89, num_tools=0.893, failed=0, completion_tokens=32.6]step 8:  91%|█████████ | 29/32 [00:01<00:00, 23.73it/s, reward=-0.696, num_turns=1.89, num_tools=0.893, failed=0, completion_tokens=32.6]step 8:  91%|█████████ | 29/32 [00:01<00:00, 23.73it/s, reward=-0.741, num_turns=1.9, num_tools=0.897, failed=0, completion_tokens=33.9] step 8:  94%|█████████▍| 30/32 [00:02<00:00, 23.73it/s, reward=-0.783, num_turns=1.9, num_tools=0.9, failed=0, completion_tokens=33.9]  step 8:  97%|█████████▋| 31/32 [00:02<00:00, 23.73it/s, reward=-0.855, num_turns=1.87, num_tools=0.871, failed=0, completion_tokens=41.5]step 8: 100%|██████████| 32/32 [00:02<00:00, 23.73it/s, reward=-0.797, num_turns=1.88, num_tools=0.875, failed=0, completion_tokens=45.8]step 8: 100%|██████████| 32/32 [00:02<00:00, 11.77it/s, reward=-0.797, num_turns=1.88, num_tools=0.875, failed=0, completion_tokens=45.8]
+  group 0: mean=+0.94 std=0.583 min=+0.5 max=+2.0 | What is the temperature in Tokyo in Fahrenheit?
+  group 1: mean=-0.75 std=2.165 min=-3.0 max=+2.0 | Which is hotter right now, Paris or Mumbai?
+  group 2: mean=-1.50 std=1.323 min=-2.0 max=+2.0 | Which is hotter right now, Tokyo or Dubai?
+  group 3: mean=-1.88 std=1.166 min=-3.0 max=+1.0 | How old was Guido van Rossum in 2020?
+  Avg reward: -0.797 | Avg tools/rollout: 0.9 | groups with variance: 4/4
+Deleted checkpoint ./.art/rl-tool-use/models/qwen-0.5b-tool-agent/checkpoints/0006
+Packed 32 trajectories into 3 sequences of length 2048
+train:   0%|          | 0/3 [00:00<?, ?it/s]train:  33%|███▎      | 1/3 [00:02<00:04,  2.06s/it]train:  33%|███▎      | 1/3 [00:02<00:04,  2.06s/it, loss/train=-0.724, loss/grad_norm=3.31, loss/learning_rate=5e-5, loss/entropy=0.82]train:  67%|██████▋   | 2/3 [00:02<00:01,  1.06s/it, loss/train=-0.724, loss/grad_norm=3.31, loss/learning_rate=5e-5, loss/entropy=0.82]train:  67%|██████▋   | 2/3 [00:02<00:01,  1.06s/it, loss/train=-3.07, loss/grad_norm=38, loss/learning_rate=5e-5, loss/entropy=0.91]   train: 100%|██████████| 3/3 [00:02<00:00,  1.36it/s, loss/train=-3.07, loss/grad_norm=38, loss/learning_rate=5e-5, loss/entropy=0.91]train: 100%|██████████| 3/3 [00:02<00:00,  1.36it/s, loss/train=1.03, loss/grad_norm=0.954, loss/learning_rate=5e-5, loss/entropy=1] (APIServer pid=12946) Adapters before cleanup: ['default']
+(APIServer pid=12946) Keeping active adapter(s): ['default']
+(APIServer pid=12946) Adapters after cleanup: ['default']
+[2026-04-13 02:28:34] ERROR base_events.py:1821: Task exception was never retrieved
+future: <Task finished name='Task-2' coro=<LocalBackend._monitor_openai_server() done, defined at /usr/local/lib/python3.12/dist-packages/art/local/backend.py:416> exception=NotFoundError("Error code: 404 - {'error': {'message': 'The model `qwen-0.5b-tool-agent@8` does not exist.', 'type': 'NotFoundError', 'param': 'model', 'code': 404}}")>
+Traceback (most recent call last):
+  File "/usr/lib/python3.12/asyncio/tasks.py", line 314, in __step_run_and_handle_result
+    result = coro.send(None)
+             ^^^^^^^^^^^^^^^
+  File "/usr/local/lib/python3.12/dist-packages/art/local/backend.py", line 468, in _monitor_openai_server
+    raise e
+  File "/usr/local/lib/python3.12/dist-packages/art/local/backend.py", line 453, in _monitor_openai_server
+    await openai_client.completions.create(
+  File "/usr/local/lib/python3.12/dist-packages/openai/resources/completions.py", line 1109, in create
+    return await self._post(
+           ^^^^^^^^^^^^^^^^^
+  File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1884, in post
+    return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1669, in request
+    raise self._make_status_error_from_response(err.response) from None
+openai.NotFoundError: Error code: 404 - {'error': {'message': 'The model `qwen-0.5b-tool-agent@8` does not exist.', 'type': 'NotFoundError', 'param': 'model', 'code': 404}}
+Traceback (most recent call last):
+  File "<frozen runpy>", line 198, in _run_module_as_main
+  File "<frozen runpy>", line 88, in _run_code
+  File "/workspace/RL-Trained-Tool-Use-Agent/src/train.py", line 171, in <module>
+    main()
+  File "/workspace/RL-Trained-Tool-Use-Agent/src/train.py", line 167, in main
+    asyncio.run(train(**kwargs))
+  File "/usr/local/lib/python3.12/dist-packages/nest_asyncio.py", line 30, in run
+    return loop.run_until_complete(task)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/usr/local/lib/python3.12/dist-packages/nest_asyncio.py", line 98, in run_until_complete
+    return f.result()
+           ^^^^^^^^^^
+  File "/usr/lib/python3.12/asyncio/futures.py", line 203, in result
+    raise self._exception.with_traceback(self._exception_tb)
+  File "/usr/lib/python3.12/asyncio/tasks.py", line 316, in __step_run_and_handle_result
+    result = coro.throw(exc)
+             ^^^^^^^^^^^^^^^
+  File "/workspace/RL-Trained-Tool-Use-Agent/src/train.py", line 113, in train
+    result = await backend.train(model, train_groups, learning_rate=learning_rate)
+             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/usr/local/lib/python3.12/dist-packages/art/local/backend.py", line 644, in train
+    async for metrics in self._train_model(
+  File "/usr/local/lib/python3.12/dist-packages/art/local/backend.py", line 783, in _train_model
+    async for result in service.train(
+  File "/usr/local/lib/python3.12/dist-packages/mp_actors/move.py", line 226, in async_gen_wrapper
+    send_value = yield await asyncio.wrap_future(
+                       ^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/usr/lib/python3.12/asyncio/futures.py", line 287, in __await__
+    yield self  # This tells Task to wait for completion.
+    ^^^^^^^^^^
+  File "/usr/lib/python3.12/asyncio/tasks.py", line 385, in __wakeup
+    future.result()
+  File "/usr/lib/python3.12/asyncio/futures.py", line 203, in result
+    raise self._exception.with_traceback(self._exception_tb)
+RuntimeError: Proxy is closing
+train: 100%|██████████| 3/3 [00:32<00:00, 10.79s/it, loss/train=1.03, loss/grad_norm=0.954, loss/learning_rate=5e-5, loss/entropy=1]
+Skipping import of cpp extensions due to incompatible torch version 2.10.0+cu128 for torchao version 0.15.0             Please see https://github.com/pytorch/ao/issues/2919 for more info
+Loaded 200 train, 50 val scenarios
+GRPO config: 4 scenarios/step × 8 rollouts/scenario = 32 rollouts/step
+Skipping import of cpp extensions due to incompatible torch version 2.10.0+cu128 for torchao version 0.15.0             Please see https://github.com/pytorch/ao/issues/2919 for more info
+/usr/local/lib/python3.12/dist-packages/art/__init__.py:37: UserWarning: WARNING: Unsloth should be imported before [transformers] to ensure all optimizations are applied. Your code may run slower or encounter memory issues without these optimizations.
+
+Please restructure your imports with 'import unsloth' at the top of your file.
+  import unsloth  # noqa: F401
+🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
+🦥 Unsloth Zoo will now patch everything to make training faster!
+==((====))==  Unsloth 2026.3.3: Fast Qwen2 patching. Transformers: 5.2.0. vLLM: 0.17.0+art1.
+   \\   /|    NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.252 GB. Platform: Linux.
+O^O/ \_/ \    Torch: 2.10.0+cu128. CUDA: 8.0. CUDA Toolkit: 12.8. Triton: 3.6.0
+\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.35. FA2 = False]
+ "-____-"     Free license: http://github.com/unslothai/unsloth
+Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
+Loading weights:   0%|          | 0/290 [00:00<?, ?it/s]Loading weights:   0%|          | 1/290 [00:00<00:00, 5714.31it/s, Materializing param=model.embed_tokens.weight]Loading weights:   0%|          | 1/290 [00:00<00:00, 3672.77it/s, Materializing param=model.embed_tokens.weight]Loading weights:   1%|          | 2/290 [00:00<00:54,  5.30it/s, Materializing param=model.embed_tokens.weight]  Loading weights:   1%|          | 2/290 [00:00<00:54,  5.30it/s, Materializing param=model.layers.0.input_layernorm.weight]Loading weights:   1%|          | 2/290 [00:00<00:54,  5.30it/s, Materializing param=model.layers.0.input_layernorm.weight]Loading weights:   1%|          | 3/290 [00:00<00:54,  5.30it/s, Materializing param=model.layers.0.mlp.down_proj.weight]  Loading weights:   1%|          | 3/290 [00:00<00:54,  5.30it/s, Materializing param=model.layers.0.mlp.down_proj.weight]Loading weights:   1%|▏         | 4/290 [00:00<00:53,  5.30it/s, Materializing param=model.layers.0.mlp.gate_proj.weight]Loading weights:   1%|▏         | 4/290 [00:00<00:53,  5.30it/s, Materializing param=model.layers.0.mlp.gate_proj.weight]Loading weights:   2%|▏         | 5/290 [00:00<00:53,  5.30it/s, Materializing param=model.layers.0.mlp.up_proj.weight]  Loading weights:   2%|▏         | 5/290 [00:00<00:53,  5.30it/s, Materializing param=model.layers.0.mlp.up_proj.weight]Loading weights:   2%|▏         | 6/290 [00:00<00:53,  5.30it/s, Materializing param=model.layers.0.post_attention_layernorm.weight]Loading weights:   2%|▏         | 6/290 [00:00<00:53,  5.30it/s, Materializing param=model.layers.0.post_attention_layernorm.weight]Loading weights:   2%|▏         | 7/290 [00:00<00:53,  5.30it/s, Materializing param=model.layers.0.self_attn.k_proj.bias]          Loading weights:   2%|▏         | 7/290 [00:00<00:53,  5.30it/s, Materializing param=model.layers.0.self_attn.k_proj.bias]Loading weights:   3%|▎         | 8/290 [00:00<00:53,  5.30it/s, Materializing 
param=model.layers.0.self_attn.k_proj.weight]Loading weights:   3%|▎         | 8/290 [00:00<00:53,  5.30it/s, Materializing param=model.layers.0.self_attn.k_proj.weight]Loading weights:   3%|▎         | 9/290 [00:00<00:53,  5.30it/s, Materializing param=model.layers.0.self_attn.o_proj.weight]Loading weights:   3%|▎         | 9/290 [00:00<00:53,  5.30it/s, Materializing param=model.layers.0.self_attn.o_proj.weight]Loading weights:   3%|▎         | 10/290 [00:00<00:52,  5.30it/s, Materializing param=model.layers.0.self_attn.q_proj.bias] Loading weights:   3%|▎         | 10/290 [00:00<00:52,  5.30it/s, Materializing param=model.layers.0.self_attn.q_proj.bias]Loading weights:   4%|▍         | 11/290 [00:00<00:52,  5.30it/s, Materializing param=model.layers.0.self_attn.q_proj.weight]Loading weights:   4%|▍         | 11/290 [00:00<00:52,  5.30it/s, Materializing param=model.layers.0.self_attn.q_proj.weight]Loading weights:   4%|▍         | 12/290 [00:00<00:52,  5.30it/s, Materializing param=model.layers.0.self_attn.v_proj.bias]  Loading weights:   4%|▍         | 12/290 [00:00<00:52,  5.30it/s, Materializing param=model.layers.0.self_attn.v_proj.bias]Loading weights:   4%|▍         | 13/290 [00:00<00:52,  5.30it/s, Materializing param=model.layers.0.self_attn.v_proj.weight]Loading weights:   4%|▍         | 13/290 [00:00<00:52,  5.30it/s, Materializing param=model.layers.0.self_attn.v_proj.weight]Loading weights:   5%|▍         | 14/290 [00:00<00:52,  5.30it/s, Materializing param=model.layers.1.input_layernorm.weight] Loading weights:   5%|▍         | 14/290 [00:00<00:52,  5.30it/s, Materializing param=model.layers.1.input_layernorm.weight]Loading weights:   5%|▌         | 15/290 [00:00<00:51,  5.30it/s, Materializing param=model.layers.1.mlp.down_proj.weight]  Loading weights:   5%|▌         | 15/290 [00:00<00:51,  5.30it/s, Materializing param=model.layers.1.mlp.down_proj.weight]Loading weights:   6%|▌         | 16/290 [00:00<00:51,  5.30it/s, 
Materializing param=model.layers.1.mlp.gate_proj.weight]Loading weights:   6%|▌         | 16/290 [00:00<00:51,  5.30it/s, Materializing param=model.layers.1.mlp.gate_proj.weight]Loading weights:   6%|▌         | 17/290 [00:00<00:51,  5.30it/s, Materializing param=model.layers.1.mlp.up_proj.weight]  Loading weights:   6%|▌         | 17/290 [00:00<00:51,  5.30it/s, Materializing param=model.layers.1.mlp.up_proj.weight]Loading weights:   6%|▌         | 18/290 [00:00<00:51,  5.30it/s, Materializing param=model.layers.1.post_attention_layernorm.weight]Loading weights:   6%|▌         | 18/290 [00:00<00:51,  5.30it/s, Materializing param=model.layers.1.post_attention_layernorm.weight]Loading weights:   7%|▋         | 19/290 [00:00<00:51,  5.30it/s, Materializing param=model.layers.1.self_attn.k_proj.bias]          Loading weights:   7%|▋         | 19/290 [00:00<00:51,  5.30it/s, Materializing param=model.layers.1.self_attn.k_proj.bias]Loading weights:   7%|▋         | 20/290 [00:00<00:50,  5.30it/s, Materializing param=model.layers.1.self_attn.k_proj.weight]Loading weights:   7%|▋         | 20/290 [00:00<00:50,  5.30it/s, Materializing param=model.layers.1.self_attn.k_proj.weight]Loading weights:   7%|▋         | 21/290 [00:00<00:50,  5.30it/s, Materializing param=model.layers.1.self_attn.o_proj.weight]Loading weights:   7%|▋         | 21/290 [00:00<00:50,  5.30it/s, Materializing param=model.layers.1.self_attn.o_proj.weight]Loading weights:   8%|▊         | 22/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.1.self_attn.o_proj.weight]Loading weights:   8%|▊         | 22/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.1.self_attn.q_proj.bias]  Loading weights:   8%|▊         | 22/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.1.self_attn.q_proj.bias]Loading weights:   8%|▊         | 23/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.1.self_attn.q_proj.weight]Loading weights:   8%|▊         | 23/290 
[00:00<00:04, 53.67it/s, Materializing param=model.layers.1.self_attn.q_proj.weight]Loading weights:   8%|▊         | 24/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.1.self_attn.v_proj.bias]  Loading weights:   8%|▊         | 24/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.1.self_attn.v_proj.bias]Loading weights:   9%|▊         | 25/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.1.self_attn.v_proj.weight]Loading weights:   9%|▊         | 25/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.1.self_attn.v_proj.weight]Loading weights:   9%|▉         | 26/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.input_layernorm.weight] Loading weights:   9%|▉         | 26/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.input_layernorm.weight]Loading weights:   9%|▉         | 27/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.mlp.down_proj.weight]  Loading weights:   9%|▉         | 27/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.mlp.down_proj.weight]Loading weights:  10%|▉         | 28/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.mlp.gate_proj.weight]Loading weights:  10%|▉         | 28/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.mlp.gate_proj.weight]Loading weights:  10%|█         | 29/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.mlp.up_proj.weight]  Loading weights:  10%|█         | 29/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.mlp.up_proj.weight]Loading weights:  10%|█         | 30/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.post_attention_layernorm.weight]Loading weights:  10%|█         | 30/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.post_attention_layernorm.weight]Loading weights:  11%|█         | 31/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.self_attn.k_proj.bias]          Loading weights:  
11%|█         | 31/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.self_attn.k_proj.bias]Loading weights:  11%|█         | 32/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.self_attn.k_proj.weight]Loading weights:  11%|█         | 32/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.self_attn.k_proj.weight]Loading weights:  11%|█▏        | 33/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.self_attn.o_proj.weight]Loading weights:  11%|█▏        | 33/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.self_attn.o_proj.weight]Loading weights:  12%|█▏        | 34/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.self_attn.q_proj.bias]  Loading weights:  12%|█▏        | 34/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.self_attn.q_proj.bias]Loading weights:  12%|█▏        | 35/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.self_attn.q_proj.weight]Loading weights:  12%|█▏        | 35/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.self_attn.q_proj.weight]Loading weights:  12%|█▏        | 36/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.self_attn.v_proj.bias]  Loading weights:  12%|█▏        | 36/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.self_attn.v_proj.bias]Loading weights:  13%|█▎        | 37/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.self_attn.v_proj.weight]Loading weights:  13%|█▎        | 37/290 [00:00<00:04, 53.67it/s, Materializing param=model.layers.2.self_attn.v_proj.weight]Loading weights:  13%|█▎        | 38/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.2.self_attn.v_proj.weight]Loading weights:  13%|█▎        | 38/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.input_layernorm.weight] Loading weights:  13%|█▎        | 38/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.input_layernorm.weight]Loading 
weights:  13%|█▎        | 39/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.mlp.down_proj.weight]  Loading weights:  13%|█▎        | 39/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.mlp.down_proj.weight]Loading weights:  14%|█▍        | 40/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.mlp.gate_proj.weight]Loading weights:  14%|█▍        | 40/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.mlp.gate_proj.weight]Loading weights:  14%|█▍        | 41/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.mlp.up_proj.weight]  Loading weights:  14%|█▍        | 41/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.mlp.up_proj.weight]Loading weights:  14%|█▍        | 42/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.post_attention_layernorm.weight]Loading weights:  14%|█▍        | 42/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.post_attention_layernorm.weight]Loading weights:  15%|█▍        | 43/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.self_attn.k_proj.bias]          Loading weights:  15%|█▍        | 43/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.self_attn.k_proj.bias]Loading weights:  15%|█▌        | 44/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.self_attn.k_proj.weight]Loading weights:  15%|█▌        | 44/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.self_attn.k_proj.weight]Loading weights:  16%|█▌        | 45/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.self_attn.o_proj.weight]Loading weights:  16%|█▌        | 45/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.self_attn.o_proj.weight]Loading weights:  16%|█▌        | 46/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.self_attn.q_proj.bias]  Loading weights:  16%|█▌        | 46/290 [00:00<00:03, 79.49it/s, Materializing 
param=model.layers.3.self_attn.q_proj.bias]Loading weights:  16%|█▌        | 47/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.self_attn.q_proj.weight]Loading weights:  16%|█▌        | 47/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.self_attn.q_proj.weight]Loading weights:  17%|█▋        | 48/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.self_attn.v_proj.bias]  Loading weights:  17%|█▋        | 48/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.self_attn.v_proj.bias]Loading weights:  17%|█▋        | 49/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.self_attn.v_proj.weight]Loading weights:  17%|█▋        | 49/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.3.self_attn.v_proj.weight]Loading weights:  17%|█▋        | 50/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.4.input_layernorm.weight] Loading weights:  17%|█▋        | 50/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.4.input_layernorm.weight]Loading weights:  18%|█▊        | 51/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.4.mlp.down_proj.weight]  Loading weights:  18%|█▊        | 51/290 [00:00<00:03, 79.49it/s, Materializing param=model.layers.4.mlp.down_proj.weight]Loading weights:  18%|█▊        | 52/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.mlp.gate_proj.weight]Loading weights:  18%|█▊        | 52/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.mlp.gate_proj.weight]Loading weights:  18%|█▊        | 53/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.mlp.up_proj.weight]  Loading weights:  18%|█▊        | 53/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.mlp.up_proj.weight]Loading weights:  19%|█▊        | 54/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.post_attention_layernorm.weight]Loading weights:  19%|█▊        | 54/290 [00:00<00:02, 79.49it/s, 
Materializing param=model.layers.4.post_attention_layernorm.weight]Loading weights:  19%|█▉        | 55/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.self_attn.k_proj.bias]          Loading weights:  19%|█▉        | 55/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.self_attn.k_proj.bias]Loading weights:  19%|█▉        | 56/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.self_attn.k_proj.weight]Loading weights:  19%|█▉        | 56/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.self_attn.k_proj.weight]Loading weights:  20%|█▉        | 57/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.self_attn.o_proj.weight]Loading weights:  20%|█▉        | 57/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.self_attn.o_proj.weight]Loading weights:  20%|██        | 58/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.self_attn.q_proj.bias]  Loading weights:  20%|██        | 58/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.self_attn.q_proj.bias]Loading weights:  20%|██        | 59/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.self_attn.q_proj.weight]Loading weights:  20%|██        | 59/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.self_attn.q_proj.weight]Loading weights:  21%|██        | 60/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.self_attn.v_proj.bias]  Loading weights:  21%|██        | 60/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.self_attn.v_proj.bias]Loading weights:  21%|██        | 61/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.self_attn.v_proj.weight]Loading weights:  21%|██        | 61/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.4.self_attn.v_proj.weight]Loading weights:  21%|██▏       | 62/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.input_layernorm.weight] Loading weights:  21%|██▏       | 
62/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.input_layernorm.weight]Loading weights:  22%|██▏       | 63/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.mlp.down_proj.weight]  Loading weights:  22%|██▏       | 63/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.mlp.down_proj.weight]Loading weights:  22%|██▏       | 64/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.mlp.gate_proj.weight]Loading weights:  22%|██▏       | 64/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.mlp.gate_proj.weight]Loading weights:  22%|██▏       | 65/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.mlp.up_proj.weight]  Loading weights:  22%|██▏       | 65/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.mlp.up_proj.weight]Loading weights:  23%|██▎       | 66/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.post_attention_layernorm.weight]Loading weights:  23%|██▎       | 66/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.post_attention_layernorm.weight]Loading weights:  23%|██▎       | 67/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.self_attn.k_proj.bias]          Loading weights:  23%|██▎       | 67/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.self_attn.k_proj.bias]Loading weights:  23%|██▎       | 68/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.self_attn.k_proj.weight]Loading weights:  23%|██▎       | 68/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.self_attn.k_proj.weight]Loading weights:  24%|██▍       | 69/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.self_attn.o_proj.weight]Loading weights:  24%|██▍       | 69/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.self_attn.o_proj.weight]Loading weights:  24%|██▍       | 70/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.self_attn.q_proj.bias]  Loading 
weights:  24%|██▍       | 70/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.self_attn.q_proj.bias]Loading weights:  24%|██▍       | 71/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.self_attn.q_proj.weight]Loading weights:  24%|██▍       | 71/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.self_attn.q_proj.weight]Loading weights:  25%|██▍       | 72/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.self_attn.v_proj.bias]  Loading weights:  25%|██▍       | 72/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.self_attn.v_proj.bias]Loading weights:  25%|██▌       | 73/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.self_attn.v_proj.weight]Loading weights:  25%|██▌       | 73/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.5.self_attn.v_proj.weight]Loading weights:  26%|██▌       | 74/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.6.input_layernorm.weight] Loading weights:  26%|██▌       | 74/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.6.input_layernorm.weight]Loading weights:  26%|██▌       | 75/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.6.mlp.down_proj.weight]  Loading weights:  26%|██▌       | 75/290 [00:00<00:02, 79.49it/s, Materializing param=model.layers.6.mlp.down_proj.weight]Loading weights:  26%|██▌       | 76/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.mlp.down_proj.weight]Loading weights:  26%|██▌       | 76/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.mlp.gate_proj.weight]Loading weights:  26%|██▌       | 76/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.mlp.gate_proj.weight]Loading weights:  27%|██▋       | 77/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.mlp.up_proj.weight]  Loading weights:  27%|██▋       | 77/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.mlp.up_proj.weight]Loading 
weights:  27%|██▋       | 78/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.post_attention_layernorm.weight]Loading weights:  27%|██▋       | 78/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.post_attention_layernorm.weight]Loading weights:  27%|██▋       | 79/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.self_attn.k_proj.bias]          Loading weights:  27%|██▋       | 79/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.self_attn.k_proj.bias]Loading weights:  28%|██▊       | 80/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.self_attn.k_proj.weight]Loading weights:  28%|██▊       | 80/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.self_attn.k_proj.weight]Loading weights:  28%|██▊       | 81/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.self_attn.o_proj.weight]Loading weights:  28%|██▊       | 81/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.self_attn.o_proj.weight]Loading weights:  28%|██▊       | 82/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.self_attn.q_proj.bias]  Loading weights:  28%|██▊       | 82/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.self_attn.q_proj.bias]Loading weights:  29%|██▊       | 83/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.self_attn.q_proj.weight]Loading weights:  29%|██▊       | 83/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.self_attn.q_proj.weight]Loading weights:  29%|██▉       | 84/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.self_attn.v_proj.bias]  Loading weights:  29%|██▉       | 84/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.self_attn.v_proj.bias]Loading weights:  29%|██▉       | 85/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.6.self_attn.v_proj.weight]Loading weights:  29%|██▉       | 85/290 [00:00<00:01, 159.98it/s, Materializing 
param=model.layers.6.self_attn.v_proj.weight]Loading weights:  30%|██▉       | 86/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.input_layernorm.weight] Loading weights:  30%|██▉       | 86/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.input_layernorm.weight]Loading weights:  30%|███       | 87/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.mlp.down_proj.weight]  Loading weights:  30%|███       | 87/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.mlp.down_proj.weight]Loading weights:  30%|███       | 88/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.mlp.gate_proj.weight]Loading weights:  30%|███       | 88/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.mlp.gate_proj.weight]Loading weights:  31%|███       | 89/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.mlp.up_proj.weight]  Loading weights:  31%|███       | 89/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.mlp.up_proj.weight]Loading weights:  31%|███       | 90/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.post_attention_layernorm.weight]Loading weights:  31%|███       | 90/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.post_attention_layernorm.weight]Loading weights:  31%|███▏      | 91/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.self_attn.k_proj.bias]          Loading weights:  31%|███▏      | 91/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.self_attn.k_proj.bias]Loading weights:  32%|███▏      | 92/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.self_attn.k_proj.weight]Loading weights:  32%|███▏      | 92/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.self_attn.k_proj.weight]Loading weights:  32%|███▏      | 93/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.self_attn.o_proj.weight]Loading weights:  32%|███▏      | 93/290 
[00:00<00:01, 159.98it/s, Materializing param=model.layers.7.self_attn.o_proj.weight]Loading weights:  32%|███▏      | 94/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.self_attn.q_proj.bias]  Loading weights:  32%|███▏      | 94/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.self_attn.q_proj.bias]Loading weights:  33%|███▎      | 95/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.self_attn.q_proj.weight]Loading weights:  33%|███▎      | 95/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.self_attn.q_proj.weight]Loading weights:  33%|███▎      | 96/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.self_attn.v_proj.bias]  Loading weights:  33%|███▎      | 96/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.self_attn.v_proj.bias]Loading weights:  33%|███▎      | 97/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.self_attn.v_proj.weight]Loading weights:  33%|███▎      | 97/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.7.self_attn.v_proj.weight]Loading weights:  34%|███▍      | 98/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.8.input_layernorm.weight] Loading weights:  34%|███▍      | 98/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.8.input_layernorm.weight]Loading weights:  34%|███▍      | 99/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.8.mlp.down_proj.weight]  Loading weights:  34%|███▍      | 99/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.8.mlp.down_proj.weight]Loading weights:  34%|███▍      | 100/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.8.mlp.gate_proj.weight]Loading weights:  34%|███▍      | 100/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.8.mlp.gate_proj.weight]Loading weights:  35%|███▍      | 101/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.8.mlp.up_proj.weight]  Loading weights:  
35%|███▍      | 101/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.8.mlp.up_proj.weight]Loading weights:  35%|███▌      | 102/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.8.post_attention_layernorm.weight]Loading weights:  35%|███▌      | 102/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.8.post_attention_layernorm.weight]Loading weights:  36%|███▌      | 103/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.8.self_attn.k_proj.bias]          Loading weights:  36%|███▌      | 103/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.8.self_attn.k_proj.bias]Loading weights:  36%|███▌      | 104/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.8.self_attn.k_proj.weight]Loading weights:  36%|███▌      | 104/290 [00:00<00:01, 159.98it/s, Materializing param=model.layers.8.self_attn.k_proj.weight]Loading weights:  36%|███▌      | 105/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.8.self_attn.k_proj.weight]Loading weights:  36%|███▌      | 105/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.8.self_attn.o_proj.weight]Loading weights:  36%|███▌      | 105/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.8.self_attn.o_proj.weight]Loading weights:  37%|███▋      | 106/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.8.self_attn.q_proj.bias]  Loading weights:  37%|███▋      | 106/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.8.self_attn.q_proj.bias]Loading weights:  37%|███▋      | 107/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.8.self_attn.q_proj.weight]Loading weights:  37%|███▋      | 107/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.8.self_attn.q_proj.weight]Loading weights:  37%|███▋      | 108/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.8.self_attn.v_proj.bias]  Loading weights:  37%|███▋      | 108/290 [00:00<00:00, 194.21it/s, Materializing 
param=model.layers.8.self_attn.v_proj.bias]Loading weights:  38%|███▊      | 109/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.8.self_attn.v_proj.weight]Loading weights:  38%|███▊      | 109/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.8.self_attn.v_proj.weight]Loading weights:  38%|███▊      | 110/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.input_layernorm.weight] Loading weights:  38%|███▊      | 110/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.input_layernorm.weight]Loading weights:  38%|███▊      | 111/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.mlp.down_proj.weight]  Loading weights:  38%|███▊      | 111/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.mlp.down_proj.weight]Loading weights:  39%|███▊      | 112/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.mlp.gate_proj.weight]Loading weights:  39%|███▊      | 112/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.mlp.gate_proj.weight]Loading weights:  39%|███▉      | 113/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.mlp.up_proj.weight]  Loading weights:  39%|███▉      | 113/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.mlp.up_proj.weight]Loading weights:  39%|███▉      | 114/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.post_attention_layernorm.weight]Loading weights:  39%|███▉      | 114/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.post_attention_layernorm.weight]Loading weights:  40%|███▉      | 115/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.self_attn.k_proj.bias]          Loading weights:  40%|███▉      | 115/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.self_attn.k_proj.bias]Loading weights:  40%|████      | 116/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.self_attn.k_proj.weight]Loading weights:  40%|████      
| 116/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.self_attn.k_proj.weight]Loading weights:  40%|████      | 117/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.self_attn.o_proj.weight]Loading weights:  40%|████      | 117/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.self_attn.o_proj.weight]Loading weights:  41%|████      | 118/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.self_attn.q_proj.bias]  Loading weights:  41%|████      | 118/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.self_attn.q_proj.bias]Loading weights:  41%|████      | 119/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.self_attn.q_proj.weight]Loading weights:  41%|████      | 119/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.self_attn.q_proj.weight]Loading weights:  41%|████▏     | 120/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.self_attn.v_proj.bias]  Loading weights:  41%|████▏     | 120/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.self_attn.v_proj.bias]Loading weights:  42%|████▏     | 121/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.self_attn.v_proj.weight]Loading weights:  42%|████▏     | 121/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.9.self_attn.v_proj.weight]Loading weights:  42%|████▏     | 122/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.input_layernorm.weight]Loading weights:  42%|████▏     | 122/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.input_layernorm.weight]Loading weights:  42%|████▏     | 123/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.mlp.down_proj.weight]  Loading weights:  42%|████▏     | 123/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.mlp.down_proj.weight]Loading weights:  43%|████▎     | 124/290 [00:00<00:00, 194.21it/s, Materializing 
param=model.layers.10.mlp.gate_proj.weight]Loading weights:  43%|████▎     | 124/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.mlp.gate_proj.weight]Loading weights:  43%|████▎     | 125/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.mlp.up_proj.weight]  Loading weights:  43%|████▎     | 125/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.mlp.up_proj.weight]Loading weights:  43%|████▎     | 126/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.post_attention_layernorm.weight]Loading weights:  43%|████▎     | 126/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.post_attention_layernorm.weight]Loading weights:  44%|████▍     | 127/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.self_attn.k_proj.bias]          Loading weights:  44%|████▍     | 127/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.self_attn.k_proj.bias]Loading weights:  44%|████▍     | 128/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.self_attn.k_proj.weight]Loading weights:  44%|████▍     | 128/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.self_attn.k_proj.weight]Loading weights:  44%|████▍     | 129/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.self_attn.o_proj.weight]Loading weights:  44%|████▍     | 129/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.self_attn.o_proj.weight]Loading weights:  45%|████▍     | 130/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.self_attn.q_proj.bias]  Loading weights:  45%|████▍     | 130/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.self_attn.q_proj.bias]Loading weights:  45%|████▌     | 131/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.self_attn.q_proj.weight]Loading weights:  45%|████▌     | 131/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.self_attn.q_proj.weight]Loading 
weights:  46%|████▌     | 132/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.self_attn.v_proj.bias]  Loading weights:  46%|████▌     | 132/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.self_attn.v_proj.bias]Loading weights:  46%|████▌     | 133/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.self_attn.v_proj.weight]Loading weights:  46%|████▌     | 133/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.10.self_attn.v_proj.weight]Loading weights:  46%|████▌     | 134/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.11.input_layernorm.weight] Loading weights:  46%|████▌     | 134/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.11.input_layernorm.weight]Loading weights:  47%|████▋     | 135/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.11.mlp.down_proj.weight]  Loading weights:  47%|████▋     | 135/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.11.mlp.down_proj.weight]Loading weights:  47%|████▋     | 136/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.11.mlp.gate_proj.weight]Loading weights:  47%|████▋     | 136/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.11.mlp.gate_proj.weight]Loading weights:  47%|████▋     | 137/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.11.mlp.up_proj.weight]  Loading weights:  47%|████▋     | 137/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.11.mlp.up_proj.weight]Loading weights:  48%|████▊     | 138/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.11.post_attention_layernorm.weight]Loading weights:  48%|████▊     | 138/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.11.post_attention_layernorm.weight]Loading weights:  48%|████▊     | 139/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.11.self_attn.k_proj.bias]          Loading weights:  48%|████▊     | 139/290 [00:00<00:00, 194.21it/s, 
Materializing param=model.layers.11.self_attn.k_proj.bias]Loading weights:  48%|████▊     | 140/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.11.self_attn.k_proj.weight]Loading weights:  48%|████▊     | 140/290 [00:00<00:00, 194.21it/s, Materializing param=model.layers.11.self_attn.k_proj.weight]Loading weights:  49%|████▊     | 141/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.11.self_attn.k_proj.weight]Loading weights:  49%|████▊     | 141/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.11.self_attn.o_proj.weight]Loading weights:  49%|████▊     | 141/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.11.self_attn.o_proj.weight]Loading weights:  49%|████▉     | 142/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.11.self_attn.q_proj.bias]  Loading weights:  49%|████▉     | 142/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.11.self_attn.q_proj.bias]Loading weights:  49%|████▉     | 143/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.11.self_attn.q_proj.weight]Loading weights:  49%|████▉     | 143/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.11.self_attn.q_proj.weight]Loading weights:  50%|████▉     | 144/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.11.self_attn.v_proj.bias]  Loading weights:  50%|████▉     | 144/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.11.self_attn.v_proj.bias]Loading weights:  50%|█████     | 145/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.11.self_attn.v_proj.weight]Loading weights:  50%|█████     | 145/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.11.self_attn.v_proj.weight]Loading weights:  50%|█████     | 146/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.input_layernorm.weight] Loading weights:  50%|█████     | 146/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.input_layernorm.weight]Loading 
weights:  51%|█████     | 147/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.mlp.down_proj.weight]  Loading weights:  51%|█████     | 147/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.mlp.down_proj.weight]Loading weights:  51%|█████     | 148/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.mlp.gate_proj.weight]Loading weights:  51%|█████     | 148/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.mlp.gate_proj.weight]Loading weights:  51%|█████▏    | 149/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.mlp.up_proj.weight]  Loading weights:  51%|█████▏    | 149/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.mlp.up_proj.weight]Loading weights:  52%|█████▏    | 150/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.post_attention_layernorm.weight]Loading weights:  52%|█████▏    | 150/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.post_attention_layernorm.weight]Loading weights:  52%|█████▏    | 151/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.self_attn.k_proj.bias]          Loading weights:  52%|█████▏    | 151/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.self_attn.k_proj.bias]Loading weights:  52%|█████▏    | 152/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.self_attn.k_proj.weight]Loading weights:  52%|█████▏    | 152/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.self_attn.k_proj.weight]Loading weights:  53%|█████▎    | 153/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.self_attn.o_proj.weight]Loading weights:  53%|█████▎    | 153/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.self_attn.o_proj.weight]Loading weights:  53%|█████▎    | 154/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.self_attn.q_proj.bias]  Loading weights:  53%|█████▎    | 154/290 [00:00<00:00, 240.51it/s, 
Materializing param=model.layers.12.self_attn.q_proj.bias]Loading weights:  53%|█████▎    | 155/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.self_attn.q_proj.weight]Loading weights:  53%|█████▎    | 155/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.self_attn.q_proj.weight]Loading weights:  54%|█████▍    | 156/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.self_attn.v_proj.bias]  Loading weights:  54%|█████▍    | 156/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.self_attn.v_proj.bias]Loading weights:  54%|█████▍    | 157/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.self_attn.v_proj.weight]Loading weights:  54%|█████▍    | 157/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.12.self_attn.v_proj.weight]Loading weights:  54%|█████▍    | 158/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.input_layernorm.weight] Loading weights:  54%|█████▍    | 158/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.input_layernorm.weight]Loading weights:  55%|█████▍    | 159/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.mlp.down_proj.weight]  Loading weights:  55%|█████▍    | 159/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.mlp.down_proj.weight]Loading weights:  55%|█████▌    | 160/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.mlp.gate_proj.weight]Loading weights:  55%|█████▌    | 160/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.mlp.gate_proj.weight]Loading weights:  56%|█████▌    | 161/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.mlp.up_proj.weight]  Loading weights:  56%|█████▌    | 161/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.mlp.up_proj.weight]Loading weights:  56%|█████▌    | 162/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.post_attention_layernorm.weight]Loading weights:  
56%|█████▌    | 162/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.post_attention_layernorm.weight]Loading weights:  56%|█████▌    | 163/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.self_attn.k_proj.bias]          Loading weights:  56%|█████▌    | 163/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.self_attn.k_proj.bias]Loading weights:  57%|█████▋    | 164/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.self_attn.k_proj.weight]Loading weights:  57%|█████▋    | 164/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.self_attn.k_proj.weight]Loading weights:  57%|█████▋    | 165/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.self_attn.o_proj.weight]Loading weights:  57%|█████▋    | 165/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.self_attn.o_proj.weight]Loading weights:  57%|█████▋    | 166/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.self_attn.q_proj.bias]  Loading weights:  57%|█████▋    | 166/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.self_attn.q_proj.bias]Loading weights:  58%|█████▊    | 167/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.self_attn.q_proj.weight]Loading weights:  58%|█████▊    | 167/290 [00:00<00:00, 240.51it/s, Materializing param=model.layers.13.self_attn.q_proj.weight]Loading weights:  58%|█████▊    | 168/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.13.self_attn.v_proj.bias]  Loading weights:  58%|█████▊    | 168/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.13.self_attn.v_proj.bias]Loading weights:  58%|█████▊    | 169/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.13.self_attn.v_proj.weight]Loading weights:  58%|█████▊    | 169/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.13.self_attn.v_proj.weight]Loading weights:  59%|█████▊    | 170/290 [00:01<00:00, 240.51it/s, 
Materializing param=model.layers.14.input_layernorm.weight] Loading weights:  59%|█████▊    | 170/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.14.input_layernorm.weight]Loading weights:  59%|█████▉    | 171/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.14.mlp.down_proj.weight]  Loading weights:  59%|█████▉    | 171/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.14.mlp.down_proj.weight]Loading weights:  59%|█████▉    | 172/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.14.mlp.gate_proj.weight]Loading weights:  59%|█████▉    | 172/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.14.mlp.gate_proj.weight]Loading weights:  60%|█████▉    | 173/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.14.mlp.up_proj.weight]  Loading weights:  60%|█████▉    | 173/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.14.mlp.up_proj.weight]Loading weights:  60%|██████    | 174/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.14.post_attention_layernorm.weight]Loading weights:  60%|██████    | 174/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.14.post_attention_layernorm.weight]Loading weights:  60%|██████    | 175/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.14.self_attn.k_proj.bias]          Loading weights:  60%|██████    | 175/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.14.self_attn.k_proj.bias]Loading weights:  61%|██████    | 176/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.14.self_attn.k_proj.weight]Loading weights:  61%|██████    | 176/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.14.self_attn.k_proj.weight]Loading weights:  61%|██████    | 177/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.14.self_attn.o_proj.weight]Loading weights:  61%|██████    | 177/290 [00:01<00:00, 240.51it/s, Materializing param=model.layers.14.self_attn.o_proj.weight]
Loading weights:  61%|██████▏   | 178/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.14.self_attn.o_proj.weight]Loading weights:  61%|██████▏   | 178/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.14.self_attn.q_proj.bias]  Loading weights:  61%|██████▏   | 178/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.14.self_attn.q_proj.bias]Loading weights:  62%|██████▏   | 179/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.14.self_attn.q_proj.weight]Loading weights:  62%|██████▏   | 179/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.14.self_attn.q_proj.weight]Loading weights:  62%|██████▏   | 180/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.14.self_attn.v_proj.bias]  Loading weights:  62%|██████▏   | 180/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.14.self_attn.v_proj.bias]Loading weights:  62%|██████▏   | 181/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.14.self_attn.v_proj.weight]Loading weights:  62%|██████▏   | 181/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.14.self_attn.v_proj.weight]Loading weights:  63%|██████▎   | 182/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.input_layernorm.weight] Loading weights:  63%|██████▎   | 182/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.input_layernorm.weight]Loading weights:  63%|██████▎   | 183/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.mlp.down_proj.weight]  Loading weights:  63%|██████▎   | 183/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.mlp.down_proj.weight]Loading weights:  63%|██████▎   | 184/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.mlp.gate_proj.weight]Loading weights:  63%|██████▎   | 184/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.mlp.gate_proj.weight]Loading weights:  64%|██████▍   | 185/290 [00:01<00:00, 275.02it/s, 
Materializing param=model.layers.15.mlp.up_proj.weight]  Loading weights:  64%|██████▍   | 185/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.mlp.up_proj.weight]Loading weights:  64%|██████▍   | 186/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.post_attention_layernorm.weight]Loading weights:  64%|██████▍   | 186/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.post_attention_layernorm.weight]Loading weights:  64%|██████▍   | 187/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.self_attn.k_proj.bias]          Loading weights:  64%|██████▍   | 187/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.self_attn.k_proj.bias]Loading weights:  65%|██████▍   | 188/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.self_attn.k_proj.weight]Loading weights:  65%|██████▍   | 188/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.self_attn.k_proj.weight]Loading weights:  65%|██████▌   | 189/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.self_attn.o_proj.weight]Loading weights:  65%|██████▌   | 189/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.self_attn.o_proj.weight]Loading weights:  66%|██████▌   | 190/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.self_attn.q_proj.bias]  Loading weights:  66%|██████▌   | 190/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.self_attn.q_proj.bias]Loading weights:  66%|██████▌   | 191/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.self_attn.q_proj.weight]Loading weights:  66%|██████▌   | 191/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.self_attn.q_proj.weight]Loading weights:  66%|██████▌   | 192/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.self_attn.v_proj.bias]  Loading weights:  66%|██████▌   | 192/290 [00:01<00:00, 275.02it/s, Materializing 
param=model.layers.15.self_attn.v_proj.bias]Loading weights:  67%|██████▋   | 193/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.self_attn.v_proj.weight]Loading weights:  67%|██████▋   | 193/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.15.self_attn.v_proj.weight]Loading weights:  67%|██████▋   | 194/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.input_layernorm.weight] Loading weights:  67%|██████▋   | 194/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.input_layernorm.weight]Loading weights:  67%|██████▋   | 195/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.mlp.down_proj.weight]  Loading weights:  67%|██████▋   | 195/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.mlp.down_proj.weight]Loading weights:  68%|██████▊   | 196/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.mlp.gate_proj.weight]Loading weights:  68%|██████▊   | 196/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.mlp.gate_proj.weight]Loading weights:  68%|██████▊   | 197/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.mlp.up_proj.weight]  Loading weights:  68%|██████▊   | 197/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.mlp.up_proj.weight]Loading weights:  68%|██████▊   | 198/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.post_attention_layernorm.weight]Loading weights:  68%|██████▊   | 198/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.post_attention_layernorm.weight]Loading weights:  69%|██████▊   | 199/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.self_attn.k_proj.bias]          Loading weights:  69%|██████▊   | 199/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.self_attn.k_proj.bias]Loading weights:  69%|██████▉   | 200/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.self_attn.k_proj.weight]Loading weights: 
 69%|██████▉   | 200/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.self_attn.k_proj.weight]Loading weights:  69%|██████▉   | 201/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.self_attn.o_proj.weight]Loading weights:  69%|██████▉   | 201/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.self_attn.o_proj.weight]Loading weights:  70%|██████▉   | 202/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.self_attn.q_proj.bias]  Loading weights:  70%|██████▉   | 202/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.self_attn.q_proj.bias]Loading weights:  70%|███████   | 203/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.self_attn.q_proj.weight]Loading weights:  70%|███████   | 203/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.self_attn.q_proj.weight]Loading weights:  70%|███████   | 204/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.self_attn.v_proj.bias]  Loading weights:  70%|███████   | 204/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.self_attn.v_proj.bias]Loading weights:  71%|███████   | 205/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.self_attn.v_proj.weight]Loading weights:  71%|███████   | 205/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.16.self_attn.v_proj.weight]Loading weights:  71%|███████   | 206/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.17.input_layernorm.weight] Loading weights:  71%|███████   | 206/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.17.input_layernorm.weight]Loading weights:  71%|███████▏  | 207/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.17.mlp.down_proj.weight]  Loading weights:  71%|███████▏  | 207/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.17.mlp.down_proj.weight]Loading weights:  72%|███████▏  | 208/290 [00:01<00:00, 275.02it/s, Materializing 
param=model.layers.17.mlp.gate_proj.weight]Loading weights:  72%|███████▏  | 208/290 [00:01<00:00, 275.02it/s, Materializing param=model.layers.17.mlp.gate_proj.weight]Loading weights:  72%|███████▏  | 209/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.mlp.gate_proj.weight]Loading weights:  72%|███████▏  | 209/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.mlp.up_proj.weight]  Loading weights:  72%|███████▏  | 209/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.mlp.up_proj.weight]Loading weights:  72%|███████▏  | 210/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.post_attention_layernorm.weight]Loading weights:  72%|███████▏  | 210/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.post_attention_layernorm.weight]Loading weights:  73%|███████▎  | 211/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.self_attn.k_proj.bias]          Loading weights:  73%|███████▎  | 211/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.self_attn.k_proj.bias]Loading weights:  73%|███████▎  | 212/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.self_attn.k_proj.weight]Loading weights:  73%|███████▎  | 212/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.self_attn.k_proj.weight]Loading weights:  73%|███████▎  | 213/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.self_attn.o_proj.weight]Loading weights:  73%|███████▎  | 213/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.self_attn.o_proj.weight]Loading weights:  74%|███████▍  | 214/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.self_attn.q_proj.bias]  Loading weights:  74%|███████▍  | 214/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.self_attn.q_proj.bias]Loading weights:  74%|███████▍  | 215/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.self_attn.q_proj.weight]Loading 
weights:  74%|███████▍  | 215/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.self_attn.q_proj.weight]Loading weights:  74%|███████▍  | 216/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.self_attn.v_proj.bias]  Loading weights:  74%|███████▍  | 216/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.self_attn.v_proj.bias]Loading weights:  75%|███████▍  | 217/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.self_attn.v_proj.weight]Loading weights:  75%|███████▍  | 217/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.17.self_attn.v_proj.weight]Loading weights:  75%|███████▌  | 218/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.input_layernorm.weight] Loading weights:  75%|███████▌  | 218/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.input_layernorm.weight]Loading weights:  76%|███████▌  | 219/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.mlp.down_proj.weight]  Loading weights:  76%|███████▌  | 219/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.mlp.down_proj.weight]Loading weights:  76%|███████▌  | 220/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.mlp.gate_proj.weight]Loading weights:  76%|███████▌  | 220/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.mlp.gate_proj.weight]Loading weights:  76%|███████▌  | 221/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.mlp.up_proj.weight]  Loading weights:  76%|███████▌  | 221/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.mlp.up_proj.weight]Loading weights:  77%|███████▋  | 222/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.post_attention_layernorm.weight]Loading weights:  77%|███████▋  | 222/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.post_attention_layernorm.weight]Loading weights:  77%|███████▋  | 223/290 [00:01<00:00, 280.26it/s, 
Materializing param=model.layers.18.self_attn.k_proj.bias]          Loading weights:  77%|███████▋  | 223/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.self_attn.k_proj.bias]Loading weights:  77%|███████▋  | 224/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.self_attn.k_proj.weight]Loading weights:  77%|███████▋  | 224/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.self_attn.k_proj.weight]Loading weights:  78%|███████▊  | 225/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.self_attn.o_proj.weight]Loading weights:  78%|███████▊  | 225/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.self_attn.o_proj.weight]Loading weights:  78%|███████▊  | 226/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.self_attn.q_proj.bias]  Loading weights:  78%|███████▊  | 226/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.self_attn.q_proj.bias]Loading weights:  78%|███████▊  | 227/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.self_attn.q_proj.weight]Loading weights:  78%|███████▊  | 227/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.self_attn.q_proj.weight]Loading weights:  79%|███████▊  | 228/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.self_attn.v_proj.bias]  Loading weights:  79%|███████▊  | 228/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.self_attn.v_proj.bias]Loading weights:  79%|███████▉  | 229/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.self_attn.v_proj.weight]Loading weights:  79%|███████▉  | 229/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.18.self_attn.v_proj.weight]Loading weights:  79%|███████▉  | 230/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.input_layernorm.weight] Loading weights:  79%|███████▉  | 230/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.input_layernorm.weight]
Loading weights:  80%|███████▉  | 231/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.mlp.down_proj.weight]  Loading weights:  80%|███████▉  | 231/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.mlp.down_proj.weight]Loading weights:  80%|████████  | 232/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.mlp.gate_proj.weight]Loading weights:  80%|████████  | 232/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.mlp.gate_proj.weight]Loading weights:  80%|████████  | 233/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.mlp.up_proj.weight]  Loading weights:  80%|████████  | 233/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.mlp.up_proj.weight]Loading weights:  81%|████████  | 234/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.post_attention_layernorm.weight]Loading weights:  81%|████████  | 234/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.post_attention_layernorm.weight]Loading weights:  81%|████████  | 235/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.self_attn.k_proj.bias]          Loading weights:  81%|████████  | 235/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.self_attn.k_proj.bias]Loading weights:  81%|████████▏ | 236/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.self_attn.k_proj.weight]Loading weights:  81%|████████▏ | 236/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.self_attn.k_proj.weight]Loading weights:  82%|████████▏ | 237/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.self_attn.o_proj.weight]Loading weights:  82%|████████▏ | 237/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.self_attn.o_proj.weight]Loading weights:  82%|████████▏ | 238/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.self_attn.q_proj.bias]  Loading weights:  82%|████████▏ | 238/290 [00:01<00:00, 
280.26it/s, Materializing param=model.layers.19.self_attn.q_proj.bias]Loading weights:  82%|████████▏ | 239/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.self_attn.q_proj.weight]Loading weights:  82%|████████▏ | 239/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.self_attn.q_proj.weight]Loading weights:  83%|████████▎ | 240/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.self_attn.v_proj.bias]  Loading weights:  83%|████████▎ | 240/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.self_attn.v_proj.bias]Loading weights:  83%|████████▎ | 241/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.self_attn.v_proj.weight]Loading weights:  83%|████████▎ | 241/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.19.self_attn.v_proj.weight]Loading weights:  83%|████████▎ | 242/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.20.input_layernorm.weight] Loading weights:  83%|████████▎ | 242/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.20.input_layernorm.weight]Loading weights:  84%|████████▍ | 243/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.20.mlp.down_proj.weight]  Loading weights:  84%|████████▍ | 243/290 [00:01<00:00, 280.26it/s, Materializing param=model.layers.20.mlp.down_proj.weight]Loading weights:  84%|████████▍ | 244/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.mlp.down_proj.weight]Loading weights:  84%|████████▍ | 244/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.mlp.gate_proj.weight]Loading weights:  84%|████████▍ | 244/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.mlp.gate_proj.weight]Loading weights:  84%|████████▍ | 245/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.mlp.up_proj.weight]  Loading weights:  84%|████████▍ | 245/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.mlp.up_proj.weight]Loading weights:  
85%|████████▍ | 246/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.post_attention_layernorm.weight]Loading weights:  85%|████████▍ | 246/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.post_attention_layernorm.weight]Loading weights:  85%|████████▌ | 247/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.self_attn.k_proj.bias]          Loading weights:  85%|████████▌ | 247/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.self_attn.k_proj.bias]Loading weights:  86%|████████▌ | 248/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.self_attn.k_proj.weight]Loading weights:  86%|████████▌ | 248/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.self_attn.k_proj.weight]Loading weights:  86%|████████▌ | 249/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.self_attn.o_proj.weight]Loading weights:  86%|████████▌ | 249/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.self_attn.o_proj.weight]Loading weights:  86%|████████▌ | 250/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.self_attn.q_proj.bias]  Loading weights:  86%|████████▌ | 250/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.self_attn.q_proj.bias]Loading weights:  87%|████████▋ | 251/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.self_attn.q_proj.weight]Loading weights:  87%|████████▋ | 251/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.self_attn.q_proj.weight]Loading weights:  87%|████████▋ | 252/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.self_attn.v_proj.bias]  Loading weights:  87%|████████▋ | 252/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.self_attn.v_proj.bias]Loading weights:  87%|████████▋ | 253/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.20.self_attn.v_proj.weight]Loading weights:  87%|████████▋ | 253/290 [00:01<00:00, 
298.67it/s, Materializing param=model.layers.20.self_attn.v_proj.weight]Loading weights:  88%|████████▊ | 254/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.input_layernorm.weight] Loading weights:  88%|████████▊ | 254/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.input_layernorm.weight]Loading weights:  88%|████████▊ | 255/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.mlp.down_proj.weight]  Loading weights:  88%|████████▊ | 255/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.mlp.down_proj.weight]Loading weights:  88%|████████▊ | 256/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.mlp.gate_proj.weight]Loading weights:  88%|████████▊ | 256/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.mlp.gate_proj.weight]Loading weights:  89%|████████▊ | 257/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.mlp.up_proj.weight]  Loading weights:  89%|████████▊ | 257/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.mlp.up_proj.weight]Loading weights:  89%|████████▉ | 258/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.post_attention_layernorm.weight]Loading weights:  89%|████████▉ | 258/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.post_attention_layernorm.weight]Loading weights:  89%|████████▉ | 259/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.self_attn.k_proj.bias]          Loading weights:  89%|████████▉ | 259/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.self_attn.k_proj.bias]Loading weights:  90%|████████▉ | 260/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.self_attn.k_proj.weight]Loading weights:  90%|████████▉ | 260/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.self_attn.k_proj.weight]Loading weights:  90%|█████████ | 261/290 [00:01<00:00, 298.67it/s, Materializing 
param=model.layers.21.self_attn.o_proj.weight]Loading weights:  90%|█████████ | 261/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.self_attn.o_proj.weight]Loading weights:  90%|█████████ | 262/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.self_attn.q_proj.bias]  Loading weights:  90%|█████████ | 262/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.self_attn.q_proj.bias]Loading weights:  91%|█████████ | 263/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.self_attn.q_proj.weight]Loading weights:  91%|█████████ | 263/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.self_attn.q_proj.weight]Loading weights:  91%|█████████ | 264/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.self_attn.v_proj.bias]  Loading weights:  91%|█████████ | 264/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.self_attn.v_proj.bias]Loading weights:  91%|█████████▏| 265/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.self_attn.v_proj.weight]Loading weights:  91%|█████████▏| 265/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.21.self_attn.v_proj.weight]Loading weights:  92%|█████████▏| 266/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.input_layernorm.weight] Loading weights:  92%|█████████▏| 266/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.input_layernorm.weight]Loading weights:  92%|█████████▏| 267/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.mlp.down_proj.weight]  Loading weights:  92%|█████████▏| 267/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.mlp.down_proj.weight]Loading weights:  92%|█████████▏| 268/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.mlp.gate_proj.weight]Loading weights:  92%|█████████▏| 268/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.mlp.gate_proj.weight]Loading weights:  
93%|█████████▎| 269/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.mlp.up_proj.weight]  Loading weights:  93%|█████████▎| 269/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.mlp.up_proj.weight]Loading weights:  93%|█████████▎| 270/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.post_attention_layernorm.weight]Loading weights:  93%|█████████▎| 270/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.post_attention_layernorm.weight]Loading weights:  93%|█████████▎| 271/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.self_attn.k_proj.bias]          Loading weights:  93%|█████████▎| 271/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.self_attn.k_proj.bias]Loading weights:  94%|█████████▍| 272/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.self_attn.k_proj.weight]Loading weights:  94%|█████████▍| 272/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.self_attn.k_proj.weight]Loading weights:  94%|█████████▍| 273/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.self_attn.o_proj.weight]Loading weights:  94%|█████████▍| 273/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.self_attn.o_proj.weight]Loading weights:  94%|█████████▍| 274/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.self_attn.q_proj.bias]  Loading weights:  94%|█████████▍| 274/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.self_attn.q_proj.bias]Loading weights:  95%|█████████▍| 275/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.self_attn.q_proj.weight]Loading weights:  95%|█████████▍| 275/290 [00:01<00:00, 298.67it/s, Materializing param=model.layers.22.self_attn.q_proj.weight]Loading weights:  95%|█████████▌| 276/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.22.self_attn.q_proj.weight]Loading weights:  95%|█████████▌| 276/290 [00:01<00:00, 276.47it/s, 
Materializing param=model.layers.22.self_attn.v_proj.bias]  Loading weights:  95%|█████████▌| 276/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.22.self_attn.v_proj.bias]Loading weights:  96%|█████████▌| 277/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.22.self_attn.v_proj.weight]Loading weights:  96%|█████████▌| 277/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.22.self_attn.v_proj.weight]Loading weights:  96%|█████████▌| 278/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.input_layernorm.weight] Loading weights:  96%|█████████▌| 278/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.input_layernorm.weight]Loading weights:  96%|█████████▌| 279/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.mlp.down_proj.weight]  Loading weights:  96%|█████████▌| 279/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.mlp.down_proj.weight]Loading weights:  97%|█████████▋| 280/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.mlp.gate_proj.weight]Loading weights:  97%|█████████▋| 280/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.mlp.gate_proj.weight]Loading weights:  97%|█████████▋| 281/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.mlp.up_proj.weight]  Loading weights:  97%|█████████▋| 281/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.mlp.up_proj.weight]Loading weights:  97%|█████████▋| 282/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.post_attention_layernorm.weight]Loading weights:  97%|█████████▋| 282/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.post_attention_layernorm.weight]Loading weights:  98%|█████████▊| 283/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.self_attn.k_proj.bias]          Loading weights:  98%|█████████▊| 283/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.self_attn.k_proj.bias]
Loading weights:  98%|█████████▊| 284/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.self_attn.k_proj.weight]Loading weights:  98%|█████████▊| 284/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.self_attn.k_proj.weight]Loading weights:  98%|█████████▊| 285/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.self_attn.o_proj.weight]Loading weights:  98%|█████████▊| 285/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.self_attn.o_proj.weight]Loading weights:  99%|█████████▊| 286/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.self_attn.q_proj.bias]  Loading weights:  99%|█████████▊| 286/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.self_attn.q_proj.bias]Loading weights:  99%|█████████▉| 287/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.self_attn.q_proj.weight]Loading weights:  99%|█████████▉| 287/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.self_attn.q_proj.weight]Loading weights:  99%|█████████▉| 288/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.self_attn.v_proj.bias]  Loading weights:  99%|█████████▉| 288/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.self_attn.v_proj.bias]Loading weights: 100%|█████████▉| 289/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.self_attn.v_proj.weight]Loading weights: 100%|█████████▉| 289/290 [00:01<00:00, 276.47it/s, Materializing param=model.layers.23.self_attn.v_proj.weight]Loading weights: 100%|██████████| 290/290 [00:01<00:00, 276.47it/s, Materializing param=model.norm.weight]                      Loading weights: 100%|██████████| 290/290 [00:01<00:00, 276.47it/s, Materializing param=model.norm.weight]Loading weights: 100%|██████████| 290/290 [00:01<00:00, 206.03it/s, Materializing param=model.norm.weight]
+unsloth/qwen2.5-0.5b-instruct-unsloth-bnb-4bit does not have a padding token! Will use pad_token = <|PAD_TOKEN|>.
+Unsloth 2026.3.3 patched 24 layers with 24 QKV layers, 24 O layers and 24 MLP layers.
+[33mWarning: `huggingface-cli` is deprecated and no longer works. Use `hf` instead.
+[0m
+[90mHint: `hf` is already installed! Use it directly.
+[0m
+[90mHint: Examples:
+  hf auth login
+  hf download unsloth/gemma-4-31B-it-GGUF
+  hf upload my-cool-model . .
+  hf models ls --search "gemma"
+  hf repos ls --format json
+  hf jobs run python:3.12 python -c 'print("Hello!")'
+  hf --help
+[0m
+INFO 04-13 02:36:48 [model.py:531] Resolved architecture: Qwen2ForCausalLM
+INFO 04-13 02:36:48 [model.py:1554] Using max model len 32768
+INFO 04-13 02:36:48 [scheduler.py:231] Chunked prefill is enabled with max_num_batched_tokens=2048.
+INFO 04-13 02:36:48 [vllm.py:747] Asynchronous scheduling is enabled.
+WARNING 04-13 02:36:50 [system_utils.py:152] We must use the `spawn` multiprocessing start method. Overriding VLLM_WORKER_MULTIPROC_METHOD to 'spawn'. See https://docs.vllm.ai/en/latest/usage/troubleshooting.html#python-multiprocessing for more information. Reasons: CUDA is initialized
+Skipping import of cpp extensions due to incompatible torch version 2.10.0+cu128 for torchao version 0.15.0             Please see https://github.com/pytorch/ao/issues/2919 for more info
+/usr/local/lib/python3.12/dist-packages/art/__init__.py:37: UserWarning: WARNING: Unsloth should be imported before [transformers] to ensure all optimizations are applied. Your code may run slower or encounter memory issues without these optimizations.
+
+Please restructure your imports with 'import unsloth' at the top of your file.
+  import unsloth  # noqa: F401
+🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
+🦥 Unsloth Zoo will now patch everything to make training faster!
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:15 [core.py:101] Initializing a V1 LLM engine (v0.17.0+art1) with config: model='Qwen/Qwen2.5-0.5B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-0.5B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=32768, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, data_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, enable_return_routed_experts=False, kv_cache_dtype=auto, device_config=cuda, structured_outputs_config=StructuredOutputsConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_parser='', reasoning_parser_plugin='', enable_in_reasoning=False), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, kv_cache_metrics=False, kv_cache_metrics_sample=0.01, cudagraph_metrics=False, enable_layerwise_nvtx_tracing=False, enable_mfu_metrics=False, enable_mm_processor_stats=False, enable_logging_iteration_details=False), seed=0, served_model_name=Qwen/Qwen2.5-0.5B-Instruct, enable_prefix_caching=True, enable_chunked_prefill=True, pooler_config=None, compilation_config={'level': None, 'mode': <CompilationMode.VLLM_COMPILE: 3>, 'debug_dump_path': None, 'cache_dir': '', 'compile_cache_save_format': 'binary', 'backend': 'inductor', 'custom_ops': ['none'], 'splitting_ops': ['vllm::unified_attention', 'vllm::unified_attention_with_output', 'vllm::unified_mla_attention', 'vllm::unified_mla_attention_with_output', 'vllm::mamba_mixer2', 'vllm::mamba_mixer', 'vllm::short_conv', 'vllm::linear_attention', 'vllm::plamo2_mamba_mixer', 'vllm::gdn_attention_core', 'vllm::kda_attention', 'vllm::sparse_attn_indexer', 'vllm::rocm_aiter_sparse_attn_indexer', 'vllm::unified_kv_cache_update', 
'vllm::unified_mla_kv_cache_update'], 'compile_mm_encoder': False, 'compile_sizes': [], 'compile_ranges_split_points': [2048], 'inductor_compile_config': {'enable_auto_functionalized_v2': False, 'combo_kernels': True, 'benchmark_combo_kernel': True}, 'inductor_passes': {}, 'cudagraph_mode': <CUDAGraphMode.FULL_AND_PIECEWISE: (2, 1)>, 'cudagraph_num_of_warmups': 1, 'cudagraph_capture_sizes': [1, 2, 4, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256], 'cudagraph_copy_inputs': False, 'cudagraph_specialize_lora': True, 'use_inductor_graph_partition': False, 'pass_config': {'fuse_norm_quant': False, 'fuse_act_quant': False, 'fuse_attn_quant': False, 'enable_sp': False, 'fuse_gemm_comms': False, 'fuse_allreduce_rms': False}, 'max_cudagraph_capture_size': 256, 'dynamic_shapes_config': {'type': <DynamicShapesType.BACKED: 'backed'>, 'evaluate_guards': False, 'assume_32_bit_indexing': False}, 'local_cache_dir': None, 'fast_moe_cold_start': True, 'static_all_moe_layers': []}
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:15 [worker_base.py:283] Injected <class 'art.vllm.engine.WorkerExtension'> into <class 'vllm.v1.worker.gpu_worker.Worker'> for extended collective_rpc calls ['run', 'time']
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:15 [parallel_state.py:1393] world_size=1 rank=0 local_rank=0 distributed_init_method=tcp://172.21.0.2:42797 backend=nccl
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:15 [parallel_state.py:1715] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, PCP rank 0, TP rank 0, EP rank N/A, EPLB rank N/A
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:16 [base.py:106] Offloader set to NoopOffloader
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:16 [gpu_model_runner.py:4255] Starting to load model Qwen/Qwen2.5-0.5B-Instruct...
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:17 [cuda.py:405] Using FLASH_ATTN attention backend out of potential backends: ['FLASH_ATTN', 'FLASHINFER', 'TRITON_ATTN', 'FLEX_ATTENTION'].
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:17 [flash_attn.py:587] Using FlashAttention version 2
+(EngineCore_DP0 pid=15589) <frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead.
+(EngineCore_DP0 pid=15589) <frozen importlib._bootstrap_external>:1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead.
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:18 [weight_utils.py:601] No model.safetensors.index.json found in remote.
+(EngineCore_DP0 pid=15589) Loading safetensors checkpoint shards:   0% Completed | 0/1 [00:00<?, ?it/s]
+(EngineCore_DP0 pid=15589) Loading safetensors checkpoint shards: 100% Completed | 1/1 [00:01<00:00,  1.78s/it]
+(EngineCore_DP0 pid=15589) Loading safetensors checkpoint shards: 100% Completed | 1/1 [00:01<00:00,  1.78s/it]
+(EngineCore_DP0 pid=15589) 
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:20 [default_loader.py:293] Loading weights took 1.78 seconds
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:20 [punica_selector.py:20] Using PunicaWrapperGPU.
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:21 [gpu_model_runner.py:4338] Model loading took 0.96 GiB memory and 3.620917 seconds
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:37 [decorators.py:465] Directly load AOT compilation from path /root/.cache/vllm/torch_compile_cache/torch_aot_compile/19f16ef5be162d523fe85c0ed27f944cf1ccd27d08e2ae363d4b7c12b35022cc/rank_0_0/model
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:37 [backends.py:916] Using cache directory: /root/.cache/vllm/torch_compile_cache/d97828e2e7/rank_0_0/backbone for vLLM's torch.compile
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:37 [backends.py:976] Dynamo bytecode transform time: 3.13 s
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:40 [backends.py:266] Directly load the compiled graph(s) for compile range (1, 2048) from the cache, took 1.439 s
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:40 [monitor.py:35] torch.compile takes 5.47 s in total
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:41 [gpu_worker.py:424] Available KV cache memory: 70.01 GiB
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:41 [kv_cache_utils.py:1314] GPU KV cache size: 6,117,600 tokens
+(EngineCore_DP0 pid=15589) INFO 04-13 02:37:41 [kv_cache_utils.py:1319] Maximum concurrency for 32,768 tokens per request: 186.69x
+(EngineCore_DP0 pid=15589) Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):   0%|          | 0/70 [00:00<?, ?it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):   1%|▏         | 1/70 [00:12<14:40, 12.76s/it](EngineCore_DP0 pid=15589) WARNING 04-13 02:37:56 [utils.py:268] Using default LoRA kernel configs
+Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):   3%|▎         | 2/70 [00:12<06:05,  5.37s/it]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):   6%|▌         | 4/70 [00:13<02:14,  2.04s/it]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):   9%|▊         | 6/70 [00:13<01:10,  1.11s/it]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  11%|█▏        | 8/70 [00:13<00:42,  1.46it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  14%|█▍        | 10/70 [00:13<00:27,  2.19it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  17%|█▋        | 12/70 [00:13<00:18,  3.10it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  20%|██        | 14/70 [00:13<00:13,  4.23it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  23%|██▎       | 16/70 [00:13<00:09,  5.55it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  26%|██▌       | 18/70 [00:13<00:07,  7.03it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  29%|██▊       | 20/70 [00:14<00:05,  8.58it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  31%|███▏      | 22/70 [00:14<00:04, 10.09it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  34%|███▍      | 24/70 [00:14<00:03, 11.52it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  37%|███▋      | 26/70 [00:14<00:03, 12.52it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  40%|████      | 28/70 [00:14<00:03, 13.39it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  43%|████▎     | 30/70 [00:14<00:02, 14.13it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  46%|████▌     | 32/70 [00:14<00:02, 14.39it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  49%|████▊     | 34/70 [00:14<00:02, 14.80it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  51%|█████▏    | 36/70 [00:15<00:02, 14.96it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  54%|█████▍    | 38/70 [00:15<00:02, 14.85it/s]
Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  57%|█████▋    | 40/70 [00:15<00:01, 15.56it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  60%|██████    | 42/70 [00:15<00:01, 15.78it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  63%|██████▎   | 44/70 [00:15<00:01, 15.34it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  66%|██████▌   | 46/70 [00:15<00:01, 15.87it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  69%|██████▊   | 48/70 [00:15<00:01, 16.38it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  71%|███████▏  | 50/70 [00:15<00:01, 16.61it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  74%|███████▍  | 52/70 [00:16<00:01, 16.84it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  77%|███████▋  | 54/70 [00:16<00:00, 16.85it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  80%|████████  | 56/70 [00:16<00:00, 16.74it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  83%|████████▎ | 58/70 [00:16<00:00, 15.93it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  86%|████████▌ | 60/70 [00:16<00:00, 16.18it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  89%|████████▊ | 62/70 [00:16<00:00, 16.71it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  91%|█████████▏| 64/70 [00:16<00:00, 17.01it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  94%|█████████▍| 66/70 [00:16<00:00, 17.19it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE):  97%|█████████▋| 68/70 [00:16<00:00, 17.44it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 100%|██████████| 70/70 [00:17<00:00, 15.84it/s]Capturing CUDA graphs (mixed prefill-decode, PIECEWISE): 100%|██████████| 70/70 [00:17<00:00,  4.09it/s]
+(EngineCore_DP0 pid=15589) Capturing CUDA graphs (decode, FULL):   0%|          | 0/38 [00:00<?, ?it/s]Capturing CUDA graphs (decode, FULL):   3%|▎         | 1/38 [00:12<07:49, 12.69s/it]Capturing CUDA graphs (decode, FULL):  11%|█         | 4/38 [00:12<01:23,  2.44s/it]Capturing CUDA graphs (decode, FULL):  16%|█▌        | 6/38 [00:12<00:44,  1.40s/it]Capturing CUDA graphs (decode, FULL):  21%|██        | 8/38 [00:13<00:26,  1.13it/s]Capturing CUDA graphs (decode, FULL):  26%|██▋       | 10/38 [00:13<00:16,  1.69it/s]Capturing CUDA graphs (decode, FULL):  32%|███▏      | 12/38 [00:13<00:10,  2.43it/s]Capturing CUDA graphs (decode, FULL):  37%|███▋      | 14/38 [00:13<00:07,  3.38it/s]Capturing CUDA graphs (decode, FULL):  42%|████▏     | 16/38 [00:13<00:04,  4.55it/s]Capturing CUDA graphs (decode, FULL):  47%|████▋     | 18/38 [00:13<00:03,  5.91it/s]Capturing CUDA graphs (decode, FULL):  53%|█████▎    | 20/38 [00:13<00:02,  7.39it/s]Capturing CUDA graphs (decode, FULL):  58%|█████▊    | 22/38 [00:13<00:01,  8.96it/s]Capturing CUDA graphs (decode, FULL):  63%|██████▎   | 24/38 [00:13<00:01, 10.47it/s]Capturing CUDA graphs (decode, FULL):  68%|██████▊   | 26/38 [00:14<00:01, 11.88it/s]Capturing CUDA graphs (decode, FULL):  74%|███████▎  | 28/38 [00:14<00:00, 13.17it/s]Capturing CUDA graphs (decode, FULL):  79%|███████▉  | 30/38 [00:14<00:00, 14.22it/s]Capturing CUDA graphs (decode, FULL):  84%|████████▍ | 32/38 [00:14<00:00, 15.04it/s]Capturing CUDA graphs (decode, FULL):  89%|████████▉ | 34/38 [00:14<00:00, 15.68it/s]Capturing CUDA graphs (decode, FULL):  95%|█████████▍| 36/38 [00:14<00:00, 16.10it/s]Capturing CUDA graphs (decode, FULL): 100%|██████████| 38/38 [00:14<00:00, 16.44it/s]Capturing CUDA graphs (decode, FULL): 100%|██████████| 38/38 [00:14<00:00,  2.57it/s]
+(EngineCore_DP0 pid=15589) INFO 04-13 02:38:16 [gpu_model_runner.py:5360] Graph capturing finished in 33 secs, took 0.65 GiB
+(EngineCore_DP0 pid=15589) INFO 04-13 02:38:28 [core.py:282] init engine (profile, create kv cache, warmup model) took 67.40 seconds
+(EngineCore_DP0 pid=15589) INFO 04-13 02:38:32 [vllm.py:747] Asynchronous scheduling is enabled.
+Starting from step 8
+
+============================================================
+Step 9/50
+============================================================
+step 9:   0%|          | 0/32 [00:00<?, ?it/s]step 9:   3%|▎         | 1/32 [00:01<01:01,  1.98s/it]step 9:   3%|▎         | 1/32 [00:01<01:01,  1.98s/it, reward=3, num_turns=2, num_tools=1, failed=0, completion_tokens=20.5]step 9:   6%|▋         | 2/32 [00:01<00:59,  1.98s/it, reward=2.25, num_turns=2, num_tools=1, failed=0, completion_tokens=21]step 9:   9%|▉         | 3/32 [00:02<00:57,  1.98s/it, reward=2, num_turns=2, num_tools=1, failed=0, completion_tokens=22]   step 9:  12%|█▎        | 4/32 [00:02<00:55,  1.98s/it, reward=1.62, num_turns=2, num_tools=1, failed=0, completion_tokens=22.9]step 9:  16%|█▌        | 5/32 [00:02<00:08,  3.08it/s, reward=1.62, num_turns=2, num_tools=1, failed=0, completion_tokens=22.9]step 9:  16%|█▌        | 5/32 [00:02<00:08,  3.08it/s, reward=1.4, num_turns=2, num_tools=1, failed=0, completion_tokens=23.7] step 9:  19%|█▉        | 6/32 [00:02<00:08,  3.08it/s, reward=1.25, num_turns=2, num_tools=1, failed=0, completion_tokens=24.2]step 9:  22%|██▏       | 7/32 [00:02<00:08,  3.08it/s, reward=1.64, num_turns=2, num_tools=1, failed=0, completion_tokens=24.9]step 9:  25%|██▌       | 8/32 [00:02<00:07,  3.08it/s, reward=1.69, num_turns=2, num_tools=1, failed=0, completion_tokens=24.9]step 9:  28%|██▊       | 9/32 [00:02<00:07,  3.08it/s, reward=1.94, num_turns=2, num_tools=1, failed=0, completion_tokens=25.3]step 9:  31%|███▏      | 10/32 [00:02<00:07,  3.08it/s, reward=1.8, num_turns=2, num_tools=1, failed=0, completion_tokens=25.8]step 9:  34%|███▍      | 11/32 [00:02<00:06,  3.08it/s, reward=1.68, num_turns=2, num_tools=1, failed=0, completion_tokens=26.1]step 9:  38%|███▊      | 12/32 [00:02<00:06,  3.08it/s, reward=1.88, num_turns=2, num_tools=1, failed=0, completion_tokens=26.2]step 9:  41%|████      | 13/32 [00:02<00:06,  3.08it/s, reward=2.04, num_turns=2, num_tools=1, failed=0, completion_tokens=26.3]step 9:  44%|████▍     | 14/32 [00:02<00:05,  3.08it/s, reward=1.75, num_turns=2, num_tools=1, failed=0, 
completion_tokens=26.5]step 9:  47%|████▋     | 15/32 [00:02<00:05,  3.08it/s, reward=1.9, num_turns=2, num_tools=1, failed=0, completion_tokens=26.6] step 9:  50%|█████     | 16/32 [00:02<00:05,  3.08it/s, reward=1.66, num_turns=2, num_tools=1, failed=0, completion_tokens=26.7]step 9:  53%|█████▎    | 17/32 [00:02<00:04,  3.08it/s, reward=1.75, num_turns=2, num_tools=1, failed=0, completion_tokens=26.8]step 9:  56%|█████▋    | 18/32 [00:02<00:04,  3.08it/s, reward=1.55, num_turns=2, num_tools=1, failed=0, completion_tokens=27]  step 9:  59%|█████▉    | 19/32 [00:02<00:04,  3.08it/s, reward=1.31, num_turns=1.95, num_tools=0.947, failed=0, completion_tokens=30.1]step 9:  62%|██████▎   | 20/32 [00:02<00:03,  3.08it/s, reward=1.14, num_turns=1.95, num_tools=0.95, failed=0, completion_tokens=30.1] step 9:  66%|██████▌   | 21/32 [00:02<00:03,  3.08it/s, reward=0.992, num_turns=1.95, num_tools=0.952, failed=0, completion_tokens=30.2]step 9:  69%|██████▉   | 22/32 [00:02<00:03,  3.08it/s, reward=0.856, num_turns=1.95, num_tools=0.955, failed=0, completion_tokens=30.4]step 9:  72%|███████▏  | 23/32 [00:02<00:02,  3.08it/s, reward=0.688, num_turns=1.91, num_tools=0.913, failed=0, completion_tokens=33]  step 9:  75%|███████▌  | 24/32 [00:02<00:02,  3.08it/s, reward=0.826, num_turns=1.92, num_tools=0.917, failed=0, completion_tokens=32.8]step 9:  78%|███████▊  | 25/32 [00:02<00:00, 19.61it/s, reward=0.826, num_turns=1.92, num_tools=0.917, failed=0, completion_tokens=32.8]step 9:  78%|███████▊  | 25/32 [00:02<00:00, 19.61it/s, reward=0.713, num_turns=1.92, num_tools=0.92, failed=0, completion_tokens=32.9] step 9:  81%|████████▏ | 26/32 [00:02<00:00, 19.61it/s, reward=0.609, num_turns=1.92, num_tools=0.923, failed=0, completion_tokens=33.3]step 9:  84%|████████▍ | 27/32 [00:02<00:00, 19.61it/s, reward=0.512, num_turns=1.93, num_tools=0.926, failed=0, completion_tokens=33.3]step 9:  88%|████████▊ | 28/32 [00:02<00:00, 19.61it/s, reward=0.637, num_turns=1.93, 
num_tools=0.929, failed=0, completion_tokens=33.1]step 9:  91%|█████████ | 29/32 [00:02<00:00, 19.61it/s, reward=0.546, num_turns=1.93, num_tools=0.931, failed=0, completion_tokens=33.9]step 9:  94%|█████████▍| 30/32 [00:02<00:00, 19.61it/s, reward=0.461, num_turns=1.93, num_tools=0.933, failed=0, completion_tokens=33.7]step 9:  97%|█████████▋| 31/32 [00:02<00:00, 19.61it/s, reward=0.382, num_turns=1.94, num_tools=0.935, failed=0, completion_tokens=34.9]step 9: 100%|██████████| 32/32 [00:02<00:00, 19.61it/s, reward=0.307, num_turns=1.94, num_tools=0.938, failed=0, completion_tokens=35.4]step 9: 100%|██████████| 32/32 [00:02<00:00, 12.05it/s, reward=0.307, num_turns=1.94, num_tools=0.938, failed=0, completion_tokens=35.4]
+  group 0: mean=+0.94 std=0.583 min=+0.5 max=+2.0 | What is the temperature in Tokyo in Fahrenheit?
+  group 1: mean=+3.92 std=0.220 min=+3.3 max=+4.0 | Convert 22 kg to lbs.
+  group 2: mean=-1.62 std=1.798 min=-3.0 max=+3.0 | Which is hotter right now, Tokyo or Dubai?
+  group 3: mean=-2.00 std=0.000 min=-2.0 max=-2.0 | Which is hotter right now, Tokyo or Cairo?
+  Avg reward: 0.307 | Avg tools/rollout: 0.9 | groups with variance: 3/4
+Deleted checkpoint ./.art/rl-tool-use/models/qwen-0.5b-tool-agent/checkpoints/0007
+Packed 24 trajectories into 2 sequences of length 2048
+train:   0%|          | 0/2 [00:00<?, ?it/s]The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None}.
+==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
+   \\   /|    Num examples = 10,000,000 | Num Epochs = 3 | Total steps = 30,000,000
+O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 1
+\        /    Data Parallel GPUs = 1 | Total batch size (2 x 1 x 1) = 2
+ "-____-"     Trainable parameters = 4,399,104 of 498,431,872 (0.88% trained)
+train:  50%|█████     | 1/2 [00:11<00:11, 11.28s/it]train:  50%|█████     | 1/2 [00:11<00:11, 11.28s/it, loss/train=-0.343, loss/grad_norm=1.12, loss/learning_rate=5e-5, loss/entropy=0.874]train: 100%|██████████| 2/2 [00:11<00:00,  4.87s/it, loss/train=-0.343, loss/grad_norm=1.12, loss/learning_rate=5e-5, loss/entropy=0.874]train: 100%|██████████| 2/2 [00:11<00:00,  4.87s/it, loss/train=-0.0745, loss/grad_norm=1.77, loss/learning_rate=5e-5, loss/entropy=0.299](APIServer pid=14938) Adapters before cleanup: ['default']
+(APIServer pid=14938) Keeping active adapter(s): ['default']
+(APIServer pid=14938) Adapters after cleanup: ['default']
+train: 100%|██████████| 2/2 [00:39<00:00, 19.69s/it, loss/train=-0.0745, loss/grad_norm=1.77, loss/learning_rate=5e-5, loss/entropy=0.299]
+
+============================================================
+Step 10/50
+============================================================
+step 10:   0%|          | 0/32 [00:00<?, ?it/s]step 10:   3%|▎         | 1/32 [00:01<00:43,  1.41s/it]step 10:   3%|▎         | 1/32 [00:01<00:43,  1.41s/it, reward=2, num_turns=2, num_tools=1, failed=0, completion_tokens=20.5]step 10:   6%|▋         | 2/32 [00:01<00:42,  1.41s/it, reward=2, num_turns=2, num_tools=1, failed=0, completion_tokens=20.5]step 10:   9%|▉         | 3/32 [00:01<00:40,  1.41s/it, reward=2, num_turns=2, num_tools=1, failed=0, completion_tokens=20.5]step 10:  12%|█▎        | 4/32 [00:01<00:39,  1.41s/it, reward=2, num_turns=2, num_tools=1, failed=0, completion_tokens=20.5]step 10:  16%|█▌        | 5/32 [00:01<00:38,  1.41s/it, reward=2, num_turns=2, num_tools=1, failed=0, completion_tokens=20.5]step 10:  19%|█▉        | 6/32 [00:01<00:36,  1.41s/it, reward=2, num_turns=2, num_tools=1, failed=0, completion_tokens=20.5]step 10:  22%|██▏       | 7/32 [00:01<00:35,  1.41s/it, reward=2, num_turns=2, num_tools=1, failed=0, completion_tokens=20.5]step 10:  25%|██▌       | 8/32 [00:01<00:33,  1.41s/it, reward=1.5, num_turns=2, num_tools=1, failed=0, completion_tokens=21.2]step 10:  28%|██▊       | 9/32 [00:01<00:32,  1.41s/it, reward=1.11, num_turns=2, num_tools=1, failed=0, completion_tokens=22.2]step 10:  31%|███▏      | 10/32 [00:01<00:02,  8.57it/s, reward=1.11, num_turns=2, num_tools=1, failed=0, completion_tokens=22.2]step 10:  31%|███▏      | 10/32 [00:01<00:02,  8.57it/s, reward=0.8, num_turns=2, num_tools=1, failed=0, completion_tokens=22.2] step 10:  34%|███▍      | 11/32 [00:01<00:02,  8.57it/s, reward=0.545, num_turns=2, num_tools=1, failed=0, completion_tokens=22.6]step 10:  38%|███▊      | 12/32 [00:01<00:02,  8.57it/s, reward=0.333, num_turns=2, num_tools=1, failed=0, completion_tokens=24]  step 10:  41%|████      | 13/32 [00:01<00:02,  8.57it/s, reward=0.154, num_turns=2, num_tools=1, failed=0, completion_tokens=24.2]step 10:  44%|████▍     | 14/32 [00:01<00:02,  8.57it/s, reward=0, num_turns=2, num_tools=1, 
failed=0, completion_tokens=24.5]    step 10:  47%|████▋     | 15/32 [00:01<00:01,  8.57it/s, reward=-0.133, num_turns=2, num_tools=1, failed=0, completion_tokens=24.7]step 10:  50%|█████     | 16/32 [00:01<00:01, 14.11it/s, reward=-0.133, num_turns=2, num_tools=1, failed=0, completion_tokens=24.7]step 10:  50%|█████     | 16/32 [00:01<00:01, 14.11it/s, reward=-0.25, num_turns=2, num_tools=1, failed=0, completion_tokens=25.4] step 10:  53%|█████▎    | 17/32 [00:01<00:01, 14.11it/s, reward=-0.353, num_turns=2, num_tools=1, failed=0, completion_tokens=27] step 10:  56%|█████▋    | 18/32 [00:01<00:00, 14.11it/s, reward=-0.444, num_turns=2, num_tools=1, failed=0, completion_tokens=27.8]step 10:  59%|█████▉    | 19/32 [00:01<00:00, 14.11it/s, reward=-0.526, num_turns=2, num_tools=1, failed=0, completion_tokens=29.1]step 10:  62%|██████▎   | 20/32 [00:01<00:00, 14.11it/s, reward=-0.6, num_turns=2, num_tools=1, failed=0, completion_tokens=29.9]  step 10:  66%|██████▌   | 21/32 [00:01<00:00, 14.11it/s, reward=-0.667, num_turns=2, num_tools=1, failed=0, completion_tokens=30.5]step 10:  69%|██████▉   | 22/32 [00:01<00:00, 19.98it/s, reward=-0.667, num_turns=2, num_tools=1, failed=0, completion_tokens=30.5]step 10:  69%|██████▉   | 22/32 [00:01<00:00, 19.98it/s, reward=-0.727, num_turns=2, num_tools=1, failed=0, completion_tokens=31.7]step 10:  72%|███████▏  | 23/32 [00:01<00:00, 19.98it/s, reward=-0.783, num_turns=2, num_tools=1, failed=0, completion_tokens=33.1]step 10:  75%|███████▌  | 24/32 [00:01<00:00, 19.98it/s, reward=-0.833, num_turns=2, num_tools=1, failed=0, completion_tokens=34.3]step 10:  78%|███████▊  | 25/32 [00:01<00:00, 19.98it/s, reward=-0.88, num_turns=2, num_tools=1, failed=0, completion_tokens=35.7] step 10:  81%|████████▏ | 26/32 [00:01<00:00, 19.98it/s, reward=-0.923, num_turns=2, num_tools=1, failed=0, completion_tokens=36.9]step 10:  84%|████████▍ | 27/32 [00:01<00:00, 23.30it/s, reward=-0.923, num_turns=2, num_tools=1, failed=0, 
completion_tokens=36.9]step 10:  84%|████████▍ | 27/32 [00:01<00:00, 23.30it/s, reward=-0.963, num_turns=2, num_tools=1, failed=0, completion_tokens=38.3]step 10:  88%|████████▊ | 28/32 [00:01<00:00, 23.30it/s, reward=-1, num_turns=2, num_tools=1, failed=0, completion_tokens=39.1]    step 10:  91%|█████████ | 29/32 [00:01<00:00, 23.30it/s, reward=-1.03, num_turns=2, num_tools=1, failed=0, completion_tokens=40.6]step 10:  94%|█████████▍| 30/32 [00:01<00:00, 23.30it/s, reward=-1.07, num_turns=2, num_tools=1, failed=0, completion_tokens=40]  step 10:  97%|█████████▋| 31/32 [00:02<00:00, 23.30it/s, reward=-1.1, num_turns=2.03, num_tools=1.03, failed=0, completion_tokens=40.3]step 10: 100%|██████████| 32/32 [00:03<00:00,  7.29it/s, reward=-1.1, num_turns=2.03, num_tools=1.03, failed=0, completion_tokens=40.3]step 10: 100%|██████████| 32/32 [00:03<00:00,  7.29it/s, reward=-1.12, num_turns=2.03, num_tools=1.03, failed=0, completion_tokens=47.7]step 10: 100%|██████████| 32/32 [00:03<00:00,  8.82it/s, reward=-1.12, num_turns=2.03, num_tools=1.03, failed=0, completion_tokens=47.7]
+  group 0: mean=-2.00 std=0.000 min=-2.0 max=-2.0 | What is the speed of light?
+  group 1: mean=-2.00 std=0.000 min=-2.0 max=-2.0 | What is the GDP of India?
+  group 2: mean=-2.00 std=0.000 min=-2.0 max=-2.0 | What is the GDP of Germany?
+  group 3: mean=+1.50 std=1.323 min=-2.0 max=+2.0 | What is 404 minus 5?
+  Avg reward: -1.125 | Avg tools/rollout: 1.0 | groups with variance: 1/4
+Deleted checkpoint ./.art/rl-tool-use/models/qwen-0.5b-tool-agent/checkpoints/0008
+Packed 8 trajectories into 1 sequences of length 2048
+train:   0%|          | 0/1 [00:00<?, ?it/s]train: 100%|██████████| 1/1 [00:01<00:00,  1.87s/it]train: 100%|██████████| 1/1 [00:01<00:00,  1.87s/it, loss/train=-0.524, loss/grad_norm=2.19, loss/learning_rate=5e-5, loss/entropy=0.304](APIServer pid=14938) Adapters before cleanup: ['default']
+(APIServer pid=14938) Keeping active adapter(s): ['default']
+(APIServer pid=14938) Adapters after cleanup: ['default']
+train: 100%|██████████| 1/1 [00:29<00:00, 29.74s/it, loss/train=-0.524, loss/grad_norm=2.19, loss/learning_rate=5e-5, loss/entropy=0.304]
+  Running validation...
+validation:   0%|          | 0/400 [00:00<?, ?it/s]validation:   0%|          | 1/400 [00:13<1:30:20, 13.59s/it]validation:   0%|          | 1/400 [00:13<1:30:20, 13.59s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=37]validation:   0%|          | 2/400 [00:13<37:45,  5.69s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=37]  validation:   0%|          | 2/400 [00:13<37:45,  5.69s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=52]validation:   1%|          | 3/400 [00:13<20:50,  3.15s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=52]validation:   1%|          | 3/400 [00:13<20:50,  3.15s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=36.3]validation:   1%|          | 4/400 [00:13<20:46,  3.15s/it, reward=-1.25, num_turns=1.25, num_tools=0.25, failed=0, completion_tokens=34.2]validation:   1%|▏         | 5/400 [00:13<20:43,  3.15s/it, reward=-0.2, num_turns=1.4, num_tools=0.4, failed=0, completion_tokens=33]     validation:   2%|▏         | 6/400 [00:13<20:40,  3.15s/it, reward=0.5, num_turns=1.5, num_tools=0.5, failed=0, completion_tokens=32.3]validation:   2%|▏         | 7/400 [00:13<20:37,  3.15s/it, reward=1, num_turns=1.57, num_tools=0.571, failed=0, completion_tokens=31.9]validation:   2%|▏         | 8/400 [00:13<20:34,  3.15s/it, reward=1.12, num_turns=1.62, num_tools=0.625, failed=0, completion_tokens=31.4]validation:   2%|▏         | 9/400 [00:13<20:31,  3.15s/it, reward=1.22, num_turns=1.67, num_tools=0.667, failed=0, completion_tokens=31.6]validation:   2%|▎         | 10/400 [00:13<20:28,  3.15s/it, reward=1.25, num_turns=1.7, num_tools=0.7, failed=0, completion_tokens=32.4]  validation:   3%|▎         | 11/400 [00:13<20:24,  3.15s/it, reward=0.955, num_turns=1.73, num_tools=0.727, failed=0, completion_tokens=32.4]validation:   3%|▎         | 12/400 [00:13<20:21,  3.15s/it, reward=0.708, num_turns=1.75, num_tools=0.75, failed=0, 
completion_tokens=32]   validation:   3%|▎         | 13/400 [00:13<20:18,  3.15s/it, reward=0.5, num_turns=1.77, num_tools=0.769, failed=0, completion_tokens=32.2]validation:   4%|▎         | 14/400 [00:13<20:15,  3.15s/it, reward=0.25, num_turns=1.71, num_tools=0.714, failed=0, completion_tokens=33.6]validation:   4%|▍         | 15/400 [00:13<20:12,  3.15s/it, reward=0.0333, num_turns=1.67, num_tools=0.667, failed=0, completion_tokens=34.8]validation:   4%|▍         | 16/400 [00:14<02:27,  2.61it/s, reward=0.0333, num_turns=1.67, num_tools=0.667, failed=0, completion_tokens=34.8]validation:   4%|▍         | 16/400 [00:14<02:27,  2.61it/s, reward=-0.156, num_turns=1.62, num_tools=0.625, failed=0, completion_tokens=34.8]validation:   4%|▍         | 17/400 [00:14<02:26,  2.61it/s, reward=-0.324, num_turns=1.59, num_tools=0.588, failed=0, completion_tokens=39.1]validation:   4%|▍         | 18/400 [00:15<02:33,  2.49it/s, reward=-0.324, num_turns=1.59, num_tools=0.588, failed=0, completion_tokens=39.1]validation:   4%|▍         | 18/400 [00:15<02:33,  2.49it/s, reward=-0.417, num_turns=1.61, num_tools=0.611, failed=0, completion_tokens=40.5]validation:   5%|▍         | 19/400 [00:15<02:32,  2.49it/s, reward=-0.553, num_turns=1.58, num_tools=0.579, failed=0, completion_tokens=41.1]validation:   5%|▌         | 20/400 [00:15<02:32,  2.49it/s, reward=-0.675, num_turns=1.55, num_tools=0.55, failed=0, completion_tokens=46.6] validation:   5%|▌         | 21/400 [00:15<02:32,  2.49it/s, reward=-0.548, num_turns=1.57, num_tools=0.571, failed=0, completion_tokens=45.6]validation:   6%|▌         | 22/400 [00:15<02:31,  2.49it/s, reward=-0.432, num_turns=1.59, num_tools=0.591, failed=0, completion_tokens=44.8]validation:   6%|▌         | 23/400 [00:15<02:31,  2.49it/s, reward=-0.326, num_turns=1.61, num_tools=0.609, failed=0, completion_tokens=44.1]validation:   6%|▌         | 24/400 [00:15<02:30,  2.49it/s, reward=-0.292, num_turns=1.62, num_tools=0.625, failed=0, 
completion_tokens=43.4]validation:   6%|▋         | 25/400 [00:15<02:30,  2.49it/s, reward=-0.26, num_turns=1.64, num_tools=0.64, failed=0, completion_tokens=42.8]  validation:   6%|▋         | 26/400 [00:15<02:30,  2.49it/s, reward=-0.327, num_turns=1.65, num_tools=0.654, failed=0, completion_tokens=41.9]validation:   7%|▋         | 27/400 [00:15<02:29,  2.49it/s, reward=-0.389, num_turns=1.67, num_tools=0.667, failed=0, completion_tokens=41.3]validation:   7%|▋         | 28/400 [00:15<02:29,  2.49it/s, reward=-0.446, num_turns=1.68, num_tools=0.679, failed=0, completion_tokens=40.8]validation:   7%|▋         | 29/400 [00:15<02:28,  2.49it/s, reward=-0.293, num_turns=1.69, num_tools=0.69, failed=0, completion_tokens=40.2] validation:   8%|▊         | 30/400 [00:15<01:04,  5.75it/s, reward=-0.293, num_turns=1.69, num_tools=0.69, failed=0, completion_tokens=40.2]validation:   8%|▊         | 30/400 [00:15<01:04,  5.75it/s, reward=-0.35, num_turns=1.7, num_tools=0.7, failed=0, completion_tokens=40.3]   validation:   8%|▊         | 31/400 [00:15<01:04,  5.75it/s, reward=-0.403, num_turns=1.71, num_tools=0.742, failed=0, completion_tokens=39.9]validation:   8%|▊         | 32/400 [00:15<01:04,  5.75it/s, reward=-0.453, num_turns=1.72, num_tools=0.75, failed=0, completion_tokens=39.5] validation:   8%|▊         | 33/400 [00:15<01:03,  5.75it/s, reward=-0.5, num_turns=1.73, num_tools=0.758, failed=0, completion_tokens=39]   validation:   8%|▊         | 34/400 [00:15<00:53,  6.84it/s, reward=-0.5, num_turns=1.73, num_tools=0.758, failed=0, completion_tokens=39]validation:   8%|▊         | 34/400 [00:15<00:53,  6.84it/s, reward=-0.544, num_turns=1.74, num_tools=0.765, failed=0, completion_tokens=38.6]validation:   9%|▉         | 35/400 [00:15<00:53,  6.84it/s, reward=-0.586, num_turns=1.74, num_tools=0.771, failed=0, completion_tokens=38.4]validation:   9%|▉         | 36/400 [00:15<00:53,  6.84it/s, reward=-0.625, num_turns=1.75, num_tools=0.778, failed=0, 
completion_tokens=38.2]validation:   9%|▉         | 37/400 [00:15<00:53,  6.84it/s, reward=-0.689, num_turns=1.73, num_tools=0.757, failed=0, completion_tokens=44.9]validation:  10%|▉         | 38/400 [00:15<00:52,  6.84it/s, reward=-0.583, num_turns=1.74, num_tools=0.763, failed=0, completion_tokens=44.5]validation:  10%|▉         | 39/400 [00:15<00:52,  6.84it/s, reward=-0.62, num_turns=1.74, num_tools=0.769, failed=0, completion_tokens=44.2] validation:  10%|█         | 40/400 [00:15<00:52,  6.84it/s, reward=-0.679, num_turns=1.73, num_tools=0.75, failed=0, completion_tokens=48.1]validation:  10%|█         | 41/400 [00:16<00:36,  9.91it/s, reward=-0.679, num_turns=1.73, num_tools=0.75, failed=0, completion_tokens=48.1]validation:  10%|█         | 41/400 [00:16<00:36,  9.91it/s, reward=-0.614, num_turns=1.73, num_tools=0.756, failed=0, completion_tokens=47.6]validation:  10%|█         | 42/400 [00:16<00:36,  9.91it/s, reward=-0.647, num_turns=1.74, num_tools=0.762, failed=0, completion_tokens=47.1]validation:  11%|█         | 43/400 [00:16<00:36,  9.91it/s, reward=-0.539, num_turns=1.74, num_tools=0.767, failed=0, completion_tokens=46.7]validation:  11%|█         | 44/400 [00:16<00:35,  9.91it/s, reward=-0.436, num_turns=1.75, num_tools=0.773, failed=0, completion_tokens=46.2]validation:  11%|█▏        | 45/400 [00:16<00:35,  9.91it/s, reward=-0.337, num_turns=1.76, num_tools=0.778, failed=0, completion_tokens=45.9]validation:  12%|█▏        | 46/400 [00:16<00:35,  9.91it/s, reward=-0.243, num_turns=1.76, num_tools=0.783, failed=0, completion_tokens=45.5]validation:  12%|█▏        | 47/400 [00:16<00:35,  9.91it/s, reward=-0.152, num_turns=1.77, num_tools=0.787, failed=0, completion_tokens=45.1]validation:  12%|█▏        | 48/400 [00:16<00:35,  9.91it/s, reward=-0.128, num_turns=1.77, num_tools=0.792, failed=0, completion_tokens=44.9]validation:  12%|█▏        | 49/400 [00:16<00:35,  9.91it/s, reward=-0.105, num_turns=1.78, num_tools=0.796, failed=0, 
completion_tokens=44.6]validation:  12%|█▎        | 50/400 [00:16<00:35,  9.91it/s, reward=-0.143, num_turns=1.78, num_tools=0.8, failed=0, completion_tokens=44.4]  validation:  13%|█▎        | 51/400 [00:16<00:35,  9.91it/s, reward=-0.121, num_turns=1.78, num_tools=0.804, failed=0, completion_tokens=44.6]validation:  13%|█▎        | 52/400 [00:16<00:35,  9.91it/s, reward=-0.176, num_turns=1.77, num_tools=0.788, failed=0, completion_tokens=50.6]validation:  13%|█▎        | 53/400 [00:16<00:35,  9.91it/s, reward=-0.16, num_turns=1.77, num_tools=0.792, failed=0, completion_tokens=50.5] validation:  14%|█▎        | 54/400 [00:16<00:34,  9.91it/s, reward=-0.194, num_turns=1.78, num_tools=0.796, failed=0, completion_tokens=50.1][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  14%|█▍        | 55/400 [00:16<00:18, 18.42it/s, reward=-0.194, num_turns=1.78, num_tools=0.796, failed=0, completion_tokens=50.1]validation:  14%|█▍        | 55/400 [00:16<00:18, 18.42it/s, reward=-0.158, num_turns=1.78, num_tools=0.8, failed=0, completion_tokens=49.8]  validation:  14%|█▍        | 56/400 [00:16<00:18, 18.42it/s, reward=-0.119, num_turns=1.79, num_tools=0.804, failed=0, completion_tokens=49.5][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  14%|█▍        | 57/400 [00:16<00:18, 18.42it/s, reward=-0.17, num_turns=1.77, num_tools=0.789, failed=0.0175, completion_tokens=49.5]validation:  14%|█▍        | 58/400 [00:16<00:18, 18.42it/s, reward=-0.218, num_turns=1.76, num_tools=0.776, failed=0.0345, completion_tokens=49.5]validation:  15%|█▍        | 59/400 [00:16<00:18, 18.42it/s, reward=-0.266, num_turns=1.75, num_tools=0.763, failed=0.0508, completion_tokens=49.5]validation:  15%|█▌        | 60/400 [00:16<00:18, 18.42it/s, reward=-0.311, num_turns=1.73, num_tools=0.75, failed=0.0667, completion_tokens=49.5] validation:  15%|█▌        | 61/400 [00:16<00:18, 18.42it/s, reward=-0.355, num_turns=1.72, num_tools=0.738, failed=0.082, completion_tokens=49.5]validation:  16%|█▌        | 62/400 [00:16<00:18, 18.42it/s, reward=-0.398, num_turns=1.71, num_tools=0.726, failed=0.0968, completion_tokens=49.5]validation:  16%|█▌        | 63/400 [00:16<00:18, 18.42it/s, reward=-0.439, num_turns=1.7, num_tools=0.714, failed=0.111, completion_tokens=49.5]  validation:  16%|█▌        | 64/400 [00:16<00:18, 18.42it/s, reward=-0.479, num_turns=1.69, num_tools=0.703, failed=0.125, completion_tokens=49.5]validation:  16%|█▋        | 65/400 [00:16<00:18, 18.42it/s, reward=-0.518, num_turns=1.68, num_tools=0.692, failed=0.138, completion_tokens=49.5]validation:  16%|█▋        | 66/400 [00:16<00:18, 18.42it/s, reward=-0.556, num_turns=1.67, num_tools=0.682, failed=0.152, completion_tokens=49.5]validation:  17%|█▋        | 67/400 [00:16<00:18, 18.42it/s, reward=-0.592, num_turns=1.66, num_tools=0.672, failed=0.164, completion_tokens=49.5]validation:  17%|█▋        | 68/400 [00:16<00:18, 18.42it/s, reward=-0.627, num_turns=1.65, num_tools=0.662, failed=0.176, completion_tokens=49.5]validation:  17%|█▋        | 69/400 [00:16<00:17, 18.42it/s, reward=-0.662, num_turns=1.64, num_tools=0.652, failed=0.188, completion_tokens=49.5]validation:  18%|█▊        | 70/400 [00:16<00:17, 18.42it/s, reward=-0.695, 
num_turns=1.63, num_tools=0.643, failed=0.2, completion_tokens=49.5]  validation:  18%|█▊        | 71/400 [00:16<00:17, 18.42it/s, reward=-0.728, num_turns=1.62, num_tools=0.634, failed=0.211, completion_tokens=49.5]validation:  18%|█▊        | 72/400 [00:16<00:17, 18.42it/s, reward=-0.759, num_turns=1.61, num_tools=0.625, failed=0.222, completion_tokens=49.5]validation:  18%|█▊        | 73/400 [00:16<00:17, 18.42it/s, reward=-0.79, num_turns=1.6, num_tools=0.616, failed=0.233, completion_tokens=49.5]  validation:  18%|█▊        | 74/400 [00:16<00:17, 18.42it/s, reward=-0.82, num_turns=1.59, num_tools=0.608, failed=0.243, completion_tokens=49.5]validation:  19%|█▉        | 75/400 [00:16<00:17, 18.42it/s, reward=-0.849, num_turns=1.59, num_tools=0.6, failed=0.253, completion_tokens=49.5] validation:  19%|█▉        | 76/400 [00:16<00:17, 18.42it/s, reward=-0.877, num_turns=1.58, num_tools=0.592, failed=0.263, completion_tokens=49.5]validation:  19%|█▉        | 77/400 [00:16<00:17, 18.42it/s, reward=-0.905, num_turns=1.57, num_tools=0.584, failed=0.273, completion_tokens=49.5]validation:  20%|█▉        | 78/400 [00:16<00:17, 18.42it/s, reward=-0.932, num_turns=1.56, num_tools=0.577, failed=0.282, completion_tokens=49.5]validation:  20%|█▉        | 79/400 [00:16<00:17, 18.42it/s, reward=-0.958, num_turns=1.56, num_tools=0.57, failed=0.291, completion_tokens=49.5] validation:  20%|██        | 80/400 [00:16<00:17, 18.42it/s, reward=-0.983, num_turns=1.55, num_tools=0.562, failed=0.3, completion_tokens=49.5] validation:  20%|██        | 81/400 [00:16<00:17, 18.42it/s, reward=-1.01, num_turns=1.54, num_tools=0.556, failed=0.309, completion_tokens=49.5]validation:  20%|██        | 82/400 [00:16<00:17, 18.42it/s, reward=-1.03, num_turns=1.54, num_tools=0.549, failed=0.317, completion_tokens=49.5]validation:  21%|██        | 83/400 [00:16<00:17, 18.42it/s, reward=-1.06, num_turns=1.53, num_tools=0.542, failed=0.325, completion_tokens=49.5]validation:  21%|██     
   | 84/400 [00:16<00:17, 18.42it/s, reward=-1.08, num_turns=1.52, num_tools=0.536, failed=0.333, completion_tokens=49.5]validation:  21%|██▏       | 85/400 [00:16<00:17, 18.42it/s, reward=-1.1, num_turns=1.52, num_tools=0.529, failed=0.341, completion_tokens=49.5] validation:  22%|██▏       | 86/400 [00:16<00:17, 18.42it/s, reward=-1.12, num_turns=1.51, num_tools=0.523, failed=0.349, completion_tokens=49.5]validation:  22%|██▏       | 87/400 [00:16<00:16, 18.42it/s, reward=-1.15, num_turns=1.51, num_tools=0.517, failed=0.356, completion_tokens=49.5]validation:  22%|██▏       | 88/400 [00:16<00:16, 18.42it/s, reward=-1.17, num_turns=1.5, num_tools=0.511, failed=0.364, completion_tokens=49.5] validation:  22%|██▏       | 89/400 [00:16<00:16, 18.42it/s, reward=-1.19, num_turns=1.49, num_tools=0.506, failed=0.371, completion_tokens=49.5]validation:  22%|██▎       | 90/400 [00:16<00:16, 18.42it/s, reward=-1.2, num_turns=1.5, num_tools=0.511, failed=0.367, completion_tokens=49.2]  [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  23%|██▎       | 91/400 [00:16<00:16, 18.42it/s, reward=-1.21, num_turns=1.51, num_tools=0.516, failed=0.363, completion_tokens=49.6][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  23%|██▎       | 92/400 [00:16<00:16, 18.42it/s, reward=-1.22, num_turns=1.5, num_tools=0.511, failed=0.37, completion_tokens=49.6]  validation:  23%|██▎       | 93/400 [00:16<00:16, 18.42it/s, reward=-1.24, num_turns=1.49, num_tools=0.505, failed=0.376, completion_tokens=49.6]validation:  24%|██▎       | 94/400 [00:16<00:16, 18.42it/s, reward=-1.26, num_turns=1.49, num_tools=0.5, failed=0.383, completion_tokens=49.6]  validation:  24%|██▍       | 95/400 [00:16<00:06, 50.79it/s, reward=-1.26, num_turns=1.49, num_tools=0.5, failed=0.383, completion_tokens=49.6]validation:  24%|██▍       | 95/400 [00:16<00:06, 50.79it/s, reward=-1.28, num_turns=1.48, num_tools=0.495, failed=0.389, completion_tokens=49.6]validation:  24%|██▍       | 96/400 [00:16<00:05, 50.79it/s, reward=-1.3, num_turns=1.48, num_tools=0.49, failed=0.396, completion_tokens=49.6]  [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  24%|██▍       | 97/400 [00:16<00:05, 50.79it/s, reward=-1.32, num_turns=1.47, num_tools=0.485, failed=0.402, completion_tokens=49.6]validation:  24%|██▍       | 98/400 [00:16<00:05, 50.79it/s, reward=-1.33, num_turns=1.47, num_tools=0.48, failed=0.408, completion_tokens=49.6] validation:  25%|██▍       | 99/400 [00:16<00:05, 50.79it/s, reward=-1.35, num_turns=1.46, num_tools=0.475, failed=0.414, completion_tokens=49.6][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  25%|██▌       | 100/400 [00:16<00:05, 50.79it/s, reward=-1.37, num_turns=1.46, num_tools=0.47, failed=0.42, completion_tokens=49.6] validation:  25%|██▌       | 101/400 [00:16<00:05, 50.79it/s, reward=-1.38, num_turns=1.46, num_tools=0.465, failed=0.426, completion_tokens=49.6]validation:  26%|██▌       | 102/400 [00:16<00:05, 50.79it/s, reward=-1.4, num_turns=1.45, num_tools=0.461, failed=0.431, completion_tokens=49.6] validation:  26%|██▌       | 103/400 [00:16<00:05, 50.79it/s, reward=-1.41, num_turns=1.45, num_tools=0.456, failed=0.437, completion_tokens=49.6]validation:  26%|██▌       | 104/400 [00:16<00:05, 50.79it/s, reward=-1.43, num_turns=1.44, num_tools=0.452, failed=0.442, completion_tokens=49.6]validation:  26%|██▋       | 105/400 [00:16<00:05, 50.79it/s, reward=-1.44, num_turns=1.44, num_tools=0.448, failed=0.448, completion_tokens=49.6]validation:  26%|██▋       | 106/400 [00:16<00:05, 50.79it/s, reward=-1.46, num_turns=1.43, num_tools=0.443, failed=0.453, completion_tokens=49.6]validation:  27%|██▋       | 107/400 [00:16<00:05, 50.79it/s, reward=-1.47, num_turns=1.43, num_tools=0.439, failed=0.458, completion_tokens=49.6]validation:  27%|██▋       | 108/400 [00:16<00:05, 50.79it/s, reward=-1.49, num_turns=1.43, num_tools=0.435, failed=0.463, completion_tokens=49.6]validation:  27%|██▋       | 109/400 [00:16<00:05, 50.79it/s, reward=-1.5, num_turns=1.42, num_tools=0.431, failed=0.468, completion_tokens=49.6] validation:  28%|██▊       | 110/400 [00:16<00:05, 50.79it/s, reward=-1.52, num_turns=1.42, num_tools=0.427, failed=0.473, completion_tokens=49.6]validation:  28%|██▊       | 111/400 [00:16<00:06, 42.54it/s, reward=-1.52, num_turns=1.42, num_tools=0.427, failed=0.473, completion_tokens=49.6]validation:  28%|██▊       | 111/400 [00:16<00:06, 42.54it/s, reward=-1.53, num_turns=1.41, num_tools=0.423, failed=0.477, completion_tokens=49.6]validation:  28%|██▊       | 112/400 [00:16<00:06, 42.54it/s, reward=-1.54, 
num_turns=1.41, num_tools=0.42, failed=0.482, completion_tokens=49.6] validation:  28%|██▊       | 113/400 [00:16<00:06, 42.54it/s, reward=-1.55, num_turns=1.41, num_tools=0.416, failed=0.487, completion_tokens=49.6]validation:  28%|██▊       | 114/400 [00:16<00:06, 42.54it/s, reward=-1.57, num_turns=1.4, num_tools=0.412, failed=0.491, completion_tokens=49.6] validation:  29%|██▉       | 115/400 [00:16<00:06, 42.54it/s, reward=-1.58, num_turns=1.4, num_tools=0.409, failed=0.496, completion_tokens=49.6]validation:  29%|██▉       | 116/400 [00:16<00:06, 42.54it/s, reward=-1.59, num_turns=1.4, num_tools=0.405, failed=0.5, completion_tokens=49.6]  validation:  29%|██▉       | 117/400 [00:16<00:06, 42.54it/s, reward=-1.6, num_turns=1.39, num_tools=0.402, failed=0.504, completion_tokens=49.6]validation:  30%|██▉       | 118/400 [00:16<00:06, 42.54it/s, reward=-1.62, num_turns=1.39, num_tools=0.398, failed=0.508, completion_tokens=49.6]validation:  30%|██▉       | 119/400 [00:16<00:06, 42.54it/s, reward=-1.63, num_turns=1.39, num_tools=0.395, failed=0.513, completion_tokens=49.6]validation:  30%|███       | 120/400 [00:16<00:06, 42.54it/s, reward=-1.64, num_turns=1.38, num_tools=0.392, failed=0.517, completion_tokens=49.6]validation:  30%|███       | 121/400 [00:16<00:06, 42.54it/s, reward=-1.65, num_turns=1.38, num_tools=0.388, failed=0.521, completion_tokens=49.6]validation:  30%|███       | 122/400 [00:16<00:06, 42.54it/s, reward=-1.66, num_turns=1.38, num_tools=0.385, failed=0.525, completion_tokens=49.6]validation:  31%|███       | 123/400 [00:16<00:06, 42.54it/s, reward=-1.67, num_turns=1.37, num_tools=0.382, failed=0.528, completion_tokens=49.6]validation:  31%|███       | 124/400 [00:16<00:06, 42.54it/s, reward=-1.68, num_turns=1.37, num_tools=0.379, failed=0.532, completion_tokens=49.6][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  31%|███▏      | 125/400 [00:17<00:05, 46.65it/s, reward=-1.68, num_turns=1.37, num_tools=0.379, failed=0.532, completion_tokens=49.6]validation:  31%|███▏      | 125/400 [00:17<00:05, 46.65it/s, reward=-1.69, num_turns=1.37, num_tools=0.376, failed=0.536, completion_tokens=49.6]validation:  32%|███▏      | 126/400 [00:17<00:05, 46.65it/s, reward=-1.7, num_turns=1.37, num_tools=0.373, failed=0.54, completion_tokens=49.6]  validation:  32%|███▏      | 127/400 [00:17<00:05, 46.65it/s, reward=-1.71, num_turns=1.36, num_tools=0.37, failed=0.543, completion_tokens=49.6]validation:  32%|███▏      | 128/400 [00:17<00:05, 46.65it/s, reward=-1.72, num_turns=1.36, num_tools=0.367, failed=0.547, completion_tokens=49.6]validation:  32%|███▏      | 129/400 [00:17<00:05, 46.65it/s, reward=-1.73, num_turns=1.36, num_tools=0.364, failed=0.55, completion_tokens=49.6] validation:  32%|███▎      | 130/400 [00:17<00:05, 46.65it/s, reward=-1.74, num_turns=1.35, num_tools=0.362, failed=0.554, completion_tokens=49.6]validation:  33%|███▎      | 131/400 [00:17<00:05, 46.65it/s, reward=-1.75, num_turns=1.35, num_tools=0.359, failed=0.557, completion_tokens=49.6]validation:  33%|███▎      | 132/400 [00:17<00:05, 46.65it/s, reward=-1.76, num_turns=1.35, num_tools=0.356, failed=0.561, completion_tokens=49.6]validation:  33%|███▎      | 133/400 [00:17<00:05, 46.65it/s, reward=-1.77, num_turns=1.35, num_tools=0.353, failed=0.564, completion_tokens=49.6]validation:  34%|███▎      | 134/400 [00:17<00:05, 46.65it/s, reward=-1.74, num_turns=1.35, num_tools=0.358, failed=0.56, completion_tokens=49.3] validation:  34%|███▍      | 135/400 [00:17<00:05, 46.65it/s, reward=-1.75, num_turns=1.35, num_tools=0.356, failed=0.556, completion_tokens=49.3]validation:  34%|███▍      | 136/400 [00:17<00:05, 46.65it/s, reward=-1.76, num_turns=1.35, num_tools=0.353, failed=0.551, completion_tokens=49.4]validation:  34%|███▍      | 137/400 [00:17<00:05, 46.65it/s, reward=-1.74, 
num_turns=1.35, num_tools=0.358, failed=0.547, completion_tokens=49.2]validation:  34%|███▍      | 138/400 [00:17<00:05, 46.65it/s, reward=-1.74, num_turns=1.36, num_tools=0.362, failed=0.543, completion_tokens=49]  validation:  35%|███▍      | 139/400 [00:17<00:05, 46.65it/s, reward=-1.75, num_turns=1.35, num_tools=0.36, failed=0.547, completion_tokens=49] validation:  35%|███▌      | 140/400 [00:17<00:05, 46.65it/s, reward=-1.76, num_turns=1.35, num_tools=0.357, failed=0.55, completion_tokens=49]validation:  35%|███▌      | 141/400 [00:17<00:05, 46.65it/s, reward=-1.76, num_turns=1.35, num_tools=0.355, failed=0.553, completion_tokens=49]validation:  36%|███▌      | 142/400 [00:17<00:05, 46.65it/s, reward=-1.77, num_turns=1.35, num_tools=0.352, failed=0.556, completion_tokens=49]validation:  36%|███▌      | 143/400 [00:17<00:05, 46.65it/s, reward=-1.78, num_turns=1.34, num_tools=0.35, failed=0.559, completion_tokens=49] validation:  36%|███▌      | 144/400 [00:17<00:05, 46.65it/s, reward=-1.79, num_turns=1.34, num_tools=0.347, failed=0.562, completion_tokens=49]validation:  36%|███▋      | 145/400 [00:17<00:05, 46.65it/s, reward=-1.8, num_turns=1.34, num_tools=0.345, failed=0.566, completion_tokens=49] validation:  36%|███▋      | 146/400 [00:17<00:05, 46.65it/s, reward=-1.81, num_turns=1.34, num_tools=0.342, failed=0.568, completion_tokens=49]validation:  37%|███▋      | 147/400 [00:17<00:05, 46.65it/s, reward=-1.82, num_turns=1.33, num_tools=0.34, failed=0.571, completion_tokens=49] validation:  37%|███▋      | 148/400 [00:17<00:05, 46.65it/s, reward=-1.82, num_turns=1.33, num_tools=0.338, failed=0.574, completion_tokens=49]validation:  37%|███▋      | 149/400 [00:17<00:05, 46.65it/s, reward=-1.83, num_turns=1.33, num_tools=0.336, failed=0.577, completion_tokens=49]validation:  38%|███▊      | 150/400 [00:17<00:05, 46.65it/s, reward=-1.84, num_turns=1.33, num_tools=0.333, failed=0.58, completion_tokens=49] validation:  38%|███▊      | 151/400 
[00:17<00:05, 46.65it/s, reward=-1.85, num_turns=1.32, num_tools=0.331, failed=0.583, completion_tokens=49]validation:  38%|███▊      | 152/400 [00:17<00:05, 46.65it/s, reward=-1.85, num_turns=1.32, num_tools=0.329, failed=0.586, completion_tokens=49]validation:  38%|███▊      | 153/400 [00:17<00:05, 46.65it/s, reward=-1.86, num_turns=1.32, num_tools=0.327, failed=0.588, completion_tokens=49]validation:  38%|███▊      | 154/400 [00:17<00:05, 46.65it/s, reward=-1.87, num_turns=1.32, num_tools=0.325, failed=0.591, completion_tokens=49]validation:  39%|███▉      | 155/400 [00:17<00:05, 46.65it/s, reward=-1.88, num_turns=1.32, num_tools=0.323, failed=0.594, completion_tokens=49]validation:  39%|███▉      | 156/400 [00:17<00:05, 46.65it/s, reward=-1.88, num_turns=1.31, num_tools=0.321, failed=0.596, completion_tokens=49]validation:  39%|███▉      | 157/400 [00:17<00:05, 46.65it/s, reward=-1.89, num_turns=1.31, num_tools=0.318, failed=0.599, completion_tokens=49]validation:  40%|███▉      | 158/400 [00:17<00:05, 46.65it/s, reward=-1.9, num_turns=1.31, num_tools=0.316, failed=0.601, completion_tokens=49] validation:  40%|███▉      | 159/400 [00:17<00:05, 46.65it/s, reward=-1.9, num_turns=1.31, num_tools=0.314, failed=0.604, completion_tokens=49]validation:  40%|████      | 160/400 [00:17<00:05, 46.65it/s, reward=-1.91, num_turns=1.31, num_tools=0.312, failed=0.606, completion_tokens=49]validation:  40%|████      | 161/400 [00:17<00:05, 46.65it/s, reward=-1.92, num_turns=1.3, num_tools=0.311, failed=0.609, completion_tokens=49] validation:  40%|████      | 162/400 [00:17<00:05, 46.65it/s, reward=-1.92, num_turns=1.3, num_tools=0.309, failed=0.611, completion_tokens=49]validation:  41%|████      | 163/400 [00:17<00:05, 46.65it/s, reward=-1.93, num_turns=1.3, num_tools=0.307, failed=0.613, completion_tokens=49]validation:  41%|████      | 164/400 [00:17<00:05, 46.65it/s, reward=-1.94, num_turns=1.3, num_tools=0.305, failed=0.616, completion_tokens=49]
validation:  41%|████▏     | 165/400 [00:17<00:05, 46.65it/s, reward=-1.94, num_turns=1.3, num_tools=0.303, failed=0.618, completion_tokens=49]validation:  42%|████▏     | 166/400 [00:17<00:05, 46.65it/s, reward=-1.95, num_turns=1.3, num_tools=0.301, failed=0.62, completion_tokens=49] validation:  42%|████▏     | 167/400 [00:17<00:04, 46.65it/s, reward=-1.96, num_turns=1.29, num_tools=0.299, failed=0.623, completion_tokens=49]validation:  42%|████▏     | 168/400 [00:17<00:04, 46.65it/s, reward=-1.96, num_turns=1.29, num_tools=0.298, failed=0.625, completion_tokens=49]validation:  42%|████▏     | 169/400 [00:17<00:04, 46.65it/s, reward=-1.97, num_turns=1.29, num_tools=0.296, failed=0.621, completion_tokens=50.8][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  42%|████▎     | 170/400 [00:17<00:04, 46.65it/s, reward=-1.98, num_turns=1.29, num_tools=0.294, failed=0.624, completion_tokens=50.8]validation:  43%|████▎     | 171/400 [00:17<00:04, 46.65it/s, reward=-1.98, num_turns=1.29, num_tools=0.292, failed=0.626, completion_tokens=50.8]validation:  43%|████▎     | 172/400 [00:17<00:04, 46.65it/s, reward=-1.99, num_turns=1.29, num_tools=0.297, failed=0.628, completion_tokens=50.6]validation:  43%|████▎     | 173/400 [00:17<00:04, 46.65it/s, reward=-1.99, num_turns=1.29, num_tools=0.301, failed=0.63, completion_tokens=50.4] validation:  44%|████▎     | 174/400 [00:17<00:04, 46.65it/s, reward=-2, num_turns=1.3, num_tools=0.305, failed=0.632, completion_tokens=50.1]   validation:  44%|████▍     | 175/400 [00:17<00:04, 46.65it/s, reward=-2, num_turns=1.3, num_tools=0.309, failed=0.634, completion_tokens=49.9]validation:  44%|████▍     | 176/400 [00:17<00:04, 46.65it/s, reward=-2.01, num_turns=1.31, num_tools=0.312, failed=0.636, completion_tokens=49.7]validation:  44%|████▍     | 177/400 [00:17<00:04, 46.65it/s, reward=-2.02, num_turns=1.31, num_tools=0.316, failed=0.638, completion_tokens=49.3]validation:  44%|████▍     | 178/400 [00:17<00:04, 46.65it/s, reward=-2.02, num_turns=1.31, num_tools=0.315, failed=0.64, completion_tokens=49.3] validation:  45%|████▍     | 179/400 [00:17<00:04, 46.65it/s, reward=-2.03, num_turns=1.31, num_tools=0.318, failed=0.642, completion_tokens=48.9][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  45%|████▌     | 180/400 [00:17<00:02, 93.58it/s, reward=-2.03, num_turns=1.31, num_tools=0.318, failed=0.642, completion_tokens=48.9]validation:  45%|████▌     | 180/400 [00:17<00:02, 93.58it/s, reward=-2.03, num_turns=1.31, num_tools=0.317, failed=0.644, completion_tokens=48.9]validation:  45%|████▌     | 181/400 [00:17<00:02, 93.58it/s, reward=-2.03, num_turns=1.31, num_tools=0.32, failed=0.641, completion_tokens=48.6] validation:  46%|████▌     | 182/400 [00:17<00:02, 93.58it/s, reward=-2.03, num_turns=1.32, num_tools=0.324, failed=0.637, completion_tokens=48.3]validation:  46%|████▌     | 183/400 [00:17<00:02, 93.58it/s, reward=-2.03, num_turns=1.32, num_tools=0.328, failed=0.634, completion_tokens=48]  validation:  46%|████▌     | 184/400 [00:17<00:02, 93.58it/s, reward=-2.03, num_turns=1.33, num_tools=0.332, failed=0.63, completion_tokens=47.8][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  46%|████▋     | 185/400 [00:17<00:02, 93.58it/s, reward=-2.03, num_turns=1.33, num_tools=0.335, failed=0.627, completion_tokens=47.4]validation:  46%|████▋     | 186/400 [00:17<00:02, 93.58it/s, reward=-2.03, num_turns=1.33, num_tools=0.339, failed=0.624, completion_tokens=47.1]validation:  47%|████▋     | 187/400 [00:17<00:02, 93.58it/s, reward=-2.03, num_turns=1.34, num_tools=0.342, failed=0.62, completion_tokens=46.7] validation:  47%|████▋     | 188/400 [00:17<00:02, 93.58it/s, reward=-2, num_turns=1.34, num_tools=0.346, failed=0.617, completion_tokens=46.4]  validation:  47%|████▋     | 189/400 [00:17<00:02, 93.58it/s, reward=-1.97, num_turns=1.34, num_tools=0.349, failed=0.614, completion_tokens=46.1]validation:  48%|████▊     | 190/400 [00:17<00:02, 93.58it/s, reward=-1.94, num_turns=1.35, num_tools=0.353, failed=0.611, completion_tokens=45.8]validation:  48%|████▊     | 191/400 [00:17<00:02, 93.58it/s, reward=-1.9, num_turns=1.35, num_tools=0.356, failed=0.607, completion_tokens=45.5] validation:  48%|████▊     | 192/400 [00:17<00:02, 93.58it/s, reward=-1.91, num_turns=1.35, num_tools=0.359, failed=0.604, completion_tokens=45.3]validation:  48%|████▊     | 193/400 [00:17<00:02, 93.58it/s, reward=-1.91, num_turns=1.36, num_tools=0.363, failed=0.601, completion_tokens=45.1]validation:  48%|████▊     | 194/400 [00:17<00:02, 93.58it/s, reward=-1.91, num_turns=1.36, num_tools=0.366, failed=0.598, completion_tokens=44.9]validation:  49%|████▉     | 195/400 [00:17<00:02, 93.58it/s, reward=-1.91, num_turns=1.36, num_tools=0.369, failed=0.595, completion_tokens=44.7]validation:  49%|████▉     | 196/400 [00:17<00:02, 93.58it/s, reward=-1.91, num_turns=1.37, num_tools=0.372, failed=0.592, completion_tokens=44.5]validation:  49%|████▉     | 197/400 [00:17<00:02, 77.26it/s, reward=-1.91, num_turns=1.37, num_tools=0.372, failed=0.592, completion_tokens=44.5]validation:  49%|████▉     | 197/400 [00:17<00:02, 77.26it/s, reward=-1.91, 
num_turns=1.37, num_tools=0.376, failed=0.589, completion_tokens=44.4]validation:  50%|████▉     | 198/400 [00:17<00:02, 77.26it/s, reward=-1.91, num_turns=1.37, num_tools=0.379, failed=0.586, completion_tokens=44.2]validation:  50%|████▉     | 199/400 [00:17<00:02, 77.26it/s, reward=-1.91, num_turns=1.38, num_tools=0.382, failed=0.583, completion_tokens=44]  validation:  50%|█████     | 200/400 [00:17<00:02, 77.26it/s, reward=-1.88, num_turns=1.38, num_tools=0.385, failed=0.58, completion_tokens=43.8]validation:  50%|█████     | 201/400 [00:17<00:02, 77.26it/s, reward=-1.85, num_turns=1.38, num_tools=0.388, failed=0.577, completion_tokens=43.6]validation:  50%|█████     | 202/400 [00:17<00:02, 77.26it/s, reward=-1.82, num_turns=1.39, num_tools=0.391, failed=0.574, completion_tokens=43.4]validation:  51%|█████     | 203/400 [00:17<00:02, 77.26it/s, reward=-1.79, num_turns=1.39, num_tools=0.394, failed=0.571, completion_tokens=43.3]validation:  51%|█████     | 204/400 [00:17<00:02, 77.26it/s, reward=-1.8, num_turns=1.39, num_tools=0.397, failed=0.574, completion_tokens=43.2] validation:  51%|█████▏    | 205/400 [00:17<00:02, 77.26it/s, reward=-1.8, num_turns=1.4, num_tools=0.4, failed=0.571, completion_tokens=43.2]   validation:  52%|█████▏    | 206/400 [00:17<00:02, 77.26it/s, reward=-1.81, num_turns=1.4, num_tools=0.403, failed=0.573, completion_tokens=43.1][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  52%|█████▏    | 207/400 [00:17<00:02, 77.26it/s, reward=-1.81, num_turns=1.4, num_tools=0.406, failed=0.57, completion_tokens=42.9] validation:  52%|█████▏    | 208/400 [00:17<00:02, 77.26it/s, reward=-1.81, num_turns=1.4, num_tools=0.404, failed=0.572, completion_tokens=42.9]validation:  52%|█████▏    | 209/400 [00:17<00:02, 77.26it/s, reward=-1.82, num_turns=1.4, num_tools=0.402, failed=0.569, completion_tokens=42.9][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  52%|█████▎    | 210/400 [00:17<00:02, 82.28it/s, reward=-1.82, num_turns=1.4, num_tools=0.402, failed=0.569, completion_tokens=42.9]validation:  52%|█████▎    | 210/400 [00:17<00:02, 82.28it/s, reward=-1.81, num_turns=1.4, num_tools=0.405, failed=0.567, completion_tokens=42.8]validation:  53%|█████▎    | 211/400 [00:17<00:02, 82.28it/s, reward=-1.81, num_turns=1.4, num_tools=0.403, failed=0.569, completion_tokens=42.8]validation:  53%|█████▎    | 212/400 [00:17<00:02, 82.28it/s, reward=-1.82, num_turns=1.4, num_tools=0.401, failed=0.571, completion_tokens=42.8]validation:  53%|█████▎    | 213/400 [00:17<00:02, 82.28it/s, reward=-1.79, num_turns=1.4, num_tools=0.404, failed=0.568, completion_tokens=42.6]validation:  54%|█████▎    | 214/400 [00:17<00:02, 82.28it/s, reward=-1.76, num_turns=1.4, num_tools=0.407, failed=0.565, completion_tokens=42.5]validation:  54%|█████▍    | 215/400 [00:17<00:02, 82.28it/s, reward=-1.74, num_turns=1.4, num_tools=0.409, failed=0.563, completion_tokens=42.3]validation:  54%|█████▍    | 216/400 [00:17<00:02, 82.28it/s, reward=-1.74, num_turns=1.41, num_tools=0.412, failed=0.56, completion_tokens=42.3]validation:  54%|█████▍    | 217/400 [00:17<00:02, 82.28it/s, reward=-1.74, num_turns=1.41, num_tools=0.415, failed=0.558, completion_tokens=42.1]validation:  55%|█████▍    | 218/400 [00:17<00:02, 82.28it/s, reward=-1.71, num_turns=1.41, num_tools=0.417, failed=0.555, completion_tokens=42]  validation:  55%|█████▍    | 219/400 [00:17<00:02, 82.28it/s, reward=-1.69, num_turns=1.42, num_tools=0.42, failed=0.553, completion_tokens=41.9]validation:  55%|█████▌    | 220/400 [00:17<00:02, 82.28it/s, reward=-1.67, num_turns=1.42, num_tools=0.423, failed=0.55, completion_tokens=41.9]validation:  55%|█████▌    | 221/400 [00:17<00:02, 82.28it/s, reward=-1.67, num_turns=1.42, num_tools=0.425, failed=0.548, completion_tokens=41.8]validation:  56%|█████▌    | 222/400 [00:17<00:02, 82.28it/s, reward=-1.68, num_turns=1.42, 
num_tools=0.428, failed=0.545, completion_tokens=41.7]validation:  56%|█████▌    | 223/400 [00:17<00:02, 80.59it/s, reward=-1.68, num_turns=1.42, num_tools=0.428, failed=0.545, completion_tokens=41.7]validation:  56%|█████▌    | 223/400 [00:17<00:02, 80.59it/s, reward=-1.68, num_turns=1.43, num_tools=0.43, failed=0.543, completion_tokens=41.6] validation:  56%|█████▌    | 224/400 [00:17<00:02, 80.59it/s, reward=-1.68, num_turns=1.43, num_tools=0.433, failed=0.54, completion_tokens=41.4]validation:  56%|█████▋    | 225/400 [00:17<00:02, 80.59it/s, reward=-1.66, num_turns=1.43, num_tools=0.436, failed=0.538, completion_tokens=41.3]validation:  56%|█████▋    | 226/400 [00:17<00:02, 80.59it/s, reward=-1.65, num_turns=1.43, num_tools=0.438, failed=0.535, completion_tokens=41.1]validation:  57%|█████▋    | 227/400 [00:17<00:02, 80.59it/s, reward=-1.65, num_turns=1.44, num_tools=0.441, failed=0.533, completion_tokens=40.9]validation:  57%|█████▋    | 228/400 [00:17<00:02, 80.59it/s, reward=-1.65, num_turns=1.44, num_tools=0.443, failed=0.531, completion_tokens=40.8]validation:  57%|█████▋    | 229/400 [00:17<00:02, 80.59it/s, reward=-1.65, num_turns=1.44, num_tools=0.445, failed=0.528, completion_tokens=40.7]validation:  57%|█████▊    | 230/400 [00:17<00:02, 80.59it/s, reward=-1.65, num_turns=1.44, num_tools=0.448, failed=0.526, completion_tokens=40.6]validation:  58%|█████▊    | 231/400 [00:18<00:02, 80.59it/s, reward=-1.65, num_turns=1.45, num_tools=0.45, failed=0.524, completion_tokens=40.5] validation:  58%|█████▊    | 232/400 [00:18<00:02, 80.59it/s, reward=-1.66, num_turns=1.45, num_tools=0.453, failed=0.522, completion_tokens=40.3]validation:  58%|█████▊    | 233/400 [00:18<00:02, 80.59it/s, reward=-1.63, num_turns=1.45, num_tools=0.455, failed=0.519, completion_tokens=40.2]validation:  58%|█████▊    | 234/400 [00:18<00:02, 78.61it/s, reward=-1.63, num_turns=1.45, num_tools=0.455, failed=0.519, completion_tokens=40.2]validation:  58%|█████▊    | 
234/400 [00:18<00:02, 78.61it/s, reward=-1.62, num_turns=1.45, num_tools=0.457, failed=0.517, completion_tokens=40]  validation:  59%|█████▉    | 235/400 [00:18<00:02, 78.61it/s, reward=-1.6, num_turns=1.46, num_tools=0.46, failed=0.515, completion_tokens=39.9]validation:  59%|█████▉    | 236/400 [00:18<00:02, 78.61it/s, reward=-1.59, num_turns=1.46, num_tools=0.462, failed=0.513, completion_tokens=39.7]validation:  59%|█████▉    | 237/400 [00:18<00:02, 78.61it/s, reward=-1.57, num_turns=1.46, num_tools=0.464, failed=0.511, completion_tokens=39.6]validation:  60%|█████▉    | 238/400 [00:18<00:02, 78.61it/s, reward=-1.56, num_turns=1.46, num_tools=0.466, failed=0.508, completion_tokens=39.4]validation:  60%|█████▉    | 239/400 [00:18<00:02, 78.61it/s, reward=-1.56, num_turns=1.46, num_tools=0.469, failed=0.506, completion_tokens=39.4]validation:  60%|██████    | 240/400 [00:18<00:02, 78.61it/s, reward=-1.53, num_turns=1.47, num_tools=0.471, failed=0.504, completion_tokens=39.3]validation:  60%|██████    | 241/400 [00:18<00:02, 78.61it/s, reward=-1.52, num_turns=1.47, num_tools=0.473, failed=0.502, completion_tokens=39.2]validation:  60%|██████    | 242/400 [00:18<00:02, 78.61it/s, reward=-1.5, num_turns=1.47, num_tools=0.475, failed=0.5, completion_tokens=39.1]   validation:  61%|██████    | 243/400 [00:18<00:01, 78.61it/s, reward=-1.5, num_turns=1.47, num_tools=0.477, failed=0.498, completion_tokens=39.1]validation:  61%|██████    | 244/400 [00:18<00:01, 78.61it/s, reward=-1.48, num_turns=1.48, num_tools=0.48, failed=0.496, completion_tokens=38.9]validation:  61%|██████▏   | 245/400 [00:18<00:01, 78.61it/s, reward=-1.48, num_turns=1.48, num_tools=0.482, failed=0.494, completion_tokens=38.8]validation:  62%|██████▏   | 246/400 [00:18<00:01, 78.61it/s, reward=-1.46, num_turns=1.48, num_tools=0.484, failed=0.492, completion_tokens=38.7]validation:  62%|██████▏   | 247/400 [00:18<00:01, 78.61it/s, reward=-1.45, num_turns=1.48, num_tools=0.486, failed=0.49, 
completion_tokens=38.7] validation:  62%|██████▏   | 248/400 [00:18<00:01, 78.61it/s, reward=-1.45, num_turns=1.48, num_tools=0.488, failed=0.488, completion_tokens=38.5]validation:  62%|██████▏   | 249/400 [00:18<00:01, 88.18it/s, reward=-1.45, num_turns=1.48, num_tools=0.488, failed=0.488, completion_tokens=38.5]validation:  62%|██████▏   | 249/400 [00:18<00:01, 88.18it/s, reward=-1.46, num_turns=1.49, num_tools=0.49, failed=0.486, completion_tokens=38.3] validation:  62%|██████▎   | 250/400 [00:18<00:01, 88.18it/s, reward=-1.44, num_turns=1.49, num_tools=0.492, failed=0.484, completion_tokens=38.4]validation:  63%|██████▎   | 251/400 [00:18<00:01, 88.18it/s, reward=-1.44, num_turns=1.49, num_tools=0.494, failed=0.482, completion_tokens=38.4]validation:  63%|██████▎   | 252/400 [00:18<00:01, 88.18it/s, reward=-1.45, num_turns=1.49, num_tools=0.496, failed=0.48, completion_tokens=38.3] validation:  63%|██████▎   | 253/400 [00:18<00:01, 88.18it/s, reward=-1.45, num_turns=1.49, num_tools=0.498, failed=0.478, completion_tokens=38.3]validation:  64%|██████▎   | 254/400 [00:18<00:01, 88.18it/s, reward=-1.45, num_turns=1.5, num_tools=0.5, failed=0.476, completion_tokens=38.2]   validation:  64%|██████▍   | 255/400 [00:18<00:01, 88.18it/s, reward=-1.44, num_turns=1.5, num_tools=0.502, failed=0.475, completion_tokens=38.1]validation:  64%|██████▍   | 256/400 [00:18<00:01, 88.18it/s, reward=-1.44, num_turns=1.5, num_tools=0.504, failed=0.473, completion_tokens=38]  validation:  64%|██████▍   | 257/400 [00:18<00:01, 88.18it/s, reward=-1.43, num_turns=1.5, num_tools=0.506, failed=0.471, completion_tokens=37.9]validation:  64%|██████▍   | 258/400 [00:18<00:01, 88.18it/s, reward=-1.41, num_turns=1.5, num_tools=0.508, failed=0.469, completion_tokens=37.8]validation:  65%|██████▍   | 259/400 [00:18<00:01, 88.18it/s, reward=-1.4, num_turns=1.51, num_tools=0.51, failed=0.467, completion_tokens=37.8] validation:  65%|██████▌   | 260/400 [00:18<00:01, 88.18it/s, 
reward=-1.4, num_turns=1.51, num_tools=0.512, failed=0.465, completion_tokens=37.7]validation:  65%|██████▌   | 261/400 [00:18<00:01, 88.18it/s, reward=-1.39, num_turns=1.51, num_tools=0.513, failed=0.464, completion_tokens=37.6]validation:  66%|██████▌   | 262/400 [00:18<00:01, 88.18it/s, reward=-1.39, num_turns=1.51, num_tools=0.515, failed=0.462, completion_tokens=37.6]validation:  66%|██████▌   | 263/400 [00:18<00:01, 88.18it/s, reward=-1.38, num_turns=1.51, num_tools=0.517, failed=0.46, completion_tokens=37.5] validation:  66%|██████▌   | 264/400 [00:18<00:01, 88.18it/s, reward=-1.38, num_turns=1.52, num_tools=0.519, failed=0.458, completion_tokens=37.4]validation:  66%|██████▋   | 265/400 [00:18<00:01, 88.18it/s, reward=-1.36, num_turns=1.52, num_tools=0.521, failed=0.457, completion_tokens=37.4]validation:  66%|██████▋   | 266/400 [00:18<00:01, 88.18it/s, reward=-1.36, num_turns=1.52, num_tools=0.523, failed=0.455, completion_tokens=37.3]validation:  67%|██████▋   | 267/400 [00:18<00:01, 88.18it/s, reward=-1.34, num_turns=1.52, num_tools=0.524, failed=0.453, completion_tokens=37.2]validation:  67%|██████▋   | 268/400 [00:18<00:01, 88.18it/s, reward=-1.32, num_turns=1.52, num_tools=0.526, failed=0.451, completion_tokens=37.1]validation:  67%|██████▋   | 269/400 [00:18<00:01, 88.18it/s, reward=-1.32, num_turns=1.52, num_tools=0.528, failed=0.45, completion_tokens=37.1] validation:  68%|██████▊   | 270/400 [00:18<00:01, 88.18it/s, reward=-1.31, num_turns=1.53, num_tools=0.53, failed=0.448, completion_tokens=37]  validation:  68%|██████▊   | 271/400 [00:18<00:01, 88.18it/s, reward=-1.3, num_turns=1.53, num_tools=0.531, failed=0.446, completion_tokens=37][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  68%|██████▊   | 272/400 [00:18<00:01, 88.18it/s, reward=-1.31, num_turns=1.53, num_tools=0.533, failed=0.449, completion_tokens=37]validation:  68%|██████▊   | 273/400 [00:18<00:01, 114.79it/s, reward=-1.31, num_turns=1.53, num_tools=0.533, failed=0.449, completion_tokens=37]validation:  68%|██████▊   | 273/400 [00:18<00:01, 114.79it/s, reward=-1.29, num_turns=1.53, num_tools=0.535, failed=0.447, completion_tokens=37]validation:  68%|██████▊   | 274/400 [00:18<00:01, 114.79it/s, reward=-1.3, num_turns=1.53, num_tools=0.536, failed=0.445, completion_tokens=37] validation:  69%|██████▉   | 275/400 [00:18<00:01, 114.79it/s, reward=-1.3, num_turns=1.53, num_tools=0.538, failed=0.444, completion_tokens=36.9]validation:  69%|██████▉   | 276/400 [00:18<00:01, 114.79it/s, reward=-1.3, num_turns=1.54, num_tools=0.54, failed=0.442, completion_tokens=36.8] validation:  69%|██████▉   | 277/400 [00:18<00:01, 114.79it/s, reward=-1.31, num_turns=1.54, num_tools=0.542, failed=0.44, completion_tokens=37] validation:  70%|██████▉   | 278/400 [00:18<00:01, 114.79it/s, reward=-1.31, num_turns=1.54, num_tools=0.543, failed=0.439, completion_tokens=36.9]validation:  70%|██████▉   | 279/400 [00:18<00:01, 114.79it/s, reward=-1.31, num_turns=1.54, num_tools=0.545, failed=0.437, completion_tokens=36.8]validation:  70%|███████   | 280/400 [00:18<00:01, 114.79it/s, reward=-1.3, num_turns=1.54, num_tools=0.546, failed=0.436, completion_tokens=36.8] validation:  70%|███████   | 281/400 [00:18<00:01, 114.79it/s, reward=-1.3, num_turns=1.54, num_tools=0.548, failed=0.434, completion_tokens=36.7]validation:  70%|███████   | 282/400 [00:18<00:01, 114.79it/s, reward=-1.31, num_turns=1.55, num_tools=0.55, failed=0.433, completion_tokens=36.7]validation:  71%|███████   | 283/400 [00:18<00:01, 114.79it/s, reward=-1.31, num_turns=1.55, num_tools=0.551, failed=0.431, completion_tokens=36.7]validation:  71%|███████   | 284/400 [00:18<00:01, 114.79it/s, reward=-1.3, 
num_turns=1.55, num_tools=0.553, failed=0.43, completion_tokens=36.7]  validation:  71%|███████▏  | 285/400 [00:18<00:01, 114.79it/s, reward=-1.29, num_turns=1.55, num_tools=0.554, failed=0.428, completion_tokens=36.6]validation:  72%|███████▏  | 286/400 [00:18<00:00, 114.79it/s, reward=-1.3, num_turns=1.55, num_tools=0.556, failed=0.427, completion_tokens=36.5] validation:  72%|███████▏  | 287/400 [00:18<00:00, 114.79it/s, reward=-1.28, num_turns=1.55, num_tools=0.557, failed=0.425, completion_tokens=36.4]validation:  72%|███████▏  | 288/400 [00:18<00:00, 114.79it/s, reward=-1.29, num_turns=1.56, num_tools=0.559, failed=0.424, completion_tokens=36.5]validation:  72%|███████▏  | 289/400 [00:18<00:00, 114.79it/s, reward=-1.29, num_turns=1.56, num_tools=0.561, failed=0.422, completion_tokens=36.5]validation:  72%|███████▎  | 290/400 [00:18<00:00, 114.79it/s, reward=-1.29, num_turns=1.56, num_tools=0.562, failed=0.421, completion_tokens=36.5]validation:  73%|███████▎  | 291/400 [00:18<00:00, 114.79it/s, reward=-1.29, num_turns=1.56, num_tools=0.564, failed=0.419, completion_tokens=36.4]validation:  73%|███████▎  | 292/400 [00:18<00:00, 114.79it/s, reward=-1.3, num_turns=1.56, num_tools=0.565, failed=0.418, completion_tokens=36.5] validation:  73%|███████▎  | 293/400 [00:18<00:00, 114.79it/s, reward=-1.29, num_turns=1.56, num_tools=0.567, failed=0.416, completion_tokens=36.8]validation:  74%|███████▎  | 294/400 [00:18<00:00, 114.79it/s, reward=-1.28, num_turns=1.56, num_tools=0.568, failed=0.415, completion_tokens=36.7]validation:  74%|███████▍  | 295/400 [00:18<00:00, 136.76it/s, reward=-1.28, num_turns=1.56, num_tools=0.568, failed=0.415, completion_tokens=36.7]validation:  74%|███████▍  | 295/400 [00:18<00:00, 136.76it/s, reward=-1.27, num_turns=1.57, num_tools=0.569, failed=0.414, completion_tokens=36.7]validation:  74%|███████▍  | 296/400 [00:18<00:00, 136.76it/s, reward=-1.26, num_turns=1.57, num_tools=0.571, failed=0.412, completion_tokens=36.6]
validation:  74%|███████▍  | 297/400 [00:18<00:00, 136.76it/s, reward=-1.26, num_turns=1.57, num_tools=0.572, failed=0.411, completion_tokens=36.5]validation:  74%|███████▍  | 298/400 [00:18<00:00, 136.76it/s, reward=-1.24, num_turns=1.57, num_tools=0.574, failed=0.409, completion_tokens=36.5]validation:  75%|███████▍  | 299/400 [00:18<00:00, 136.76it/s, reward=-1.23, num_turns=1.57, num_tools=0.575, failed=0.408, completion_tokens=36.4]validation:  75%|███████▌  | 300/400 [00:18<00:00, 136.76it/s, reward=-1.23, num_turns=1.57, num_tools=0.577, failed=0.407, completion_tokens=36.4]validation:  75%|███████▌  | 301/400 [00:18<00:00, 136.76it/s, reward=-1.23, num_turns=1.57, num_tools=0.578, failed=0.405, completion_tokens=36.3]validation:  76%|███████▌  | 302/400 [00:18<00:00, 136.76it/s, reward=-1.22, num_turns=1.58, num_tools=0.579, failed=0.404, completion_tokens=36.2]validation:  76%|███████▌  | 303/400 [00:18<00:00, 136.76it/s, reward=-1.21, num_turns=1.58, num_tools=0.581, failed=0.403, completion_tokens=36.2]validation:  76%|███████▌  | 304/400 [00:18<00:00, 136.76it/s, reward=-1.21, num_turns=1.58, num_tools=0.582, failed=0.401, completion_tokens=36.1]validation:  76%|███████▋  | 305/400 [00:18<00:00, 136.76it/s, reward=-1.2, num_turns=1.58, num_tools=0.584, failed=0.4, completion_tokens=36]     validation:  76%|███████▋  | 306/400 [00:18<00:00, 136.76it/s, reward=-1.19, num_turns=1.58, num_tools=0.585, failed=0.399, completion_tokens=36]validation:  77%|███████▋  | 307/400 [00:18<00:00, 136.76it/s, reward=-1.18, num_turns=1.58, num_tools=0.586, failed=0.397, completion_tokens=35.9]validation:  77%|███████▋  | 308/400 [00:18<00:00, 136.76it/s, reward=-1.18, num_turns=1.58, num_tools=0.588, failed=0.396, completion_tokens=35.8]validation:  77%|███████▋  | 309/400 [00:18<00:00, 136.76it/s, reward=-1.18, num_turns=1.59, num_tools=0.589, failed=0.395, completion_tokens=35.9]validation:  78%|███████▊  | 310/400 [00:18<00:00, 136.76it/s, reward=-1.18, 
num_turns=1.59, num_tools=0.59, failed=0.394, completion_tokens=35.8] validation:  78%|███████▊  | 311/400 [00:18<00:00, 136.76it/s, reward=-1.19, num_turns=1.59, num_tools=0.592, failed=0.392, completion_tokens=35.8]validation:  78%|███████▊  | 312/400 [00:18<00:00, 136.76it/s, reward=-1.17, num_turns=1.59, num_tools=0.593, failed=0.391, completion_tokens=35.8]validation:  78%|███████▊  | 313/400 [00:18<00:00, 136.76it/s, reward=-1.18, num_turns=1.59, num_tools=0.594, failed=0.39, completion_tokens=35.8] validation:  78%|███████▊  | 314/400 [00:18<00:00, 136.76it/s, reward=-1.16, num_turns=1.59, num_tools=0.596, failed=0.389, completion_tokens=35.7]validation:  79%|███████▉  | 315/400 [00:18<00:00, 136.76it/s, reward=-1.16, num_turns=1.59, num_tools=0.597, failed=0.387, completion_tokens=35.7]validation:  79%|███████▉  | 316/400 [00:18<00:00, 136.76it/s, reward=-1.15, num_turns=1.59, num_tools=0.598, failed=0.386, completion_tokens=35.6]validation:  79%|███████▉  | 317/400 [00:18<00:00, 136.76it/s, reward=-1.14, num_turns=1.6, num_tools=0.599, failed=0.385, completion_tokens=35.6] validation:  80%|███████▉  | 318/400 [00:18<00:00, 152.02it/s, reward=-1.14, num_turns=1.6, num_tools=0.599, failed=0.385, completion_tokens=35.6]validation:  80%|███████▉  | 318/400 [00:18<00:00, 152.02it/s, reward=-1.12, num_turns=1.6, num_tools=0.601, failed=0.384, completion_tokens=35.5]validation:  80%|███████▉  | 319/400 [00:18<00:00, 152.02it/s, reward=-1.11, num_turns=1.6, num_tools=0.602, failed=0.382, completion_tokens=35.5]validation:  80%|████████  | 320/400 [00:18<00:00, 152.02it/s, reward=-1.09, num_turns=1.6, num_tools=0.603, failed=0.381, completion_tokens=35.5]validation:  80%|████████  | 321/400 [00:18<00:00, 152.02it/s, reward=-1.07, num_turns=1.6, num_tools=0.604, failed=0.38, completion_tokens=35.4] validation:  80%|████████  | 322/400 [00:18<00:00, 152.02it/s, reward=-1.07, num_turns=1.6, num_tools=0.606, failed=0.379, completion_tokens=35.5]
validation:  81%|████████  | 323/400 [00:18<00:00, 152.02it/s, reward=-1.05, num_turns=1.6, num_tools=0.607, failed=0.378, completion_tokens=35.4]validation:  81%|████████  | 324/400 [00:18<00:00, 152.02it/s, reward=-1.06, num_turns=1.6, num_tools=0.608, failed=0.377, completion_tokens=35.5]validation:  81%|████████▏ | 325/400 [00:18<00:00, 152.02it/s, reward=-1.05, num_turns=1.61, num_tools=0.609, failed=0.375, completion_tokens=35.5]validation:  82%|████████▏ | 326/400 [00:18<00:00, 152.02it/s, reward=-1.06, num_turns=1.61, num_tools=0.61, failed=0.374, completion_tokens=35.4] validation:  82%|████████▏ | 327/400 [00:18<00:00, 152.02it/s, reward=-1.06, num_turns=1.61, num_tools=0.612, failed=0.373, completion_tokens=35.4]validation:  82%|████████▏ | 328/400 [00:18<00:00, 152.02it/s, reward=-1.06, num_turns=1.61, num_tools=0.613, failed=0.372, completion_tokens=35.4]validation:  82%|████████▏ | 329/400 [00:18<00:00, 152.02it/s, reward=-1.06, num_turns=1.61, num_tools=0.614, failed=0.371, completion_tokens=35.4]validation:  82%|████████▎ | 330/400 [00:18<00:00, 152.02it/s, reward=-1.07, num_turns=1.61, num_tools=0.615, failed=0.37, completion_tokens=35.4] validation:  83%|████████▎ | 331/400 [00:18<00:00, 152.02it/s, reward=-1.07, num_turns=1.61, num_tools=0.616, failed=0.369, completion_tokens=35.5]validation:  83%|████████▎ | 332/400 [00:18<00:00, 152.02it/s, reward=-1.07, num_turns=1.61, num_tools=0.617, failed=0.367, completion_tokens=35.5]validation:  83%|████████▎ | 333/400 [00:18<00:00, 152.02it/s, reward=-1.07, num_turns=1.62, num_tools=0.619, failed=0.366, completion_tokens=35.5]validation:  84%|████████▎ | 334/400 [00:18<00:00, 152.02it/s, reward=-1.07, num_turns=1.62, num_tools=0.62, failed=0.365, completion_tokens=35.5] validation:  84%|████████▍ | 335/400 [00:18<00:00, 152.02it/s, reward=-1.08, num_turns=1.61, num_tools=0.618, failed=0.364, completion_tokens=35.8]validation:  84%|████████▍ | 336/400 [00:18<00:00, 147.35it/s, reward=-1.08, 
num_turns=1.61, num_tools=0.618, failed=0.364, completion_tokens=35.8]validation:  84%|████████▍ | 336/400 [00:18<00:00, 147.35it/s, reward=-1.08, num_turns=1.62, num_tools=0.619, failed=0.363, completion_tokens=35.8]validation:  84%|████████▍ | 337/400 [00:18<00:00, 147.35it/s, reward=-1.07, num_turns=1.62, num_tools=0.62, failed=0.362, completion_tokens=36]   validation:  84%|████████▍ | 338/400 [00:18<00:00, 147.35it/s, reward=-1.07, num_turns=1.62, num_tools=0.621, failed=0.361, completion_tokens=36]validation:  85%|████████▍ | 339/400 [00:18<00:00, 147.35it/s, reward=-1.07, num_turns=1.62, num_tools=0.622, failed=0.36, completion_tokens=36.1]validation:  85%|████████▌ | 340/400 [00:18<00:00, 147.35it/s, reward=-1.07, num_turns=1.62, num_tools=0.624, failed=0.359, completion_tokens=36.1]validation:  85%|████████▌ | 341/400 [00:18<00:00, 147.35it/s, reward=-1.07, num_turns=1.62, num_tools=0.625, failed=0.358, completion_tokens=36.1]validation:  86%|████████▌ | 342/400 [00:18<00:00, 147.35it/s, reward=-1.06, num_turns=1.62, num_tools=0.626, failed=0.357, completion_tokens=36.2]validation:  86%|████████▌ | 343/400 [00:18<00:00, 147.35it/s, reward=-1.06, num_turns=1.62, num_tools=0.627, failed=0.356, completion_tokens=36.2]validation:  86%|████████▌ | 344/400 [00:18<00:00, 147.35it/s, reward=-1.06, num_turns=1.62, num_tools=0.628, failed=0.355, completion_tokens=36.3]validation:  86%|████████▋ | 345/400 [00:18<00:00, 147.35it/s, reward=-1.06, num_turns=1.63, num_tools=0.629, failed=0.354, completion_tokens=36.3]validation:  86%|████████▋ | 346/400 [00:18<00:00, 147.35it/s, reward=-1.06, num_turns=1.63, num_tools=0.63, failed=0.353, completion_tokens=36.4] validation:  87%|████████▋ | 347/400 [00:18<00:00, 147.35it/s, reward=-1.05, num_turns=1.63, num_tools=0.634, failed=0.352, completion_tokens=36.5]validation:  87%|████████▋ | 348/400 [00:18<00:00, 147.35it/s, reward=-1.04, num_turns=1.63, num_tools=0.635, failed=0.351, completion_tokens=36.5]
validation:  87%|████████▋ | 349/400 [00:18<00:00, 147.35it/s, reward=-1.04, num_turns=1.63, num_tools=0.636, failed=0.35, completion_tokens=36.5] validation:  88%|████████▊ | 350/400 [00:18<00:00, 147.35it/s, reward=-1.04, num_turns=1.63, num_tools=0.637, failed=0.349, completion_tokens=36.5]validation:  88%|████████▊ | 351/400 [00:18<00:00, 147.35it/s, reward=-1.03, num_turns=1.63, num_tools=0.638, failed=0.348, completion_tokens=36.5]validation:  88%|████████▊ | 352/400 [00:18<00:00, 147.35it/s, reward=-1.04, num_turns=1.63, num_tools=0.639, failed=0.347, completion_tokens=36.6]validation:  88%|████████▊ | 353/400 [00:18<00:00, 151.15it/s, reward=-1.04, num_turns=1.63, num_tools=0.639, failed=0.347, completion_tokens=36.6]validation:  88%|████████▊ | 353/400 [00:18<00:00, 151.15it/s, reward=-1.04, num_turns=1.63, num_tools=0.64, failed=0.346, completion_tokens=36.6] validation:  88%|████████▊ | 354/400 [00:18<00:00, 151.15it/s, reward=-1.04, num_turns=1.64, num_tools=0.641, failed=0.345, completion_tokens=36.6]validation:  89%|████████▉ | 355/400 [00:18<00:00, 151.15it/s, reward=-1.05, num_turns=1.64, num_tools=0.642, failed=0.344, completion_tokens=36.6]validation:  89%|████████▉ | 356/400 [00:18<00:00, 151.15it/s, reward=-1.05, num_turns=1.64, num_tools=0.643, failed=0.343, completion_tokens=36.6]validation:  89%|████████▉ | 357/400 [00:18<00:00, 151.15it/s, reward=-1.05, num_turns=1.64, num_tools=0.644, failed=0.342, completion_tokens=36.7]validation:  90%|████████▉ | 358/400 [00:18<00:00, 151.15it/s, reward=-1.05, num_turns=1.64, num_tools=0.645, failed=0.341, completion_tokens=36.7]validation:  90%|████████▉ | 359/400 [00:18<00:00, 151.15it/s, reward=-1.06, num_turns=1.64, num_tools=0.649, failed=0.34, completion_tokens=36.6] validation:  90%|█████████ | 360/400 [00:18<00:00, 151.15it/s, reward=-1.06, num_turns=1.64, num_tools=0.647, failed=0.339, completion_tokens=37] validation:  90%|█████████ | 361/400 [00:18<00:00, 151.15it/s, reward=-1.06, 
num_turns=1.64, num_tools=0.648, failed=0.338, completion_tokens=37.1]validation:  90%|█████████ | 362/400 [00:18<00:00, 151.15it/s, reward=-1.07, num_turns=1.64, num_tools=0.649, failed=0.337, completion_tokens=37.2]validation:  91%|█████████ | 363/400 [00:18<00:00, 151.15it/s, reward=-1.07, num_turns=1.64, num_tools=0.65, failed=0.336, completion_tokens=37.3] validation:  91%|█████████ | 364/400 [00:18<00:00, 151.15it/s, reward=-1.07, num_turns=1.65, num_tools=0.651, failed=0.335, completion_tokens=37.3]validation:  91%|█████████▏| 365/400 [00:18<00:00, 151.15it/s, reward=-1.07, num_turns=1.65, num_tools=0.652, failed=0.334, completion_tokens=37.5]validation:  92%|█████████▏| 366/400 [00:18<00:00, 151.15it/s, reward=-1.08, num_turns=1.65, num_tools=0.653, failed=0.333, completion_tokens=37.6]validation:  92%|█████████▏| 367/400 [00:18<00:00, 151.15it/s, reward=-1.07, num_turns=1.65, num_tools=0.654, failed=0.332, completion_tokens=37.8]validation:  92%|█████████▏| 368/400 [00:18<00:00, 151.15it/s, reward=-1.07, num_turns=1.65, num_tools=0.655, failed=0.332, completion_tokens=37.9]validation:  92%|█████████▏| 369/400 [00:18<00:00, 151.15it/s, reward=-1.07, num_turns=1.65, num_tools=0.656, failed=0.331, completion_tokens=37.9]validation:  92%|█████████▎| 370/400 [00:18<00:00, 120.71it/s, reward=-1.07, num_turns=1.65, num_tools=0.656, failed=0.331, completion_tokens=37.9]validation:  92%|█████████▎| 370/400 [00:18<00:00, 120.71it/s, reward=-1.07, num_turns=1.65, num_tools=0.657, failed=0.33, completion_tokens=38.2] validation:  93%|█████████▎| 371/400 [00:18<00:00, 120.71it/s, reward=-1.07, num_turns=1.65, num_tools=0.658, failed=0.329, completion_tokens=38.2]validation:  93%|█████████▎| 372/400 [00:18<00:00, 120.71it/s, reward=-1.07, num_turns=1.65, num_tools=0.659, failed=0.328, completion_tokens=38.4]validation:  93%|█████████▎| 373/400 [00:18<00:00, 120.71it/s, reward=-1.08, num_turns=1.65, num_tools=0.66, failed=0.327, completion_tokens=38.5] 
validation:  94%|█████████▎| 374/400 [00:18<00:00, 120.71it/s, reward=-1.08, num_turns=1.66, num_tools=0.66, failed=0.326, completion_tokens=38.5]validation:  94%|█████████▍| 375/400 [00:18<00:00, 120.71it/s, reward=-1.08, num_turns=1.66, num_tools=0.661, failed=0.325, completion_tokens=38.7]validation:  94%|█████████▍| 376/400 [00:18<00:00, 120.71it/s, reward=-1.08, num_turns=1.66, num_tools=0.662, failed=0.324, completion_tokens=38.9]validation:  94%|█████████▍| 377/400 [00:18<00:00, 120.71it/s, reward=-1.08, num_turns=1.66, num_tools=0.663, failed=0.324, completion_tokens=38.9]validation:  94%|█████████▍| 378/400 [00:19<00:00, 120.71it/s, reward=-1.08, num_turns=1.66, num_tools=0.664, failed=0.323, completion_tokens=39.1]validation:  95%|█████████▍| 379/400 [00:19<00:00, 120.71it/s, reward=-1.08, num_turns=1.66, num_tools=0.665, failed=0.322, completion_tokens=39.1]validation:  95%|█████████▌| 380/400 [00:19<00:00, 120.71it/s, reward=-1.09, num_turns=1.66, num_tools=0.666, failed=0.321, completion_tokens=39.4]validation:  95%|█████████▌| 381/400 [00:19<00:00, 120.71it/s, reward=-1.09, num_turns=1.66, num_tools=0.667, failed=0.32, completion_tokens=39.5] validation:  96%|█████████▌| 382/400 [00:19<00:00, 120.71it/s, reward=-1.09, num_turns=1.66, num_tools=0.668, failed=0.319, completion_tokens=39.6]validation:  96%|█████████▌| 383/400 [00:19<00:00, 120.71it/s, reward=-1.09, num_turns=1.66, num_tools=0.668, failed=0.319, completion_tokens=39.8]validation:  96%|█████████▌| 384/400 [00:19<00:00, 90.59it/s, reward=-1.09, num_turns=1.66, num_tools=0.668, failed=0.319, completion_tokens=39.8] validation:  96%|█████████▌| 384/400 [00:19<00:00, 90.59it/s, reward=-1.1, num_turns=1.66, num_tools=0.669, failed=0.318, completion_tokens=40]   validation:  96%|█████████▋| 385/400 [00:19<00:00, 90.59it/s, reward=-1.1, num_turns=1.67, num_tools=0.673, failed=0.317, completion_tokens=40.1]validation:  96%|█████████▋| 386/400 [00:19<00:00, 90.59it/s, reward=-1.09, 
num_turns=1.67, num_tools=0.674, failed=0.316, completion_tokens=40.3]validation:  97%|█████████▋| 387/400 [00:19<00:00, 90.59it/s, reward=-1.09, num_turns=1.67, num_tools=0.674, failed=0.315, completion_tokens=40.6]validation:  97%|█████████▋| 388/400 [00:19<00:00, 90.59it/s, reward=-1.1, num_turns=1.67, num_tools=0.675, failed=0.314, completion_tokens=40.8] validation:  97%|█████████▋| 389/400 [00:19<00:00, 90.59it/s, reward=-1.1, num_turns=1.67, num_tools=0.679, failed=0.314, completion_tokens=41.4]validation:  98%|█████████▊| 390/400 [00:20<00:00, 90.59it/s, reward=-1.1, num_turns=1.67, num_tools=0.682, failed=0.313, completion_tokens=41.6]validation:  98%|█████████▊| 391/400 [00:20<00:00, 90.59it/s, reward=-1.1, num_turns=1.67, num_tools=0.683, failed=0.312, completion_tokens=42]  validation:  98%|█████████▊| 392/400 [00:20<00:00, 90.59it/s, reward=-1.1, num_turns=1.67, num_tools=0.684, failed=0.311, completion_tokens=42.4]validation:  98%|█████████▊| 393/400 [00:20<00:00, 90.59it/s, reward=-1.1, num_turns=1.68, num_tools=0.69, failed=0.31, completion_tokens=42.6]  validation:  98%|█████████▊| 394/400 [00:20<00:00, 90.59it/s, reward=-1.11, num_turns=1.68, num_tools=0.688, failed=0.31, completion_tokens=44.2]validation:  99%|█████████▉| 395/400 [00:20<00:00, 90.59it/s, reward=-1.11, num_turns=1.68, num_tools=0.689, failed=0.309, completion_tokens=44.8]validation:  99%|█████████▉| 396/400 [00:20<00:00, 24.89it/s, reward=-1.11, num_turns=1.68, num_tools=0.689, failed=0.309, completion_tokens=44.8]validation:  99%|█████████▉| 396/400 [00:20<00:00, 24.89it/s, reward=-1.11, num_turns=1.68, num_tools=0.687, failed=0.308, completion_tokens=46.4]validation:  99%|█████████▉| 397/400 [00:20<00:00, 24.89it/s, reward=-1.12, num_turns=1.68, num_tools=0.688, failed=0.307, completion_tokens=47.1]validation: 100%|█████████▉| 398/400 [00:20<00:00, 24.89it/s, reward=-1.12, num_turns=1.68, num_tools=0.686, failed=0.307, completion_tokens=48.7]validation: 
100%|█████████▉| 399/400 [00:20<00:00, 24.89it/s, reward=-1.12, num_turns=1.68, num_tools=0.689, failed=0.306, completion_tokens=49.1]validation: 100%|██████████| 400/400 [00:20<00:00, 24.89it/s, reward=-1.12, num_turns=1.68, num_tools=0.69, failed=0.305, completion_tokens=49.8] validation: 100%|██████████| 400/400 [00:20<00:00, 19.19it/s, reward=-1.12, num_turns=1.68, num_tools=0.69, failed=0.305, completion_tokens=49.8]
+  Val avg reward: -1.124
+
+============================================================
+Step 11/50
+============================================================
+step 11:   0%|          | 0/32 [00:00<?, ?it/s]step 11:   3%|▎         | 1/32 [00:00<00:30,  1.02it/s]step 11:   3%|▎         | 1/32 [00:00<00:30,  1.02it/s, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=5]step 11:   6%|▋         | 2/32 [00:01<00:18,  1.59it/s, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=5]step 11:   6%|▋         | 2/32 [00:01<00:18,  1.59it/s, reward=-2.5, num_turns=1.5, num_tools=0.5, failed=0, completion_tokens=14.5]step 11:   9%|▉         | 3/32 [00:01<00:18,  1.59it/s, reward=-2.33, num_turns=1.67, num_tools=0.667, failed=0, completion_tokens=18]step 11:  12%|█▎        | 4/32 [00:01<00:17,  1.59it/s, reward=-2.5, num_turns=1.5, num_tools=0.5, failed=0, completion_tokens=28.8]  step 11:  16%|█▌        | 5/32 [00:01<00:16,  1.59it/s, reward=-2.4, num_turns=1.6, num_tools=0.6, failed=0, completion_tokens=28.2]step 11:  19%|█▉        | 6/32 [00:01<00:16,  1.59it/s, reward=-1.83, num_turns=1.67, num_tools=0.667, failed=0, completion_tokens=27.2]step 11:  22%|██▏       | 7/32 [00:01<00:15,  1.59it/s, reward=-1.43, num_turns=1.71, num_tools=0.714, failed=0, completion_tokens=27]  step 11:  25%|██▌       | 8/32 [00:01<00:15,  1.59it/s, reward=-1.5, num_turns=1.75, num_tools=0.75, failed=0, completion_tokens=26.4]step 11:  28%|██▊       | 9/32 [00:01<00:14,  1.59it/s, reward=-1.56, num_turns=1.78, num_tools=0.778, failed=0, completion_tokens=26]step 11:  31%|███▏      | 10/32 [00:01<00:13,  1.59it/s, reward=-1.6, num_turns=1.8, num_tools=0.8, failed=0, completion_tokens=26.1] step 11:  34%|███▍      | 11/32 [00:01<00:13,  1.59it/s, reward=-1.64, num_turns=1.82, num_tools=0.818, failed=0, completion_tokens=26.6]step 11:  38%|███▊      | 12/32 [00:01<00:12,  1.59it/s, reward=-1.67, num_turns=1.83, num_tools=0.833, failed=0, completion_tokens=26.7]step 11:  41%|████      | 13/32 [00:01<00:01, 14.27it/s, reward=-1.67, num_turns=1.83, num_tools=0.833, failed=0, completion_tokens=26.7]step 11:  
41%|████      | 13/32 [00:01<00:01, 14.27it/s, reward=-1.69, num_turns=1.85, num_tools=0.846, failed=0, completion_tokens=26.9]step 11:  44%|████▍     | 14/32 [00:01<00:01, 14.27it/s, reward=-1.71, num_turns=1.86, num_tools=0.857, failed=0, completion_tokens=27.2]step 11:  47%|████▋     | 15/32 [00:01<00:01, 14.27it/s, reward=-1.73, num_turns=1.87, num_tools=0.867, failed=0, completion_tokens=27.5]step 11:  50%|█████     | 16/32 [00:01<00:01, 14.27it/s, reward=-1.54, num_turns=1.88, num_tools=0.875, failed=0, completion_tokens=28]  step 11:  53%|█████▎    | 17/32 [00:01<00:01, 14.27it/s, reward=-1.57, num_turns=1.88, num_tools=0.882, failed=0, completion_tokens=28.6]step 11:  56%|█████▋    | 18/32 [00:01<00:00, 14.27it/s, reward=-1.59, num_turns=1.89, num_tools=0.889, failed=0, completion_tokens=29.1]step 11:  59%|█████▉    | 19/32 [00:01<00:00, 14.27it/s, reward=-1.46, num_turns=1.89, num_tools=0.895, failed=0, completion_tokens=29.6]step 11:  62%|██████▎   | 20/32 [00:01<00:00, 14.27it/s, reward=-1.33, num_turns=1.9, num_tools=0.9, failed=0, completion_tokens=30.1]   step 11:  66%|██████▌   | 21/32 [00:01<00:00, 14.27it/s, reward=-1.37, num_turns=1.9, num_tools=0.905, failed=0, completion_tokens=30.3]step 11:  69%|██████▉   | 22/32 [00:01<00:00, 14.27it/s, reward=-1.39, num_turns=1.91, num_tools=0.909, failed=0, completion_tokens=30.5]step 11:  72%|███████▏  | 23/32 [00:01<00:00, 14.27it/s, reward=-1.42, num_turns=1.91, num_tools=0.913, failed=0, completion_tokens=30.7]step 11:  75%|███████▌  | 24/32 [00:01<00:00, 14.27it/s, reward=-1.44, num_turns=1.92, num_tools=0.917, failed=0, completion_tokens=30.9]step 11:  78%|███████▊  | 25/32 [00:01<00:00, 29.24it/s, reward=-1.44, num_turns=1.92, num_tools=0.917, failed=0, completion_tokens=30.9]step 11:  78%|███████▊  | 25/32 [00:01<00:00, 29.24it/s, reward=-1.33, num_turns=1.92, num_tools=0.92, failed=0, completion_tokens=31.3] step 11:  81%|████████▏ | 26/32 [00:01<00:00, 29.24it/s, reward=-1.24, 
num_turns=1.92, num_tools=0.923, failed=0, completion_tokens=31.9]step 11:  84%|████████▍ | 27/32 [00:01<00:00, 29.24it/s, reward=-1.17, num_turns=1.93, num_tools=0.926, failed=0, completion_tokens=32.5]step 11:  88%|████████▊ | 28/32 [00:01<00:00, 29.24it/s, reward=-1.23, num_turns=1.89, num_tools=0.893, failed=0, completion_tokens=35.7]step 11:  91%|█████████ | 29/32 [00:01<00:00, 29.24it/s, reward=-1.26, num_turns=1.9, num_tools=0.897, failed=0, completion_tokens=36.5] step 11:  94%|█████████▍| 30/32 [00:01<00:00, 29.24it/s, reward=-1.28, num_turns=1.9, num_tools=0.933, failed=0, completion_tokens=37.2]step 11:  97%|█████████▋| 31/32 [00:02<00:00, 29.24it/s, reward=-1.31, num_turns=1.9, num_tools=0.935, failed=0, completion_tokens=39.4]step 11: 100%|██████████| 32/32 [00:02<00:00, 29.24it/s, reward=-1.36, num_turns=1.88, num_tools=0.906, failed=0, completion_tokens=48.7]step 11: 100%|██████████| 32/32 [00:02<00:00, 12.45it/s, reward=-1.36, num_turns=1.88, num_tools=0.906, failed=0, completion_tokens=48.7]
+  group 0: mean=-1.50 std=1.509 min=-3.0 max=+1.3 | What is France's population density in people per 
+  group 1: mean=-0.94 std=1.667 min=-3.0 max=+1.5 | What is the distance from Earth to the Sun in km i
+  group 2: mean=-2.12 std=0.331 min=-3.0 max=-2.0 | What is the GDP of France?
+  group 3: mean=-0.88 std=1.452 min=-2.0 max=+1.0 | How old was Guido van Rossum in 2020?
+  Avg reward: -1.359 | Avg tools/rollout: 0.9 | groups with variance: 4/4
+Deleted checkpoint ./.art/rl-tool-use/models/qwen-0.5b-tool-agent/checkpoints/0009
+Deleted checkpoint ./.art/rl-tool-use/models/qwen-0.5b-tool-agent/checkpoints/0005
+Packed 32 trajectories into 3 sequences of length 2048
+train:   0%|          | 0/3 [00:00<?, ?it/s]train:  33%|███▎      | 1/3 [00:01<00:03,  1.99s/it]train:  33%|███▎      | 1/3 [00:01<00:03,  1.99s/it, loss/train=-0.711, loss/grad_norm=1.94, loss/learning_rate=5e-5, loss/entropy=0.804]train:  67%|██████▋   | 2/3 [00:02<00:01,  1.02s/it, loss/train=-0.711, loss/grad_norm=1.94, loss/learning_rate=5e-5, loss/entropy=0.804]train:  67%|██████▋   | 2/3 [00:02<00:01,  1.02s/it, loss/train=0.395, loss/grad_norm=2.08, loss/learning_rate=5e-5, loss/entropy=1.18]  train: 100%|██████████| 3/3 [00:02<00:00,  1.41it/s, loss/train=0.395, loss/grad_norm=2.08, loss/learning_rate=5e-5, loss/entropy=1.18]train: 100%|██████████| 3/3 [00:02<00:00,  1.41it/s, loss/train=-0.169, loss/grad_norm=1.78, loss/learning_rate=5e-5, loss/entropy=1.04](APIServer pid=14938) Adapters before cleanup: ['default']
+(APIServer pid=14938) Keeping active adapter(s): ['default']
+(APIServer pid=14938) Adapters after cleanup: ['default']
+train: 100%|██████████| 3/3 [00:30<00:00, 10.23s/it, loss/train=-0.169, loss/grad_norm=1.78, loss/learning_rate=5e-5, loss/entropy=1.04]
+
+============================================================
+Step 12/50
+============================================================
+step 12:   0%|          | 0/32 [00:00<?, ?it/s]step 12:   3%|▎         | 1/32 [00:01<00:38,  1.25s/it]step 12:   3%|▎         | 1/32 [00:01<00:38,  1.25s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=40]step 12:   6%|▋         | 2/32 [00:01<00:17,  1.68it/s, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=40]step 12:   6%|▋         | 2/32 [00:01<00:17,  1.68it/s, reward=0.5, num_turns=1.5, num_tools=0.5, failed=0, completion_tokens=31.2]step 12:   9%|▉         | 3/32 [00:01<00:17,  1.68it/s, reward=-0.333, num_turns=1.67, num_tools=0.667, failed=0, completion_tokens=28.5]step 12:  12%|█▎        | 4/32 [00:01<00:16,  1.68it/s, reward=0.75, num_turns=1.75, num_tools=0.75, failed=0, completion_tokens=26.6]   step 12:  16%|█▌        | 5/32 [00:01<00:16,  1.68it/s, reward=1.4, num_turns=1.8, num_tools=0.8, failed=0, completion_tokens=25.7]   step 12:  19%|█▉        | 6/32 [00:01<00:15,  1.68it/s, reward=1.83, num_turns=1.83, num_tools=0.833, failed=0, completion_tokens=25.1]step 12:  22%|██▏       | 7/32 [00:01<00:14,  1.68it/s, reward=2.14, num_turns=1.86, num_tools=0.857, failed=0, completion_tokens=24.7]step 12:  25%|██▌       | 8/32 [00:01<00:14,  1.68it/s, reward=2, num_turns=1.88, num_tools=0.875, failed=0, completion_tokens=24.8]   step 12:  28%|██▊       | 9/32 [00:01<00:13,  1.68it/s, reward=1.89, num_turns=1.89, num_tools=0.889, failed=0, completion_tokens=24.6]step 12:  31%|███▏      | 10/32 [00:01<00:13,  1.68it/s, reward=1.5, num_turns=1.9, num_tools=0.9, failed=0, completion_tokens=24.7]   step 12:  34%|███▍      | 11/32 [00:01<00:12,  1.68it/s, reward=1.41, num_turns=1.91, num_tools=0.909, failed=0, completion_tokens=24.7]step 12:  38%|███▊      | 12/32 [00:01<00:11,  1.68it/s, reward=1.33, num_turns=1.92, num_tools=0.917, failed=0, completion_tokens=24.8]step 12:  41%|████      | 13/32 [00:01<00:11,  1.68it/s, reward=1.27, num_turns=1.92, num_tools=0.923, failed=0, completion_tokens=25]  step 12:  
44%|████▍     | 14/32 [00:01<00:10,  1.68it/s, reward=1.21, num_turns=1.93, num_tools=0.929, failed=0, completion_tokens=25.1]step 12:  47%|████▋     | 15/32 [00:01<00:10,  1.68it/s, reward=1.17, num_turns=1.93, num_tools=0.933, failed=0, completion_tokens=25.3]step 12:  50%|█████     | 16/32 [00:01<00:09,  1.68it/s, reward=1.12, num_turns=1.94, num_tools=0.938, failed=0, completion_tokens=25.5]step 12:  53%|█████▎    | 17/32 [00:01<00:08,  1.68it/s, reward=0.941, num_turns=1.94, num_tools=0.941, failed=0, completion_tokens=25.7]step 12:  56%|█████▋    | 18/32 [00:01<00:00, 21.05it/s, reward=0.941, num_turns=1.94, num_tools=0.941, failed=0, completion_tokens=25.7]step 12:  56%|█████▋    | 18/32 [00:01<00:00, 21.05it/s, reward=0.778, num_turns=1.94, num_tools=0.944, failed=0, completion_tokens=26.1]step 12:  59%|█████▉    | 19/32 [00:01<00:00, 21.05it/s, reward=0.842, num_turns=1.95, num_tools=0.947, failed=0, completion_tokens=26.6]step 12:  62%|██████▎   | 20/32 [00:01<00:00, 21.05it/s, reward=1, num_turns=1.95, num_tools=0.95, failed=0, completion_tokens=27.1]     step 12:  66%|██████▌   | 21/32 [00:01<00:00, 21.05it/s, reward=1.14, num_turns=1.95, num_tools=0.952, failed=0, completion_tokens=26.9]step 12:  69%|██████▉   | 22/32 [00:01<00:00, 21.05it/s, reward=1, num_turns=1.95, num_tools=0.955, failed=0, completion_tokens=27.6]   step 12:  72%|███████▏  | 23/32 [00:01<00:00, 21.05it/s, reward=0.87, num_turns=1.96, num_tools=0.957, failed=0, completion_tokens=28.3]step 12:  75%|███████▌  | 24/32 [00:01<00:00, 21.05it/s, reward=1, num_turns=1.96, num_tools=0.958, failed=0, completion_tokens=28.1]   step 12:  78%|███████▊  | 25/32 [00:01<00:00, 21.05it/s, reward=0.96, num_turns=1.96, num_tools=0.96, failed=0, completion_tokens=29]step 12:  81%|████████▏ | 26/32 [00:01<00:00, 24.55it/s, reward=0.96, num_turns=1.96, num_tools=0.96, failed=0, completion_tokens=29]step 12:  81%|████████▏ | 26/32 [00:01<00:00, 24.55it/s, reward=0.962, num_turns=1.96, 
num_tools=0.962, failed=0, completion_tokens=28.9]step 12:  84%|████████▍ | 27/32 [00:01<00:00, 24.55it/s, reward=0.852, num_turns=1.96, num_tools=0.963, failed=0, completion_tokens=29.9]step 12:  88%|████████▊ | 28/32 [00:01<00:00, 24.55it/s, reward=0.75, num_turns=1.96, num_tools=0.964, failed=0, completion_tokens=30.8] step 12:  91%|█████████ | 29/32 [00:01<00:00, 24.55it/s, reward=0.655, num_turns=1.97, num_tools=0.966, failed=0, completion_tokens=31.8]step 12:  94%|█████████▍| 30/32 [00:01<00:00, 24.55it/s, reward=0.567, num_turns=1.97, num_tools=0.967, failed=0, completion_tokens=32.2]step 12:  97%|█████████▋| 31/32 [00:02<00:00, 24.55it/s, reward=0.452, num_turns=1.94, num_tools=0.935, failed=0, completion_tokens=40]  step 12: 100%|██████████| 32/32 [00:03<00:00, 10.05it/s, reward=0.452, num_turns=1.94, num_tools=0.935, failed=0, completion_tokens=40]step 12: 100%|██████████| 32/32 [00:03<00:00, 10.05it/s, reward=0.484, num_turns=1.97, num_tools=0.969, failed=0, completion_tokens=43.4]step 12: 100%|██████████| 32/32 [00:03<00:00, 10.12it/s, reward=0.484, num_turns=1.97, num_tools=0.969, failed=0, completion_tokens=43.4]
+  group 0: mean=-0.44 std=1.861 min=-3.0 max=+2.0 | What is the distance from Earth to the Sun in km i
+  group 1: mean=+4.00 std=0.000 min=+4.0 max=+4.0 | What's the weather like in Sydney?
+  group 2: mean=+0.50 std=0.250 min=+0.0 max=+1.0 | What is the temperature in Berlin in Fahrenheit?
+  group 3: mean=-2.12 std=0.331 min=-3.0 max=-2.0 | What is the speed of light?
+  Avg reward: 0.484 | Avg tools/rollout: 1.0 | groups with variance: 3/4
+Packed 18 trajectories into 2 sequences of length 2048
+train:   0%|          | 0/2 [00:00<?, ?it/s]train:  50%|█████     | 1/2 [00:01<00:01,  1.95s/it]train:  50%|█████     | 1/2 [00:01<00:01,  1.95s/it, loss/train=-0.202, loss/grad_norm=1.38, loss/learning_rate=5e-5, loss/entropy=0.754]train: 100%|██████████| 2/2 [00:02<00:00,  1.00s/it, loss/train=-0.202, loss/grad_norm=1.38, loss/learning_rate=5e-5, loss/entropy=0.754]train: 100%|██████████| 2/2 [00:02<00:00,  1.00s/it, loss/train=-1.73, loss/grad_norm=1.7, loss/learning_rate=5e-5, loss/entropy=1.34]   (APIServer pid=14938) Adapters before cleanup: ['default']
+(APIServer pid=14938) Keeping active adapter(s): ['default']
+(APIServer pid=14938) Adapters after cleanup: ['default']
+train: 100%|██████████| 2/2 [00:30<00:00, 15.35s/it, loss/train=-1.73, loss/grad_norm=1.7, loss/learning_rate=5e-5, loss/entropy=1.34]
+
+============================================================
+Step 13/50
+============================================================
+step 13:   0%|          | 0/32 [00:00<?, ?it/s]step 13:   3%|▎         | 1/32 [00:01<00:37,  1.22s/it]step 13:   3%|▎         | 1/32 [00:01<00:37,  1.22s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=36]step 13:   6%|▋         | 2/32 [00:01<00:36,  1.22s/it, reward=-2.5, num_turns=1.5, num_tools=0.5, failed=0, completion_tokens=28]step 13:   9%|▉         | 3/32 [00:01<00:10,  2.79it/s, reward=-2.5, num_turns=1.5, num_tools=0.5, failed=0, completion_tokens=28]step 13:   9%|▉         | 3/32 [00:01<00:10,  2.79it/s, reward=-0.333, num_turns=1.67, num_tools=0.667, failed=0, completion_tokens=26.2]step 13:  12%|█▎        | 4/32 [00:01<00:10,  2.79it/s, reward=0.125, num_turns=1.75, num_tools=0.75, failed=0, completion_tokens=25.2]  step 13:  16%|█▌        | 5/32 [00:01<00:09,  2.79it/s, reward=-0.3, num_turns=1.8, num_tools=0.8, failed=0, completion_tokens=24.8]   step 13:  19%|█▉        | 6/32 [00:01<00:09,  2.79it/s, reward=0.0833, num_turns=1.83, num_tools=0.833, failed=0, completion_tokens=24.4]step 13:  22%|██▏       | 7/32 [00:01<00:08,  2.79it/s, reward=0.143, num_turns=1.86, num_tools=0.857, failed=0, completion_tokens=24.5] step 13:  25%|██▌       | 8/32 [00:01<00:08,  2.79it/s, reward=-0.125, num_turns=1.88, num_tools=0.875, failed=0, completion_tokens=24.6]step 13:  28%|██▊       | 9/32 [00:01<00:08,  2.79it/s, reward=0.111, num_turns=1.89, num_tools=0.889, failed=0, completion_tokens=24.3] step 13:  31%|███▏      | 10/32 [00:01<00:07,  2.79it/s, reward=0.15, num_turns=1.9, num_tools=0.9, failed=0, completion_tokens=24.5]   step 13:  34%|███▍      | 11/32 [00:01<00:07,  2.79it/s, reward=-0.0455, num_turns=1.91, num_tools=0.909, failed=0, completion_tokens=24.7]step 13:  38%|███▊      | 12/32 [00:01<00:07,  2.79it/s, reward=0, num_turns=1.92, num_tools=0.917, failed=0, completion_tokens=24.8]      step 13:  41%|████      | 13/32 [00:01<00:06,  2.79it/s, reward=0.0385, num_turns=1.92, num_tools=0.923, failed=0, 
completion_tokens=25]step 13:  44%|████▍     | 14/32 [00:01<00:06,  2.79it/s, reward=-0.107, num_turns=1.93, num_tools=0.929, failed=0, completion_tokens=25.2]step 13:  47%|████▋     | 15/32 [00:01<00:06,  2.79it/s, reward=-0.0667, num_turns=1.93, num_tools=0.933, failed=0, completion_tokens=25.4]step 13:  50%|█████     | 16/32 [00:01<00:00, 19.08it/s, reward=-0.0667, num_turns=1.93, num_tools=0.933, failed=0, completion_tokens=25.4]step 13:  50%|█████     | 16/32 [00:01<00:00, 19.08it/s, reward=-0.188, num_turns=1.94, num_tools=0.938, failed=0, completion_tokens=25.8] step 13:  53%|█████▎    | 17/32 [00:01<00:00, 19.08it/s, reward=-0.147, num_turns=1.94, num_tools=0.941, failed=0, completion_tokens=26.1]step 13:  56%|█████▋    | 18/32 [00:01<00:00, 19.08it/s, reward=-0.25, num_turns=1.94, num_tools=0.944, failed=0, completion_tokens=26.6] step 13:  59%|█████▉    | 19/32 [00:01<00:00, 19.08it/s, reward=-0.132, num_turns=1.95, num_tools=0.947, failed=0, completion_tokens=26.4]step 13:  62%|██████▎   | 20/32 [00:01<00:00, 19.08it/s, reward=-0.025, num_turns=1.95, num_tools=0.95, failed=0, completion_tokens=26.2] step 13:  66%|██████▌   | 21/32 [00:01<00:00, 19.08it/s, reward=0.0714, num_turns=1.95, num_tools=0.952, failed=0, completion_tokens=26] step 13:  69%|██████▉   | 22/32 [00:01<00:00, 19.08it/s, reward=0.0985, num_turns=1.95, num_tools=0.955, failed=0, completion_tokens=26.8]step 13:  72%|███████▏  | 23/32 [00:01<00:00, 26.42it/s, reward=0.0985, num_turns=1.95, num_tools=0.955, failed=0, completion_tokens=26.8]step 13:  72%|███████▏  | 23/32 [00:01<00:00, 26.42it/s, reward=0.00725, num_turns=1.96, num_tools=0.957, failed=0, completion_tokens=27.5]step 13:  75%|███████▌  | 24/32 [00:01<00:00, 26.42it/s, reward=-0.0764, num_turns=1.96, num_tools=0.958, failed=0, completion_tokens=28]  step 13:  78%|███████▊  | 25/32 [00:01<00:00, 26.42it/s, reward=0.00667, num_turns=1.96, num_tools=0.96, failed=0, completion_tokens=27.7]step 13:  81%|████████▏ | 
26/32 [00:01<00:00, 26.42it/s, reward=0.0641, num_turns=1.96, num_tools=0.962, failed=0, completion_tokens=27.8]step 13:  84%|████████▍ | 27/32 [00:01<00:00, 26.42it/s, reward=-0.0123, num_turns=1.96, num_tools=0.963, failed=0, completion_tokens=27.8]step 13:  88%|████████▊ | 28/32 [00:01<00:00, 26.42it/s, reward=0.0238, num_turns=1.96, num_tools=0.964, failed=0, completion_tokens=28.6] step 13:  91%|█████████ | 29/32 [00:01<00:00, 26.42it/s, reward=-0.0805, num_turns=1.93, num_tools=0.931, failed=0, completion_tokens=32.3]step 13:  94%|█████████▍| 30/32 [00:02<00:00, 11.53it/s, reward=-0.0805, num_turns=1.93, num_tools=0.931, failed=0, completion_tokens=32.3]step 13:  94%|█████████▍| 30/32 [00:02<00:00, 11.53it/s, reward=-0.0333, num_turns=1.93, num_tools=0.933, failed=0, completion_tokens=37.5]step 13:  97%|█████████▋| 31/32 [00:03<00:00, 11.53it/s, reward=0.0215, num_turns=1.97, num_tools=0.968, failed=0, completion_tokens=40.6] step 13: 100%|██████████| 32/32 [00:03<00:00, 11.53it/s, reward=-0.0729, num_turns=1.94, num_tools=0.938, failed=0, completion_tokens=54.7]step 13: 100%|██████████| 32/32 [00:03<00:00,  9.95it/s, reward=-0.0729, num_turns=1.94, num_tools=0.938, failed=0, completion_tokens=54.7]
+  group 0: mean=-2.12 std=0.331 min=-3.0 max=-2.0 | Which is hotter right now, Tokyo or Cairo?
+  group 1: mean=+0.75 std=0.433 min=+0.5 max=+1.5 | What is the temperature in London in Fahrenheit?
+  group 2: mean=-0.67 std=1.886 min=-3.0 max=+1.7 | What is Germany's population density in people per
+  group 3: mean=+1.75 std=1.561 min=-2.0 max=+4.0 | What is 575 times 22?
+  Avg reward: -0.073 | Avg tools/rollout: 0.9 | groups with variance: 4/4
+Deleted checkpoint ./.art/rl-tool-use/models/qwen-0.5b-tool-agent/checkpoints/0011
+Packed 32 trajectories into 3 sequences of length 2048
+train:   0%|          | 0/3 [00:00<?, ?it/s]train:  33%|███▎      | 1/3 [00:01<00:03,  1.94s/it]train:  33%|███▎      | 1/3 [00:01<00:03,  1.94s/it, loss/train=0.274, loss/grad_norm=1.74, loss/learning_rate=5e-5, loss/entropy=0.954]train:  67%|██████▋   | 2/3 [00:02<00:00,  1.00it/s, loss/train=0.274, loss/grad_norm=1.74, loss/learning_rate=5e-5, loss/entropy=0.954]train:  67%|██████▋   | 2/3 [00:02<00:00,  1.00it/s, loss/train=0.162, loss/grad_norm=1.79, loss/learning_rate=5e-5, loss/entropy=0.522]train: 100%|██████████| 3/3 [00:02<00:00,  1.44it/s, loss/train=0.162, loss/grad_norm=1.79, loss/learning_rate=5e-5, loss/entropy=0.522]train: 100%|██████████| 3/3 [00:02<00:00,  1.44it/s, loss/train=-1.37, loss/grad_norm=0.978, loss/learning_rate=5e-5, loss/entropy=0.731](APIServer pid=14938) Adapters before cleanup: ['default']
+(APIServer pid=14938) Keeping active adapter(s): ['default']
+(APIServer pid=14938) Adapters after cleanup: ['default']
+train: 100%|██████████| 3/3 [00:30<00:00, 10.25s/it, loss/train=-1.37, loss/grad_norm=0.978, loss/learning_rate=5e-5, loss/entropy=0.731]
+
+============================================================
+Step 14/50
+============================================================
+step 14:   0%|          | 0/32 [00:00<?, ?it/s]step 14:   3%|▎         | 1/32 [00:01<00:43,  1.39s/it]step 14:   3%|▎         | 1/32 [00:01<00:43,  1.39s/it, reward=4, num_turns=2, num_tools=1, failed=0, completion_tokens=21]step 14:   6%|▋         | 2/32 [00:01<00:41,  1.39s/it, reward=4, num_turns=2, num_tools=1, failed=0, completion_tokens=21]step 14:   9%|▉         | 3/32 [00:01<00:40,  1.39s/it, reward=4, num_turns=2, num_tools=1, failed=0, completion_tokens=21.3]step 14:  12%|█▎        | 4/32 [00:01<00:38,  1.39s/it, reward=4, num_turns=2, num_tools=1, failed=0, completion_tokens=21.6]step 14:  16%|█▌        | 5/32 [00:01<00:37,  1.39s/it, reward=4, num_turns=2, num_tools=1, failed=0, completion_tokens=21.9]step 14:  19%|█▉        | 6/32 [00:01<00:36,  1.39s/it, reward=4, num_turns=2, num_tools=1, failed=0, completion_tokens=22.3]step 14:  22%|██▏       | 7/32 [00:01<00:34,  1.39s/it, reward=4, num_turns=2, num_tools=1, failed=0, completion_tokens=22.7]step 14:  25%|██▌       | 8/32 [00:01<00:33,  1.39s/it, reward=3.75, num_turns=2, num_tools=1, failed=0, completion_tokens=23.2]step 14:  28%|██▊       | 9/32 [00:01<00:31,  1.39s/it, reward=3.11, num_turns=2, num_tools=1, failed=0, completion_tokens=23.2]step 14:  31%|███▏      | 10/32 [00:01<00:30,  1.39s/it, reward=3.13, num_turns=2, num_tools=1, failed=0, completion_tokens=23.7]step 14:  34%|███▍      | 11/32 [00:01<00:29,  1.39s/it, reward=3.15, num_turns=2, num_tools=1, failed=0, completion_tokens=24.1]step 14:  38%|███▊      | 12/32 [00:01<00:27,  1.39s/it, reward=3.17, num_turns=2, num_tools=1, failed=0, completion_tokens=24.5]step 14:  41%|████      | 13/32 [00:01<00:26,  1.39s/it, reward=3, num_turns=2, num_tools=1, failed=0, completion_tokens=24.8]   step 14:  44%|████▍     | 14/32 [00:01<00:01, 12.73it/s, reward=3, num_turns=2, num_tools=1, failed=0, completion_tokens=24.8]step 14:  44%|████▍     | 14/32 [00:01<00:01, 12.73it/s, reward=3.07, num_turns=2, num_tools=1, failed=0, 
completion_tokens=25.1]step 14:  47%|████▋     | 15/32 [00:01<00:01, 12.73it/s, reward=3.13, num_turns=2, num_tools=1, failed=0, completion_tokens=25.3]step 14:  50%|█████     | 16/32 [00:01<00:01, 12.73it/s, reward=3.19, num_turns=2, num_tools=1, failed=0, completion_tokens=25.5]step 14:  53%|█████▎    | 17/32 [00:01<00:01, 12.73it/s, reward=3.24, num_turns=2, num_tools=1, failed=0, completion_tokens=25.7]step 14:  56%|█████▋    | 18/32 [00:01<00:01, 12.73it/s, reward=3.28, num_turns=2, num_tools=1, failed=0, completion_tokens=25.9]step 14:  59%|█████▉    | 19/32 [00:01<00:01, 12.73it/s, reward=3.13, num_turns=2, num_tools=1, failed=0, completion_tokens=25.9]step 14:  62%|██████▎   | 20/32 [00:01<00:00, 12.73it/s, reward=3, num_turns=2, num_tools=1, failed=0, completion_tokens=26]     step 14:  66%|██████▌   | 21/32 [00:01<00:00, 12.73it/s, reward=2.88, num_turns=2, num_tools=1, failed=0, completion_tokens=26]step 14:  69%|██████▉   | 22/32 [00:01<00:00, 12.73it/s, reward=2.77, num_turns=2, num_tools=1, failed=0, completion_tokens=26.1]step 14:  72%|███████▏  | 23/32 [00:01<00:00, 12.73it/s, reward=2.72, num_turns=2, num_tools=1, failed=0, completion_tokens=26]  step 14:  75%|███████▌  | 24/32 [00:01<00:00, 12.73it/s, reward=2.62, num_turns=2, num_tools=1, failed=0, completion_tokens=26.2]step 14:  78%|███████▊  | 25/32 [00:01<00:00, 12.73it/s, reward=2.68, num_turns=2, num_tools=1, failed=0, completion_tokens=26.1]step 14:  81%|████████▏ | 26/32 [00:01<00:00, 12.73it/s, reward=2.64, num_turns=2, num_tools=1, failed=0, completion_tokens=26.4]step 14:  84%|████████▍ | 27/32 [00:01<00:00, 25.95it/s, reward=2.64, num_turns=2, num_tools=1, failed=0, completion_tokens=26.4]step 14:  84%|████████▍ | 27/32 [00:01<00:00, 25.95it/s, reward=2.43, num_turns=1.96, num_tools=0.963, failed=0, completion_tokens=29.6]step 14:  88%|████████▊ | 28/32 [00:01<00:00, 25.95it/s, reward=2.27, num_turns=1.96, num_tools=0.964, failed=0, completion_tokens=30]  step 14:  
91%|█████████ | 29/32 [00:01<00:00, 25.95it/s, reward=2.13, num_turns=1.97, num_tools=0.966, failed=0, completion_tokens=30.2]step 14:  94%|█████████▍| 30/32 [00:01<00:00, 25.95it/s, reward=1.99, num_turns=1.97, num_tools=0.967, failed=0, completion_tokens=30.8]step 14:  97%|█████████▋| 31/32 [00:01<00:00, 25.95it/s, reward=1.86, num_turns=1.97, num_tools=0.968, failed=0, completion_tokens=32]  step 14: 100%|██████████| 32/32 [00:01<00:00, 25.95it/s, reward=1.74, num_turns=1.97, num_tools=0.969, failed=0, completion_tokens=33.1]step 14: 100%|██████████| 32/32 [00:01<00:00, 17.14it/s, reward=1.74, num_turns=1.97, num_tools=0.969, failed=0, completion_tokens=33.1]
+  group 0: mean=+4.00 std=0.000 min=+4.0 max=+4.0 | What's the weather like in Paris?
+  group 1: mean=+3.75 std=0.323 min=+3.3 max=+4.0 | Convert 28 kg to lbs.
+  group 2: mean=-1.67 std=1.302 min=-3.0 max=+1.7 | What is the population of Germany divided by its a
+  group 3: mean=+0.88 std=0.545 min=+0.5 max=+2.0 | What is the temperature in Tokyo in Fahrenheit?
+  Avg reward: 1.740 | Avg tools/rollout: 1.0 | groups with variance: 3/4
+Deleted checkpoint ./.art/rl-tool-use/models/qwen-0.5b-tool-agent/checkpoints/0012
+Packed 24 trajectories into 2 sequences of length 2048
+train:   0%|          | 0/2 [00:00<?, ?it/s]train:  50%|█████     | 1/2 [00:01<00:01,  1.83s/it]train:  50%|█████     | 1/2 [00:01<00:01,  1.83s/it, loss/train=-0.223, loss/grad_norm=0.761, loss/learning_rate=5e-5, loss/entropy=0.571]train: 100%|██████████| 2/2 [00:02<00:00,  1.07it/s, loss/train=-0.223, loss/grad_norm=0.761, loss/learning_rate=5e-5, loss/entropy=0.571]train: 100%|██████████| 2/2 [00:02<00:00,  1.07it/s, loss/train=0.383, loss/grad_norm=2.75, loss/learning_rate=5e-5, loss/entropy=0.603]  (APIServer pid=14938) Adapters before cleanup: ['default']
+(APIServer pid=14938) Keeping active adapter(s): ['default']
+(APIServer pid=14938) Adapters after cleanup: ['default']
+train: 100%|██████████| 2/2 [00:29<00:00, 14.67s/it, loss/train=0.383, loss/grad_norm=2.75, loss/learning_rate=5e-5, loss/entropy=0.603]
+
+============================================================
+Step 15/50
+============================================================
+step 15:   0%|          | 0/32 [00:00<?, ?it/s]step 15:   3%|▎         | 1/32 [00:01<00:42,  1.38s/it]step 15:   3%|▎         | 1/32 [00:01<00:42,  1.38s/it, reward=1, num_turns=2, num_tools=1, failed=0, completion_tokens=22.5]step 15:   6%|▋         | 2/32 [00:01<00:41,  1.38s/it, reward=1.5, num_turns=2, num_tools=1, failed=0, completion_tokens=22.2]step 15:   9%|▉         | 3/32 [00:01<00:40,  1.38s/it, reward=1.67, num_turns=2, num_tools=1, failed=0, completion_tokens=22.2]step 15:  12%|█▎        | 4/32 [00:01<00:38,  1.38s/it, reward=1.75, num_turns=2, num_tools=1, failed=0, completion_tokens=22.1]step 15:  16%|█▌        | 5/32 [00:01<00:37,  1.38s/it, reward=1.8, num_turns=2, num_tools=1, failed=0, completion_tokens=22.1] step 15:  19%|█▉        | 6/32 [00:01<00:35,  1.38s/it, reward=1.17, num_turns=2, num_tools=1, failed=0, completion_tokens=22.9]step 15:  22%|██▏       | 7/32 [00:01<00:34,  1.38s/it, reward=0.714, num_turns=2, num_tools=1, failed=0, completion_tokens=23.5]step 15:  25%|██▌       | 8/32 [00:01<00:33,  1.38s/it, reward=1.12, num_turns=2, num_tools=1, failed=0, completion_tokens=24.1] step 15:  28%|██▊       | 9/32 [00:01<00:31,  1.38s/it, reward=1.44, num_turns=2, num_tools=1, failed=0, completion_tokens=24.6]step 15:  31%|███▏      | 10/32 [00:01<00:30,  1.38s/it, reward=1.1, num_turns=2, num_tools=1, failed=0, completion_tokens=24.9]step 15:  34%|███▍      | 11/32 [00:01<00:29,  1.38s/it, reward=0.818, num_turns=2, num_tools=1, failed=0, completion_tokens=25]step 15:  38%|███▊      | 12/32 [00:01<00:27,  1.38s/it, reward=1.08, num_turns=2, num_tools=1, failed=0, completion_tokens=25.2]step 15:  41%|████      | 13/32 [00:01<00:26,  1.38s/it, reward=1.31, num_turns=2, num_tools=1, failed=0, completion_tokens=25.4]step 15:  44%|████▍     | 14/32 [00:01<00:24,  1.38s/it, reward=1.5, num_turns=2, num_tools=1, failed=0, completion_tokens=25.5] step 15:  47%|████▋     | 15/32 [00:01<00:23,  1.38s/it, reward=1.62, num_turns=2, 
num_tools=1, failed=0, completion_tokens=25.7]step 15:  50%|█████     | 16/32 [00:01<00:22,  1.38s/it, reward=1.4, num_turns=2, num_tools=1, failed=0, completion_tokens=25.9] step 15:  53%|█████▎    | 17/32 [00:01<00:20,  1.38s/it, reward=1.55, num_turns=2, num_tools=1, failed=0, completion_tokens=26.1]step 15:  56%|█████▋    | 18/32 [00:01<00:19,  1.38s/it, reward=1.57, num_turns=2, num_tools=1, failed=0, completion_tokens=25.9]step 15:  59%|█████▉    | 19/32 [00:01<00:17,  1.38s/it, reward=1.6, num_turns=2, num_tools=1, failed=0, completion_tokens=25.7] step 15:  62%|██████▎   | 20/32 [00:01<00:16,  1.38s/it, reward=1.62, num_turns=2, num_tools=1, failed=0, completion_tokens=25.5]step 15:  66%|██████▌   | 21/32 [00:01<00:15,  1.38s/it, reward=1.63, num_turns=2, num_tools=1, failed=0, completion_tokens=25.3]step 15:  69%|██████▉   | 22/32 [00:01<00:13,  1.38s/it, reward=1.74, num_turns=2, num_tools=1, failed=0, completion_tokens=25.4]step 15:  72%|███████▏  | 23/32 [00:01<00:12,  1.38s/it, reward=1.58, num_turns=2, num_tools=1, failed=0, completion_tokens=25.5]step 15:  75%|███████▌  | 24/32 [00:01<00:00, 21.63it/s, reward=1.58, num_turns=2, num_tools=1, failed=0, completion_tokens=25.5]step 15:  75%|███████▌  | 24/32 [00:01<00:00, 21.63it/s, reward=1.43, num_turns=2, num_tools=1, failed=0, completion_tokens=26]  step 15:  78%|███████▊  | 25/32 [00:01<00:00, 21.63it/s, reward=1.29, num_turns=2, num_tools=1, failed=0, completion_tokens=26.6]step 15:  81%|████████▏ | 26/32 [00:01<00:00, 21.63it/s, reward=1.17, num_turns=2, num_tools=1, failed=0, completion_tokens=27.2]step 15:  84%|████████▍ | 27/32 [00:01<00:00, 21.63it/s, reward=1.05, num_turns=2, num_tools=1, failed=0, completion_tokens=27.4]step 15:  88%|████████▊ | 28/32 [00:01<00:00, 21.63it/s, reward=1.05, num_turns=2, num_tools=1, failed=0, completion_tokens=28.8]step 15:  91%|█████████ | 29/32 [00:01<00:00, 21.63it/s, reward=0.943, num_turns=2, num_tools=1, failed=0, completion_tokens=30.1]
step 15:  94%|█████████▍| 30/32 [00:01<00:00, 21.63it/s, reward=0.844, num_turns=2, num_tools=1, failed=0, completion_tokens=31.4]step 15:  97%|█████████▋| 31/32 [00:01<00:00, 21.63it/s, reward=0.753, num_turns=2.03, num_tools=1.03, failed=0, completion_tokens=32.1]step 15: 100%|██████████| 32/32 [00:02<00:00, 21.63it/s, reward=0.635, num_turns=2, num_tools=1, failed=0, completion_tokens=40.6]      step 15: 100%|██████████| 32/32 [00:02<00:00, 13.23it/s, reward=0.635, num_turns=2, num_tools=1, failed=0, completion_tokens=40.6]
+  group 0: mean=-2.12 std=0.331 min=-3.0 max=-2.0 | Which is hotter right now, Paris or Mumbai?
+  group 1: mean=-1.25 std=1.299 min=-2.0 max=+1.0 | How old was Guido van Rossum in 2020?
+  group 2: mean=+3.92 std=0.220 min=+3.3 max=+4.0 | Convert 26 kg to lbs.
+  group 3: mean=+2.00 std=0.000 min=+2.0 max=+2.0 | What is 965 plus 85?
+  Avg reward: 0.635 | Avg tools/rollout: 1.0 | groups with variance: 3/4
+Deleted checkpoint ./.art/rl-tool-use/models/qwen-0.5b-tool-agent/checkpoints/0013
+Packed 24 trajectories into 2 sequences of length 2048
+train:   0%|          | 0/2 [00:00<?, ?it/s]train:  50%|█████     | 1/2 [00:01<00:01,  1.90s/it]train:  50%|█████     | 1/2 [00:01<00:01,  1.90s/it, loss/train=0.154, loss/grad_norm=1.1, loss/learning_rate=5e-5, loss/entropy=0.721]train: 100%|██████████| 2/2 [00:02<00:00,  1.02it/s, loss/train=0.154, loss/grad_norm=1.1, loss/learning_rate=5e-5, loss/entropy=0.721]train: 100%|██████████| 2/2 [00:02<00:00,  1.02it/s, loss/train=-0.222, loss/grad_norm=6.06, loss/learning_rate=5e-5, loss/entropy=0.556](APIServer pid=14938) Adapters before cleanup: ['default']
+(APIServer pid=14938) Keeping active adapter(s): ['default']
+(APIServer pid=14938) Adapters after cleanup: ['default']
+train: 100%|██████████| 2/2 [00:30<00:00, 15.15s/it, loss/train=-0.222, loss/grad_norm=6.06, loss/learning_rate=5e-5, loss/entropy=0.556]
+  Running validation...
+validation:   0%|          | 0/400 [00:00<?, ?it/s]validation:   0%|          | 1/400 [00:11<1:19:09, 11.90s/it]validation:   0%|          | 1/400 [00:11<1:19:09, 11.90s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=34]validation:   0%|          | 2/400 [00:11<1:18:58, 11.90s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=35]validation:   1%|          | 3/400 [00:12<21:12,  3.21s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=35]  validation:   1%|          | 3/400 [00:12<21:12,  3.21s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=31]validation:   1%|          | 4/400 [00:12<21:09,  3.21s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=27.2]validation:   1%|▏         | 5/400 [00:12<10:25,  1.58s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=27.2]validation:   1%|▏         | 5/400 [00:12<10:25,  1.58s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=31.2]validation:   2%|▏         | 6/400 [00:12<10:24,  1.58s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=34.2]validation:   2%|▏         | 7/400 [00:12<10:22,  1.58s/it, reward=-2.86, num_turns=1.14, num_tools=0.143, failed=0, completion_tokens=32.4]validation:   2%|▏         | 8/400 [00:12<10:20,  1.58s/it, reward=-2.75, num_turns=1.25, num_tools=0.25, failed=0, completion_tokens=31.7] validation:   2%|▏         | 9/400 [00:12<10:19,  1.58s/it, reward=-2.78, num_turns=1.22, num_tools=0.222, failed=0, completion_tokens=38.1]validation:   2%|▎         | 10/400 [00:12<03:43,  1.75it/s, reward=-2.78, num_turns=1.22, num_tools=0.222, failed=0, completion_tokens=38.1]validation:   2%|▎         | 10/400 [00:12<03:43,  1.75it/s, reward=-2.7, num_turns=1.3, num_tools=0.3, failed=0, completion_tokens=38.3]    validation:   3%|▎         | 11/400 [00:12<03:42,  1.75it/s, reward=-2.64, num_turns=1.36, num_tools=0.364, failed=0, 
completion_tokens=37.4]validation:   3%|▎         | 12/400 [00:12<03:42,  1.75it/s, reward=-2.33, num_turns=1.42, num_tools=0.417, failed=0, completion_tokens=36.2]validation:   3%|▎         | 13/400 [00:12<03:41,  1.75it/s, reward=-2.38, num_turns=1.38, num_tools=0.385, failed=0, completion_tokens=36.5]validation:   4%|▎         | 14/400 [00:12<03:41,  1.75it/s, reward=-1.93, num_turns=1.43, num_tools=0.429, failed=0, completion_tokens=35.9]validation:   4%|▍         | 15/400 [00:12<03:40,  1.75it/s, reward=-1.53, num_turns=1.47, num_tools=0.467, failed=0, completion_tokens=35.5]validation:   4%|▍         | 16/400 [00:12<03:39,  1.75it/s, reward=-1.19, num_turns=1.5, num_tools=0.5, failed=0, completion_tokens=35.1]   validation:   4%|▍         | 17/400 [00:12<03:39,  1.75it/s, reward=-0.882, num_turns=1.53, num_tools=0.529, failed=0, completion_tokens=34.7]validation:   4%|▍         | 18/400 [00:12<03:38,  1.75it/s, reward=-0.611, num_turns=1.56, num_tools=0.556, failed=0, completion_tokens=34.4]validation:   5%|▍         | 19/400 [00:12<03:38,  1.75it/s, reward=-0.368, num_turns=1.58, num_tools=0.579, failed=0, completion_tokens=34.1]validation:   5%|▌         | 20/400 [00:12<03:37,  1.75it/s, reward=-0.183, num_turns=1.6, num_tools=0.6, failed=0, completion_tokens=33.9]   validation:   5%|▌         | 21/400 [00:12<03:37,  1.75it/s, reward=-0.27, num_turns=1.62, num_tools=0.619, failed=0, completion_tokens=33.8]validation:   6%|▌         | 22/400 [00:12<03:36,  1.75it/s, reward=-0.348, num_turns=1.64, num_tools=0.636, failed=0, completion_tokens=33.7]validation:   6%|▌         | 23/400 [00:12<03:35,  1.75it/s, reward=-0.42, num_turns=1.65, num_tools=0.652, failed=0, completion_tokens=34]   validation:   6%|▌         | 24/400 [00:12<03:35,  1.75it/s, reward=-0.486, num_turns=1.67, num_tools=0.667, failed=0, completion_tokens=34.5]validation:   6%|▋         | 25/400 [00:12<01:01,  6.05it/s, reward=-0.486, num_turns=1.67, num_tools=0.667, failed=0, 
completion_tokens=34.5]validation:   6%|▋         | 25/400 [00:12<01:01,  6.05it/s, reward=-0.587, num_turns=1.64, num_tools=0.64, failed=0, completion_tokens=34.4] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:   6%|▋         | 26/400 [00:13<01:01,  6.05it/s, reward=-0.679, num_turns=1.62, num_tools=0.615, failed=0.0385, completion_tokens=34.4]validation:   7%|▋         | 27/400 [00:13<01:01,  6.05it/s, reward=-0.765, num_turns=1.59, num_tools=0.593, failed=0.0741, completion_tokens=34.4][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:   7%|▋         | 28/400 [00:13<01:01,  6.05it/s, reward=-0.845, num_turns=1.57, num_tools=0.571, failed=0.107, completion_tokens=34.4] validation:   7%|▋         | 29/400 [00:13<00:57,  6.41it/s, reward=-0.845, num_turns=1.57, num_tools=0.571, failed=0.107, completion_tokens=34.4]validation:   7%|▋         | 29/400 [00:13<00:57,  6.41it/s, reward=-0.92, num_turns=1.55, num_tools=0.552, failed=0.138, completion_tokens=34.4] validation:   8%|▊         | 30/400 [00:13<00:57,  6.41it/s, reward=-0.989, num_turns=1.53, num_tools=0.533, failed=0.167, completion_tokens=34.4]validation:   8%|▊         | 31/400 [00:13<00:57,  6.41it/s, reward=-1.02, num_turns=1.55, num_tools=0.548, failed=0.161, completion_tokens=33.6] validation:   8%|▊         | 32/400 [00:13<00:57,  6.41it/s, reward=-1.08, num_turns=1.53, num_tools=0.531, failed=0.188, completion_tokens=33.6]validation:   8%|▊         | 33/400 [00:13<00:57,  6.41it/s, reward=-1.11, num_turns=1.55, num_tools=0.545, failed=0.182, completion_tokens=33.2]validation:   8%|▊         | 34/400 [00:13<00:57,  6.41it/s, reward=-1.14, num_turns=1.56, num_tools=0.559, failed=0.176, completion_tokens=32.7]validation:   9%|▉         | 35/400 [00:13<00:56,  6.41it/s, reward=-1.08, num_turns=1.57, num_tools=0.571, failed=0.171, completion_tokens=32.4]validation:   9%|▉         | 36/400 [00:13<00:56,  6.41it/s, reward=-0.991, num_turns=1.58, num_tools=0.583, failed=0.167, completion_tokens=32] validation:   9%|▉         | 37/400 [00:13<00:56,  6.41it/s, reward=-0.91, num_turns=1.59, num_tools=0.595, failed=0.162, completion_tokens=31.7]validation:  10%|▉         | 38/400 [00:13<00:56,  6.41it/s, reward=-0.833, num_turns=1.61, num_tools=0.605, failed=0.158, completion_tokens=31.4]validation:  10%|▉         | 39/400 [00:13<00:56,  6.41it/s, reward=-0.709, num_turns=1.62, num_tools=0.615, failed=0.154, completion_tokens=31.2]validation:  10%|█         | 40/400 [00:13<00:56,  6.41it/s, reward=-0.592, num_turns=1.62, 
num_tools=0.625, failed=0.15, completion_tokens=31.1] validation:  10%|█         | 41/400 [00:13<00:56,  6.41it/s, reward=-0.48, num_turns=1.63, num_tools=0.634, failed=0.146, completion_tokens=31.1]validation:  10%|█         | 42/400 [00:13<00:55,  6.41it/s, reward=-0.373, num_turns=1.64, num_tools=0.643, failed=0.143, completion_tokens=31] validation:  11%|█         | 43/400 [00:13<00:55,  6.41it/s, reward=-0.411, num_turns=1.65, num_tools=0.651, failed=0.14, completion_tokens=30.8]validation:  11%|█         | 44/400 [00:13<00:55,  6.41it/s, reward=-0.47, num_turns=1.64, num_tools=0.636, failed=0.136, completion_tokens=35.9]validation:  11%|█▏        | 45/400 [00:13<00:55,  6.41it/s, reward=-0.504, num_turns=1.64, num_tools=0.644, failed=0.133, completion_tokens=35.6]validation:  12%|█▏        | 46/400 [00:13<00:55,  6.41it/s, reward=-0.536, num_turns=1.65, num_tools=0.652, failed=0.13, completion_tokens=35.3] validation:  12%|█▏        | 47/400 [00:13<00:55,  6.41it/s, reward=-0.567, num_turns=1.66, num_tools=0.66, failed=0.128, completion_tokens=35.1][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  12%|█▏        | 48/400 [00:13<00:23, 15.03it/s, reward=-0.567, num_turns=1.66, num_tools=0.66, failed=0.128, completion_tokens=35.1]validation:  12%|█▏        | 48/400 [00:13<00:23, 15.03it/s, reward=-0.514, num_turns=1.67, num_tools=0.667, failed=0.125, completion_tokens=34.8]validation:  12%|█▏        | 49/400 [00:13<00:23, 15.03it/s, reward=-0.463, num_turns=1.67, num_tools=0.673, failed=0.122, completion_tokens=34.5]validation:  12%|█▎        | 50/400 [00:13<00:23, 15.03it/s, reward=-0.403, num_turns=1.68, num_tools=0.68, failed=0.12, completion_tokens=34.2]  validation:  13%|█▎        | 51/400 [00:13<00:23, 15.03it/s, reward=-0.356, num_turns=1.69, num_tools=0.686, failed=0.118, completion_tokens=33.9]validation:  13%|█▎        | 52/400 [00:13<00:23, 15.03it/s, reward=-0.407, num_turns=1.67, num_tools=0.673, failed=0.135, completion_tokens=33.9]validation:  13%|█▎        | 53/400 [00:13<00:23, 15.03it/s, reward=-0.456, num_turns=1.66, num_tools=0.66, failed=0.151, completion_tokens=33.9] validation:  14%|█▎        | 54/400 [00:13<00:23, 15.03it/s, reward=-0.503, num_turns=1.65, num_tools=0.648, failed=0.167, completion_tokens=33.9]validation:  14%|█▍        | 55/400 [00:13<00:22, 15.03it/s, reward=-0.548, num_turns=1.64, num_tools=0.636, failed=0.182, completion_tokens=33.9]validation:  14%|█▍        | 56/400 [00:13<00:22, 15.03it/s, reward=-0.574, num_turns=1.64, num_tools=0.643, failed=0.179, completion_tokens=33.9]validation:  14%|█▍        | 57/400 [00:13<00:22, 15.03it/s, reward=-0.494, num_turns=1.65, num_tools=0.649, failed=0.175, completion_tokens=33.8]validation:  14%|█▍        | 58/400 [00:13<00:22, 15.03it/s, reward=-0.46, num_turns=1.66, num_tools=0.655, failed=0.172, completion_tokens=33.5] validation:  15%|█▍        | 59/400 [00:13<00:22, 15.03it/s, reward=-0.418, num_turns=1.66, num_tools=0.661, failed=0.169, completion_tokens=33.4]validation:  15%|█▌        | 60/400 [00:13<00:22, 15.03it/s, reward=-0.461, 
num_turns=1.65, num_tools=0.65, failed=0.167, completion_tokens=35.8] validation:  15%|█▌        | 61/400 [00:13<00:22, 15.03it/s, reward=-0.486, num_turns=1.66, num_tools=0.656, failed=0.164, completion_tokens=35.7][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  16%|█▌        | 62/400 [00:13<00:22, 15.03it/s, reward=-0.527, num_turns=1.65, num_tools=0.645, failed=0.177, completion_tokens=35.7]validation:  16%|█▌        | 63/400 [00:13<00:22, 15.03it/s, reward=-0.566, num_turns=1.63, num_tools=0.635, failed=0.19, completion_tokens=35.7] validation:  16%|█▌        | 64/400 [00:13<00:22, 15.03it/s, reward=-0.604, num_turns=1.62, num_tools=0.625, failed=0.203, completion_tokens=35.7][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  16%|█▋        | 65/400 [00:13<00:22, 15.03it/s, reward=-0.641, num_turns=1.62, num_tools=0.615, failed=0.215, completion_tokens=35.7][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  16%|█▋        | 66/400 [00:13<00:22, 15.03it/s, reward=-0.677, num_turns=1.61, num_tools=0.606, failed=0.227, completion_tokens=35.7]validation:  17%|█▋        | 67/400 [00:13<00:22, 15.03it/s, reward=-0.711, num_turns=1.6, num_tools=0.597, failed=0.239, completion_tokens=35.7] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  17%|█▋        | 68/400 [00:13<00:22, 15.03it/s, reward=-0.745, num_turns=1.59, num_tools=0.588, failed=0.25, completion_tokens=35.7][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  17%|█▋        | 69/400 [00:13<00:22, 15.03it/s, reward=-0.778, num_turns=1.58, num_tools=0.58, failed=0.261, completion_tokens=35.7]validation:  18%|█▊        | 70/400 [00:13<00:21, 15.03it/s, reward=-0.81, num_turns=1.57, num_tools=0.571, failed=0.271, completion_tokens=35.7][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  18%|█▊        | 71/400 [00:13<00:21, 15.03it/s, reward=-0.84, num_turns=1.56, num_tools=0.563, failed=0.282, completion_tokens=35.7][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  18%|█▊        | 72/400 [00:13<00:21, 15.03it/s, reward=-0.87, num_turns=1.56, num_tools=0.556, failed=0.292, completion_tokens=35.7]validation:  18%|█▊        | 73/400 [00:13<00:21, 15.03it/s, reward=-0.9, num_turns=1.55, num_tools=0.548, failed=0.301, completion_tokens=35.7] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  18%|█▊        | 74/400 [00:13<00:21, 15.03it/s, reward=-0.928, num_turns=1.54, num_tools=0.541, failed=0.311, completion_tokens=35.7]validation:  19%|█▉        | 75/400 [00:13<00:21, 15.03it/s, reward=-0.956, num_turns=1.53, num_tools=0.533, failed=0.32, completion_tokens=35.7] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  19%|█▉        | 76/400 [00:13<00:21, 15.03it/s, reward=-0.982, num_turns=1.53, num_tools=0.526, failed=0.329, completion_tokens=35.7]validation:  19%|█▉        | 77/400 [00:13<00:21, 15.03it/s, reward=-1.01, num_turns=1.52, num_tools=0.519, failed=0.338, completion_tokens=35.7] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  20%|█▉        | 78/400 [00:13<00:21, 15.03it/s, reward=-1.03, num_turns=1.51, num_tools=0.513, failed=0.346, completion_tokens=35.7]validation:  20%|█▉        | 79/400 [00:13<00:21, 15.03it/s, reward=-1.06, num_turns=1.51, num_tools=0.506, failed=0.354, completion_tokens=35.7][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  20%|██        | 80/400 [00:13<00:21, 15.03it/s, reward=-1.08, num_turns=1.5, num_tools=0.5, failed=0.362, completion_tokens=35.7]   validation:  20%|██        | 81/400 [00:13<00:21, 15.03it/s, reward=-1.11, num_turns=1.49, num_tools=0.494, failed=0.37, completion_tokens=35.7][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  20%|██        | 82/400 [00:13<00:21, 15.03it/s, reward=-1.13, num_turns=1.49, num_tools=0.488, failed=0.378, completion_tokens=35.7]validation:  21%|██        | 83/400 [00:13<00:21, 15.03it/s, reward=-1.15, num_turns=1.48, num_tools=0.482, failed=0.386, completion_tokens=35.7]validation:  21%|██        | 84/400 [00:13<00:21, 15.03it/s, reward=-1.17, num_turns=1.48, num_tools=0.476, failed=0.393, completion_tokens=35.7][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  21%|██▏       | 85/400 [00:13<00:20, 15.03it/s, reward=-1.2, num_turns=1.47, num_tools=0.471, failed=0.4, completion_tokens=35.7]   validation:  22%|██▏       | 86/400 [00:13<00:20, 15.03it/s, reward=-1.22, num_turns=1.47, num_tools=0.465, failed=0.407, completion_tokens=35.7][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  22%|██▏       | 87/400 [00:13<00:20, 15.03it/s, reward=-1.24, num_turns=1.46, num_tools=0.46, failed=0.414, completion_tokens=35.7] validation:  22%|██▏       | 88/400 [00:13<00:20, 15.03it/s, reward=-1.26, num_turns=1.45, num_tools=0.455, failed=0.42, completion_tokens=35.7]validation:  22%|██▏       | 89/400 [00:13<00:20, 15.03it/s, reward=-1.28, num_turns=1.45, num_tools=0.449, failed=0.427, completion_tokens=35.7][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  22%|██▎       | 90/400 [00:13<00:20, 15.03it/s, reward=-1.3, num_turns=1.44, num_tools=0.444, failed=0.433, completion_tokens=35.7] validation:  23%|██▎       | 91/400 [00:13<00:20, 15.03it/s, reward=-1.32, num_turns=1.44, num_tools=0.44, failed=0.44, completion_tokens=35.7] validation:  23%|██▎       | 92/400 [00:13<00:20, 15.03it/s, reward=-1.33, num_turns=1.43, num_tools=0.435, failed=0.446, completion_tokens=35.7]validation:  23%|██▎       | 93/400 [00:13<00:20, 15.03it/s, reward=-1.35, num_turns=1.43, num_tools=0.43, failed=0.452, completion_tokens=35.7] validation:  24%|██▎       | 94/400 [00:13<00:20, 15.03it/s, reward=-1.36, num_turns=1.44, num_tools=0.436, failed=0.447, completion_tokens=35.5]validation:  24%|██▍       | 95/400 [00:13<00:20, 15.03it/s, reward=-1.36, num_turns=1.44, num_tools=0.442, failed=0.442, completion_tokens=35.5]validation:  24%|██▍       | 96/400 [00:13<00:20, 15.03it/s, reward=-1.34, num_turns=1.45, num_tools=0.448, failed=0.438, completion_tokens=35.2]validation:  24%|██▍       | 97/400 [00:13<00:20, 15.03it/s, reward=-1.31, num_turns=1.45, num_tools=0.454, failed=0.433, completion_tokens=35]  validation:  24%|██▍       | 98/400 [00:13<00:20, 15.03it/s, reward=-1.28, num_turns=1.46, num_tools=0.459, failed=0.429, completion_tokens=34.8]validation:  25%|██▍       | 99/400 [00:13<00:20, 15.03it/s, reward=-1.25, num_turns=1.46, num_tools=0.465, failed=0.424, completion_tokens=34.5]validation:  25%|██▌       | 100/400 [00:13<00:19, 15.03it/s, reward=-1.22, num_turns=1.47, num_tools=0.47, failed=0.42, completion_tokens=34.5] validation:  25%|██▌       | 101/400 [00:13<00:19, 15.03it/s, reward=-1.23, num_turns=1.48, num_tools=0.475, failed=0.416, completion_tokens=34.4]validation:  26%|██▌       | 102/400 [00:13<00:06, 47.42it/s, reward=-1.23, num_turns=1.48, num_tools=0.475, failed=0.416, completion_tokens=34.4]validation:  26%|██▌       | 102/400 [00:13<00:06, 47.42it/s, reward=-1.21, num_turns=1.48, 
num_tools=0.48, failed=0.412, completion_tokens=34.5] validation:  26%|██▌       | 103/400 [00:13<00:06, 47.42it/s, reward=-1.22, num_turns=1.49, num_tools=0.485, failed=0.408, completion_tokens=34.4]validation:  26%|██▌       | 104/400 [00:13<00:06, 47.42it/s, reward=-1.18, num_turns=1.49, num_tools=0.49, failed=0.404, completion_tokens=34.2] validation:  26%|██▋       | 105/400 [00:13<00:06, 47.42it/s, reward=-1.19, num_turns=1.5, num_tools=0.495, failed=0.4, completion_tokens=34.1]  validation:  26%|██▋       | 106/400 [00:13<00:06, 47.42it/s, reward=-1.2, num_turns=1.5, num_tools=0.5, failed=0.396, completion_tokens=34.2] validation:  27%|██▋       | 107/400 [00:13<00:06, 47.42it/s, reward=-1.17, num_turns=1.5, num_tools=0.505, failed=0.393, completion_tokens=34.1]validation:  27%|██▋       | 108/400 [00:13<00:06, 47.42it/s, reward=-1.18, num_turns=1.51, num_tools=0.509, failed=0.389, completion_tokens=33.9]validation:  27%|██▋       | 109/400 [00:13<00:06, 47.42it/s, reward=-1.16, num_turns=1.51, num_tools=0.514, failed=0.385, completion_tokens=34]  validation:  28%|██▊       | 110/400 [00:13<00:06, 47.42it/s, reward=-1.13, num_turns=1.52, num_tools=0.518, failed=0.382, completion_tokens=33.9]validation:  28%|██▊       | 111/400 [00:13<00:06, 47.42it/s, reward=-1.14, num_turns=1.52, num_tools=0.523, failed=0.378, completion_tokens=34.1]validation:  28%|██▊       | 112/400 [00:13<00:06, 47.42it/s, reward=-1.09, num_turns=1.53, num_tools=0.527, failed=0.375, completion_tokens=33.9]validation:  28%|██▊       | 113/400 [00:13<00:06, 47.42it/s, reward=-1.05, num_turns=1.53, num_tools=0.531, failed=0.372, completion_tokens=33.7][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  28%|██▊       | 114/400 [00:13<00:06, 47.42it/s, reward=-1.06, num_turns=1.53, num_tools=0.526, failed=0.377, completion_tokens=33.7]validation:  29%|██▉       | 115/400 [00:13<00:06, 47.42it/s, reward=-1.07, num_turns=1.53, num_tools=0.53, failed=0.374, completion_tokens=33.8] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  29%|██▉       | 116/400 [00:13<00:05, 47.42it/s, reward=-1.09, num_turns=1.53, num_tools=0.526, failed=0.379, completion_tokens=33.8][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  29%|██▉       | 117/400 [00:13<00:05, 47.42it/s, reward=-1.1, num_turns=1.52, num_tools=0.521, failed=0.385, completion_tokens=33.8] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  30%|██▉       | 118/400 [00:13<00:05, 47.42it/s, reward=-1.12, num_turns=1.52, num_tools=0.517, failed=0.39, completion_tokens=33.8][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  30%|██▉       | 119/400 [00:13<00:05, 47.42it/s, reward=-1.14, num_turns=1.51, num_tools=0.513, failed=0.395, completion_tokens=33.8][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  30%|███       | 120/400 [00:13<00:05, 47.42it/s, reward=-1.15, num_turns=1.51, num_tools=0.508, failed=0.4, completion_tokens=33.8]  [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  30%|███       | 121/400 [00:13<00:04, 57.95it/s, reward=-1.15, num_turns=1.51, num_tools=0.508, failed=0.4, completion_tokens=33.8]validation:  30%|███       | 121/400 [00:13<00:04, 57.95it/s, reward=-1.17, num_turns=1.5, num_tools=0.504, failed=0.405, completion_tokens=33.8][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  30%|███       | 122/400 [00:13<00:04, 57.95it/s, reward=-1.18, num_turns=1.5, num_tools=0.5, failed=0.41, completion_tokens=33.8]   [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  31%|███       | 123/400 [00:13<00:04, 57.95it/s, reward=-1.2, num_turns=1.5, num_tools=0.496, failed=0.415, completion_tokens=33.8][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  31%|███       | 124/400 [00:13<00:04, 57.95it/s, reward=-1.21, num_turns=1.49, num_tools=0.492, failed=0.419, completion_tokens=33.8][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  31%|███▏      | 125/400 [00:13<00:04, 57.95it/s, reward=-1.23, num_turns=1.49, num_tools=0.488, failed=0.424, completion_tokens=33.8]validation:  32%|███▏      | 126/400 [00:13<00:04, 57.95it/s, reward=-1.23, num_turns=1.49, num_tools=0.492, failed=0.421, completion_tokens=34]  validation:  32%|███▏      | 127/400 [00:13<00:04, 57.95it/s, reward=-1.21, num_turns=1.5, num_tools=0.496, failed=0.417, completion_tokens=34.2]validation:  32%|███▏      | 128/400 [00:13<00:04, 57.95it/s, reward=-1.22, num_turns=1.5, num_tools=0.5, failed=0.414, completion_tokens=34.4]  validation:  32%|███▏      | 129/400 [00:13<00:04, 57.95it/s, reward=-1.22, num_turns=1.5, num_tools=0.504, failed=0.411, completion_tokens=34.5]validation:  32%|███▎      | 130/400 [00:13<00:04, 57.95it/s, reward=-1.24, num_turns=1.5, num_tools=0.5, failed=0.408, completion_tokens=35.7]  validation:  33%|███▎      | 131/400 [00:13<00:04, 57.95it/s, reward=-1.24, num_turns=1.5, num_tools=0.504, failed=0.405, completion_tokens=36][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  33%|███▎      | 132/400 [00:13<00:04, 57.95it/s, reward=-1.26, num_turns=1.5, num_tools=0.5, failed=0.409, completion_tokens=36]  validation:  33%|███▎      | 133/400 [00:13<00:04, 57.95it/s, reward=-1.24, num_turns=1.5, num_tools=0.504, failed=0.406, completion_tokens=36.4]validation:  34%|███▎      | 134/400 [00:13<00:04, 57.95it/s, reward=-1.25, num_turns=1.51, num_tools=0.507, failed=0.403, completion_tokens=36.7]validation:  34%|███▍      | 135/400 [00:13<00:04, 57.95it/s, reward=-1.25, num_turns=1.51, num_tools=0.511, failed=0.4, completion_tokens=37.2]  validation:  34%|███▍      | 136/400 [00:13<00:04, 57.95it/s, reward=-1.26, num_turns=1.51, num_tools=0.515, failed=0.397, completion_tokens=36.9][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  34%|███▍      | 137/400 [00:13<00:04, 57.95it/s, reward=-1.27, num_turns=1.51, num_tools=0.511, failed=0.401, completion_tokens=36.9]validation:  34%|███▍      | 138/400 [00:13<00:04, 57.95it/s, reward=-1.28, num_turns=1.51, num_tools=0.507, failed=0.406, completion_tokens=36.9]validation:  35%|███▍      | 139/400 [00:13<00:04, 54.45it/s, reward=-1.28, num_turns=1.51, num_tools=0.507, failed=0.406, completion_tokens=36.9]validation:  35%|███▍      | 139/400 [00:13<00:04, 54.45it/s, reward=-1.3, num_turns=1.5, num_tools=0.504, failed=0.403, completion_tokens=36.7]  validation:  35%|███▌      | 140/400 [00:13<00:04, 54.45it/s, reward=-1.26, num_turns=1.51, num_tools=0.507, failed=0.4, completion_tokens=36.6]validation:  35%|███▌      | 141/400 [00:13<00:04, 54.45it/s, reward=-1.24, num_turns=1.51, num_tools=0.511, failed=0.397, completion_tokens=36.5]validation:  36%|███▌      | 142/400 [00:13<00:04, 54.45it/s, reward=-1.21, num_turns=1.51, num_tools=0.514, failed=0.394, completion_tokens=36.3]validation:  36%|███▌      | 143/400 [00:13<00:04, 54.45it/s, reward=-1.19, num_turns=1.52, num_tools=0.517, failed=0.392, completion_tokens=36.1]validation:  36%|███▌      | 144/400 [00:13<00:04, 54.45it/s, reward=-1.15, num_turns=1.52, num_tools=0.521, failed=0.389, completion_tokens=36]  validation:  36%|███▋      | 145/400 [00:13<00:04, 54.45it/s, reward=-1.12, num_turns=1.52, num_tools=0.524, failed=0.386, completion_tokens=35.8][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  36%|███▋      | 146/400 [00:13<00:04, 54.45it/s, reward=-1.13, num_turns=1.52, num_tools=0.521, failed=0.39, completion_tokens=35.8] validation:  37%|███▋      | 147/400 [00:13<00:04, 54.45it/s, reward=-1.12, num_turns=1.52, num_tools=0.524, failed=0.388, completion_tokens=36.3]validation:  37%|███▋      | 148/400 [00:13<00:04, 54.45it/s, reward=-1.1, num_turns=1.53, num_tools=0.527, failed=0.385, completion_tokens=36.1] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  37%|███▋      | 149/400 [00:14<00:04, 54.45it/s, reward=-1.11, num_turns=1.53, num_tools=0.53, failed=0.389, completion_tokens=36.1]validation:  38%|███▊      | 150/400 [00:14<00:04, 54.45it/s, reward=-1.12, num_turns=1.53, num_tools=0.533, failed=0.393, completion_tokens=36.1]validation:  38%|███▊      | 151/400 [00:14<00:04, 54.45it/s, reward=-1.13, num_turns=1.54, num_tools=0.536, failed=0.397, completion_tokens=36.1][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  38%|███▊      | 152/400 [00:14<00:04, 54.45it/s, reward=-1.15, num_turns=1.54, num_tools=0.539, failed=0.401, completion_tokens=36.2]validation:  38%|███▊      | 153/400 [00:14<00:04, 54.45it/s, reward=-1.16, num_turns=1.54, num_tools=0.542, failed=0.405, completion_tokens=36]  validation:  38%|███▊      | 154/400 [00:14<00:04, 54.45it/s, reward=-1.17, num_turns=1.55, num_tools=0.545, failed=0.409, completion_tokens=35.9]validation:  39%|███▉      | 155/400 [00:14<00:04, 54.45it/s, reward=-1.18, num_turns=1.55, num_tools=0.548, failed=0.413, completion_tokens=35.7]validation:  39%|███▉      | 156/400 [00:14<00:04, 54.45it/s, reward=-1.16, num_turns=1.55, num_tools=0.551, failed=0.41, completion_tokens=35.6] validation:  39%|███▉      | 157/400 [00:14<00:04, 54.45it/s, reward=-1.14, num_turns=1.55, num_tools=0.554, failed=0.408, completion_tokens=35.4]validation:  40%|███▉      | 158/400 [00:14<00:04, 54.45it/s, reward=-1.12, num_turns=1.56, num_tools=0.557, failed=0.405, completion_tokens=35.3][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  40%|███▉      | 159/400 [00:14<00:04, 54.45it/s, reward=-1.13, num_turns=1.55, num_tools=0.553, failed=0.409, completion_tokens=35.3]validation:  40%|████      | 160/400 [00:14<00:04, 54.45it/s, reward=-1.14, num_turns=1.56, num_tools=0.556, failed=0.406, completion_tokens=35.2]validation:  40%|████      | 161/400 [00:14<00:04, 54.45it/s, reward=-1.12, num_turns=1.56, num_tools=0.559, failed=0.404, completion_tokens=35]  [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  40%|████      | 162/400 [00:14<00:03, 72.12it/s, reward=-1.12, num_turns=1.56, num_tools=0.559, failed=0.404, completion_tokens=35]validation:  40%|████      | 162/400 [00:14<00:03, 72.12it/s, reward=-1.13, num_turns=1.56, num_tools=0.562, failed=0.407, completion_tokens=34.9]validation:  41%|████      | 163/400 [00:14<00:03, 72.12it/s, reward=-1.14, num_turns=1.56, num_tools=0.564, failed=0.411, completion_tokens=34.9]validation:  41%|████      | 164/400 [00:14<00:03, 72.12it/s, reward=-1.15, num_turns=1.57, num_tools=0.567, failed=0.409, completion_tokens=34.8]validation:  41%|████▏     | 165/400 [00:14<00:03, 72.12it/s, reward=-1.15, num_turns=1.57, num_tools=0.57, failed=0.406, completion_tokens=34.7] validation:  42%|████▏     | 166/400 [00:14<00:03, 72.12it/s, reward=-1.16, num_turns=1.57, num_tools=0.572, failed=0.404, completion_tokens=35] validation:  42%|████▏     | 167/400 [00:14<00:03, 72.12it/s, reward=-1.14, num_turns=1.57, num_tools=0.575, failed=0.401, completion_tokens=35]validation:  42%|████▏     | 168/400 [00:14<00:03, 72.12it/s, reward=-1.11, num_turns=1.58, num_tools=0.577, failed=0.399, completion_tokens=34.8][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  42%|████▏     | 169/400 [00:14<00:03, 72.12it/s, reward=-1.13, num_turns=1.57, num_tools=0.574, failed=0.402, completion_tokens=34.8]validation:  42%|████▎     | 170/400 [00:14<00:03, 72.12it/s, reward=-1.13, num_turns=1.58, num_tools=0.576, failed=0.4, completion_tokens=34.8]  validation:  43%|████▎     | 171/400 [00:14<00:03, 72.12it/s, reward=-1.14, num_turns=1.58, num_tools=0.579, failed=0.398, completion_tokens=34.8][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  43%|████▎     | 172/400 [00:14<00:03, 72.12it/s, reward=-1.15, num_turns=1.58, num_tools=0.581, failed=0.401, completion_tokens=34.7]validation:  43%|████▎     | 173/400 [00:14<00:03, 72.12it/s, reward=-1.12, num_turns=1.58, num_tools=0.584, failed=0.399, completion_tokens=34.6]validation:  44%|████▎     | 174/400 [00:14<00:03, 72.12it/s, reward=-1.09, num_turns=1.59, num_tools=0.586, failed=0.397, completion_tokens=34.5]validation:  44%|████▍     | 175/400 [00:14<00:03, 72.12it/s, reward=-1.06, num_turns=1.59, num_tools=0.589, failed=0.394, completion_tokens=34.4]validation:  44%|████▍     | 176/400 [00:14<00:03, 72.12it/s, reward=-1.06, num_turns=1.59, num_tools=0.591, failed=0.392, completion_tokens=34.4]validation:  44%|████▍     | 177/400 [00:14<00:03, 72.12it/s, reward=-1.03, num_turns=1.59, num_tools=0.593, failed=0.39, completion_tokens=34.3] validation:  44%|████▍     | 178/400 [00:14<00:03, 72.12it/s, reward=-1.01, num_turns=1.6, num_tools=0.596, failed=0.388, completion_tokens=34.3]validation:  45%|████▍     | 179/400 [00:14<00:02, 76.04it/s, reward=-1.01, num_turns=1.6, num_tools=0.596, failed=0.388, completion_tokens=34.3]validation:  45%|████▍     | 179/400 [00:14<00:02, 76.04it/s, reward=-0.979, num_turns=1.6, num_tools=0.598, failed=0.385, completion_tokens=34.2]validation:  45%|████▌     | 180/400 [00:14<00:02, 76.04it/s, reward=-0.951, num_turns=1.6, num_tools=0.6, failed=0.383, completion_tokens=34.2]  validation:  45%|████▌     | 181/400 [00:14<00:02, 76.04it/s, reward=-0.924, num_turns=1.6, num_tools=0.602, failed=0.381, completion_tokens=34.1]validation:  46%|████▌     | 182/400 [00:14<00:02, 76.04it/s, reward=-0.897, num_turns=1.6, num_tools=0.604, failed=0.379, completion_tokens=34.1]validation:  46%|████▌     | 183/400 [00:14<00:02, 76.04it/s, reward=-0.87, num_turns=1.61, num_tools=0.607, failed=0.377, completion_tokens=34]  validation:  46%|████▌     | 184/400 [00:14<00:02, 76.04it/s, reward=-0.862, 
num_turns=1.61, num_tools=0.609, failed=0.375, completion_tokens=34]validation:  46%|████▋     | 185/400 [00:14<00:02, 76.04it/s, reward=-0.855, num_turns=1.61, num_tools=0.611, failed=0.373, completion_tokens=33.9]validation:  46%|████▋     | 186/400 [00:14<00:02, 76.04it/s, reward=-0.861, num_turns=1.61, num_tools=0.613, failed=0.371, completion_tokens=33.9]validation:  47%|████▋     | 187/400 [00:14<00:02, 76.04it/s, reward=-0.867, num_turns=1.61, num_tools=0.615, failed=0.369, completion_tokens=33.8]validation:  47%|████▋     | 188/400 [00:14<00:02, 76.04it/s, reward=-0.873, num_turns=1.62, num_tools=0.617, failed=0.367, completion_tokens=33.7]validation:  47%|████▋     | 189/400 [00:14<00:02, 76.04it/s, reward=-0.853, num_turns=1.62, num_tools=0.619, failed=0.365, completion_tokens=33.7]validation:  48%|████▊     | 190/400 [00:14<00:02, 76.04it/s, reward=-0.859, num_turns=1.62, num_tools=0.621, failed=0.363, completion_tokens=33.7]validation:  48%|████▊     | 191/400 [00:14<00:02, 76.04it/s, reward=-0.865, num_turns=1.62, num_tools=0.623, failed=0.361, completion_tokens=33.6]validation:  48%|████▊     | 192/400 [00:14<00:02, 76.04it/s, reward=-0.85, num_turns=1.62, num_tools=0.625, failed=0.359, completion_tokens=33.6] validation:  48%|████▊     | 193/400 [00:14<00:02, 76.04it/s, reward=-0.835, num_turns=1.63, num_tools=0.627, failed=0.358, completion_tokens=33.5]validation:  48%|████▊     | 194/400 [00:14<00:02, 76.04it/s, reward=-0.828, num_turns=1.63, num_tools=0.629, failed=0.356, completion_tokens=33.5]validation:  49%|████▉     | 195/400 [00:14<00:02, 76.04it/s, reward=-0.834, num_turns=1.63, num_tools=0.631, failed=0.354, completion_tokens=33.4]validation:  49%|████▉     | 196/400 [00:14<00:02, 76.04it/s, reward=-0.84, num_turns=1.63, num_tools=0.633, failed=0.352, completion_tokens=33.5] validation:  49%|████▉     | 197/400 [00:14<00:02, 76.04it/s, reward=-0.846, num_turns=1.63, num_tools=0.635, failed=0.35, completion_tokens=33.5]
validation:  50%|████▉     | 198/400 [00:14<00:02, 76.04it/s, reward=-0.832, num_turns=1.64, num_tools=0.636, failed=0.348, completion_tokens=33.5]validation:  50%|████▉     | 199/400 [00:14<00:02, 92.39it/s, reward=-0.832, num_turns=1.64, num_tools=0.636, failed=0.348, completion_tokens=33.5]validation:  50%|████▉     | 199/400 [00:14<00:02, 92.39it/s, reward=-0.838, num_turns=1.64, num_tools=0.638, failed=0.347, completion_tokens=33.4]validation:  50%|█████     | 200/400 [00:14<00:02, 92.39it/s, reward=-0.821, num_turns=1.64, num_tools=0.64, failed=0.345, completion_tokens=33.5] validation:  50%|█████     | 201/400 [00:14<00:02, 92.39it/s, reward=-0.827, num_turns=1.64, num_tools=0.642, failed=0.343, completion_tokens=33.4]validation:  50%|█████     | 202/400 [00:14<00:02, 92.39it/s, reward=-0.833, num_turns=1.64, num_tools=0.644, failed=0.342, completion_tokens=33.4]validation:  51%|█████     | 203/400 [00:14<00:02, 92.39it/s, reward=-0.838, num_turns=1.65, num_tools=0.645, failed=0.34, completion_tokens=33.8] validation:  51%|█████     | 204/400 [00:14<00:02, 92.39it/s, reward=-0.844, num_turns=1.65, num_tools=0.647, failed=0.338, completion_tokens=33.8]validation:  51%|█████▏    | 205/400 [00:14<00:02, 92.39it/s, reward=-0.85, num_turns=1.65, num_tools=0.654, failed=0.337, completion_tokens=33.9] validation:  52%|█████▏    | 206/400 [00:14<00:02, 92.39it/s, reward=-0.855, num_turns=1.66, num_tools=0.655, failed=0.335, completion_tokens=33.9]validation:  52%|█████▏    | 207/400 [00:14<00:02, 92.39it/s, reward=-0.841, num_turns=1.66, num_tools=0.657, failed=0.333, completion_tokens=33.8]validation:  52%|█████▏    | 208/400 [00:14<00:02, 92.39it/s, reward=-0.828, num_turns=1.66, num_tools=0.659, failed=0.332, completion_tokens=33.8]validation:  52%|█████▏    | 209/400 [00:14<00:02, 92.39it/s, reward=-0.833, num_turns=1.66, num_tools=0.66, failed=0.33, completion_tokens=33.8]  validation:  52%|█████▎    | 210/400 [00:14<00:02, 92.39it/s, 
reward=-0.839, num_turns=1.66, num_tools=0.662, failed=0.329, completion_tokens=33.8]validation:  53%|█████▎    | 211/400 [00:14<00:02, 92.39it/s, reward=-0.823, num_turns=1.66, num_tools=0.664, failed=0.327, completion_tokens=33.9]validation:  53%|█████▎    | 212/400 [00:14<00:02, 92.39it/s, reward=-0.829, num_turns=1.67, num_tools=0.665, failed=0.325, completion_tokens=33.9]validation:  53%|█████▎    | 213/400 [00:14<00:02, 92.39it/s, reward=-0.834, num_turns=1.67, num_tools=0.667, failed=0.324, completion_tokens=33.9]validation:  54%|█████▎    | 214/400 [00:14<00:02, 92.39it/s, reward=-0.819, num_turns=1.67, num_tools=0.668, failed=0.322, completion_tokens=33.8]validation:  54%|█████▍    | 215/400 [00:14<00:02, 86.87it/s, reward=-0.819, num_turns=1.67, num_tools=0.668, failed=0.322, completion_tokens=33.8]validation:  54%|█████▍    | 215/400 [00:14<00:02, 86.87it/s, reward=-0.808, num_turns=1.67, num_tools=0.67, failed=0.321, completion_tokens=33.9] validation:  54%|█████▍    | 216/400 [00:14<00:02, 86.87it/s, reward=-0.795, num_turns=1.67, num_tools=0.671, failed=0.319, completion_tokens=33.9]validation:  54%|█████▍    | 217/400 [00:14<00:02, 86.87it/s, reward=-0.8, num_turns=1.67, num_tools=0.673, failed=0.318, completion_tokens=33.9]  validation:  55%|█████▍    | 218/400 [00:14<00:02, 86.87it/s, reward=-0.806, num_turns=1.67, num_tools=0.674, failed=0.317, completion_tokens=33.8]validation:  55%|█████▍    | 219/400 [00:14<00:02, 86.87it/s, reward=-0.811, num_turns=1.68, num_tools=0.676, failed=0.315, completion_tokens=34]  validation:  55%|█████▌    | 220/400 [00:14<00:02, 86.87it/s, reward=-0.817, num_turns=1.68, num_tools=0.677, failed=0.314, completion_tokens=34]validation:  55%|█████▌    | 221/400 [00:14<00:02, 86.87it/s, reward=-0.822, num_turns=1.68, num_tools=0.679, failed=0.312, completion_tokens=33.9]validation:  56%|█████▌    | 222/400 [00:14<00:02, 86.87it/s, reward=-0.827, num_turns=1.68, num_tools=0.685, failed=0.311, 
completion_tokens=34]  validation:  56%|█████▌    | 223/400 [00:14<00:02, 86.87it/s, reward=-0.833, num_turns=1.69, num_tools=0.691, failed=0.309, completion_tokens=34.2]validation:  56%|█████▌    | 224/400 [00:14<00:02, 86.87it/s, reward=-0.818, num_turns=1.69, num_tools=0.692, failed=0.308, completion_tokens=34.5]validation:  56%|█████▋    | 225/400 [00:14<00:02, 86.87it/s, reward=-0.827, num_turns=1.69, num_tools=0.689, failed=0.307, completion_tokens=35]  validation:  56%|█████▋    | 226/400 [00:14<00:02, 86.87it/s, reward=-0.813, num_turns=1.69, num_tools=0.69, failed=0.305, completion_tokens=34.9]validation:  57%|█████▋    | 227/400 [00:14<00:01, 86.87it/s, reward=-0.818, num_turns=1.69, num_tools=0.692, failed=0.304, completion_tokens=34.9]validation:  57%|█████▋    | 228/400 [00:14<00:01, 86.87it/s, reward=-0.827, num_turns=1.69, num_tools=0.689, failed=0.303, completion_tokens=37.7][2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.473315 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.395957 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.458265 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.430569 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.428553 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.450014 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.401053 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.434205 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.475570 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.407062 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.430383 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.468651 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.404790 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.422452 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.411601 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.474694 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.486884 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.488547 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.428166 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.414558 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.447537 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.453323 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.436907 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.446272 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.496000 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.453066 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.488989 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.478658 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.451624 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.432928 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.420997 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.474647 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.447945 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.449846 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.468465 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.448000 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.409914 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.427265 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.383793 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.403102 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.429834 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.378778 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.444976 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.380470 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.484490 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.379489 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.488917 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.443552 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.484126 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.377713 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.455466 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.442418 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.411960 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.478103 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.397940 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.386468 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.433327 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.437740 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.415018 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.457598 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.395625 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.382885 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.414943 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.408685 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.467779 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.414284 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.449459 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.437600 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.484733 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.490503 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.491294 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.492965 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.478945 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.448204 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.479687 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.458371 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.452620 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.416544 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.466102 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.443001 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.438199 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.386365 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.420463 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.398959 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.415648 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.406742 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.422783 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.468912 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.457281 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.447545 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.404803 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.380038 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.377342 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.471050 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.416476 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.473025 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.460678 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.408835 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.444229 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.387272 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.482396 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.418123 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.453410 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.462687 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.413345 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.438264 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.383264 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.388137 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.494660 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.470784 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.495510 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.438529 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.399639 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.396314 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.392011 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.414534 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.497475 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.393452 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.426093 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.442541 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.382879 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.406868 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.442518 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.409651 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.452520 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.440985 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.396273 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.440177 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.483216 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.488335 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.379325 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.404576 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.411981 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.418952 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.475371 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.477682 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.379507 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.487781 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.404374 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.385444 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.377447 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.456144 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.411518 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.472613 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.448824 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.418550 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.427137 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.466364 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.481079 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.424927 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.385840 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.434185 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.423498 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.470993 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.421335 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.445158 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.401004 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.402147 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.443400 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.427557 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.438579 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.475885 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.469949 seconds
+[2026-04-13 02:44:50] INFO _base_client.py:1693: Retrying request to /chat/completions in 0.483275 seconds
+validation:  57%|█████▋    | 229/400 [00:15<00:03, 54.32it/s, reward=-0.827, num_turns=1.69, num_tools=0.689, failed=0.303, completion_tokens=37.7]validation:  57%|█████▋    | 229/400 [00:15<00:03, 54.32it/s, reward=-0.813, num_turns=1.69, num_tools=0.69, failed=0.301, completion_tokens=38]   validation:  57%|█████▊    | 230/400 [00:15<00:03, 54.32it/s, reward=-0.822, num_turns=1.69, num_tools=0.687, failed=0.3, completion_tokens=40.4]validation:  58%|█████▊    | 231/400 [00:15<00:03, 54.32it/s, reward=-0.828, num_turns=1.69, num_tools=0.693, failed=0.299, completion_tokens=40.4]validation:  58%|█████▊    | 232/400 [00:15<00:03, 54.32it/s, reward=-0.813, num_turns=1.69, num_tools=0.694, failed=0.297, completion_tokens=40.7]validation:  58%|█████▊    | 233/400 [00:15<00:03, 54.32it/s, reward=-0.818, num_turns=1.7, num_tools=0.695, failed=0.296, completion_tokens=41.2] validation:  58%|█████▊    | 234/400 [00:15<00:03, 54.32it/s, reward=-0.828, num_turns=1.69, num_tools=0.692, failed=0.295, completion_tokens=43.7]validation:  59%|█████▉    | 235/400 [00:17<00:03, 54.32it/s, reward=-0.837, num_turns=1.69, num_tools=0.689, failed=0.294, completion_tokens=43.6]validation:  59%|█████▉    | 236/400 [00:17<00:03, 54.32it/s, reward=-0.846, num_turns=1.69, num_tools=0.686, failed=0.292, completion_tokens=43.6]validation:  59%|█████▉    | 237/400 [00:17<00:03, 54.32it/s, reward=-0.851, num_turns=1.69, num_tools=0.688, failed=0.291, completion_tokens=43.6]validation:  60%|█████▉    | 238/400 [00:17<00:02, 54.32it/s, reward=-0.86, num_turns=1.68, num_tools=0.685, failed=0.29, completion_tokens=43.7]  validation:  60%|█████▉    | 239/400 [00:17<00:10, 15.66it/s, reward=-0.86, num_turns=1.68, num_tools=0.685, failed=0.29, completion_tokens=43.7]validation:  60%|█████▉    | 239/400 [00:17<00:10, 15.66it/s, reward=-0.869, num_turns=1.68, num_tools=0.682, failed=0.289, completion_tokens=44.4]validation:  60%|██████    | 240/400 [00:17<00:10, 15.66it/s, reward=-0.878, 
num_turns=1.68, num_tools=0.679, failed=0.287, completion_tokens=44.4]validation:  60%|██████    | 241/400 [00:17<00:10, 15.66it/s, reward=-0.887, num_turns=1.68, num_tools=0.676, failed=0.286, completion_tokens=46.9]validation:  60%|██████    | 242/400 [00:17<00:10, 15.66it/s, reward=-0.895, num_turns=1.67, num_tools=0.674, failed=0.285, completion_tokens=47.1]validation:  61%|██████    | 243/400 [00:17<00:10, 15.66it/s, reward=-0.904, num_turns=1.67, num_tools=0.671, failed=0.284, completion_tokens=47.1]validation:  61%|██████    | 244/400 [00:17<00:09, 15.66it/s, reward=-0.892, num_turns=1.67, num_tools=0.672, failed=0.283, completion_tokens=46.9]validation:  61%|██████▏   | 245/400 [00:17<00:09, 15.66it/s, reward=-0.897, num_turns=1.67, num_tools=0.673, failed=0.282, completion_tokens=46.8]validation:  62%|██████▏   | 246/400 [00:17<00:09, 16.76it/s, reward=-0.897, num_turns=1.67, num_tools=0.673, failed=0.282, completion_tokens=46.8]validation:  62%|██████▏   | 246/400 [00:17<00:09, 16.76it/s, reward=-0.877, num_turns=1.67, num_tools=0.675, failed=0.28, completion_tokens=46.7] validation:  62%|██████▏   | 247/400 [00:17<00:09, 16.76it/s, reward=-0.881, num_turns=1.68, num_tools=0.676, failed=0.279, completion_tokens=46.5]validation:  62%|██████▏   | 248/400 [00:17<00:09, 16.76it/s, reward=-0.89, num_turns=1.67, num_tools=0.673, failed=0.278, completion_tokens=48]   [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  62%|██████▏   | 249/400 [00:17<00:09, 16.76it/s, reward=-0.898, num_turns=1.67, num_tools=0.671, failed=0.281, completion_tokens=48]validation:  62%|██████▎   | 250/400 [00:18<00:08, 16.76it/s, reward=-0.903, num_turns=1.67, num_tools=0.672, failed=0.28, completion_tokens=47.9]validation:  63%|██████▎   | 251/400 [00:18<00:08, 16.76it/s, reward=-0.907, num_turns=1.67, num_tools=0.673, failed=0.279, completion_tokens=47.7]validation:  63%|██████▎   | 252/400 [00:18<00:08, 18.18it/s, reward=-0.907, num_turns=1.67, num_tools=0.673, failed=0.279, completion_tokens=47.7]validation:  63%|██████▎   | 252/400 [00:18<00:08, 18.18it/s, reward=-0.911, num_turns=1.67, num_tools=0.675, failed=0.278, completion_tokens=47.6]validation:  63%|██████▎   | 253/400 [00:18<00:08, 18.18it/s, reward=-0.916, num_turns=1.68, num_tools=0.676, failed=0.277, completion_tokens=47.4]validation:  64%|██████▎   | 254/400 [00:18<00:08, 18.18it/s, reward=-0.92, num_turns=1.68, num_tools=0.677, failed=0.276, completion_tokens=47.3] validation:  64%|██████▍   | 255/400 [00:18<00:07, 18.18it/s, reward=-0.924, num_turns=1.68, num_tools=0.678, failed=0.275, completion_tokens=47.1]validation:  64%|██████▍   | 256/400 [00:18<00:07, 18.18it/s, reward=-0.928, num_turns=1.68, num_tools=0.68, failed=0.273, completion_tokens=47]   validation:  64%|██████▍   | 257/400 [00:18<00:08, 17.78it/s, reward=-0.928, num_turns=1.68, num_tools=0.68, failed=0.273, completion_tokens=47]validation:  64%|██████▍   | 257/400 [00:18<00:08, 17.78it/s, reward=-0.933, num_turns=1.68, num_tools=0.681, failed=0.272, completion_tokens=46.9][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  64%|██████▍   | 258/400 [00:18<00:07, 17.78it/s, reward=-0.941, num_turns=1.68, num_tools=0.678, failed=0.275, completion_tokens=46.9]validation:  65%|██████▍   | 259/400 [00:18<00:07, 17.78it/s, reward=-0.949, num_turns=1.68, num_tools=0.676, failed=0.278, completion_tokens=46.9]validation:  65%|██████▌   | 260/400 [00:18<00:07, 17.78it/s, reward=-0.956, num_turns=1.67, num_tools=0.673, failed=0.281, completion_tokens=46.9]validation:  65%|██████▌   | 261/400 [00:18<00:07, 17.78it/s, reward=-0.937, num_turns=1.67, num_tools=0.674, failed=0.28, completion_tokens=46.9] validation:  66%|██████▌   | 262/400 [00:18<00:07, 17.78it/s, reward=-0.919, num_turns=1.68, num_tools=0.676, failed=0.279, completion_tokens=46.8]validation:  66%|██████▌   | 263/400 [00:18<00:07, 17.78it/s, reward=-0.923, num_turns=1.68, num_tools=0.677, failed=0.278, completion_tokens=46.7]validation:  66%|██████▌   | 264/400 [00:18<00:07, 17.78it/s, reward=-0.917, num_turns=1.68, num_tools=0.678, failed=0.277, completion_tokens=46.6]validation:  66%|██████▋   | 265/400 [00:18<00:07, 17.78it/s, reward=-0.908, num_turns=1.68, num_tools=0.679, failed=0.275, completion_tokens=46.6]validation:  66%|██████▋   | 266/400 [00:18<00:07, 17.78it/s, reward=-0.897, num_turns=1.68, num_tools=0.68, failed=0.274, completion_tokens=46.5] validation:  67%|██████▋   | 267/400 [00:18<00:07, 17.78it/s, reward=-0.879, num_turns=1.68, num_tools=0.682, failed=0.273, completion_tokens=46.3]validation:  67%|██████▋   | 268/400 [00:18<00:07, 17.78it/s, reward=-0.861, num_turns=1.68, num_tools=0.683, failed=0.272, completion_tokens=46.2]validation:  67%|██████▋   | 269/400 [00:18<00:07, 17.78it/s, reward=-0.854, num_turns=1.68, num_tools=0.684, failed=0.271, completion_tokens=46.1]validation:  68%|██████▊   | 270/400 [00:18<00:07, 17.78it/s, reward=-0.858, num_turns=1.69, num_tools=0.685, failed=0.27, completion_tokens=46]   validation:  68%|██████▊   | 271/400 [00:18<00:07, 17.78it/s, 
reward=-0.862, num_turns=1.69, num_tools=0.686, failed=0.269, completion_tokens=45.9]validation:  68%|██████▊   | 272/400 [00:18<00:07, 17.78it/s, reward=-0.85, num_turns=1.69, num_tools=0.688, failed=0.268, completion_tokens=45.8] validation:  68%|██████▊   | 273/400 [00:18<00:07, 17.78it/s, reward=-0.854, num_turns=1.69, num_tools=0.689, failed=0.267, completion_tokens=45.7]validation:  68%|██████▊   | 274/400 [00:18<00:07, 17.78it/s, reward=-0.858, num_turns=1.69, num_tools=0.69, failed=0.266, completion_tokens=45.6] validation:  69%|██████▉   | 275/400 [00:18<00:07, 17.78it/s, reward=-0.841, num_turns=1.69, num_tools=0.691, failed=0.265, completion_tokens=45.5]validation:  69%|██████▉   | 276/400 [00:18<00:06, 17.78it/s, reward=-0.845, num_turns=1.69, num_tools=0.692, failed=0.264, completion_tokens=45.4]validation:  69%|██████▉   | 277/400 [00:18<00:06, 17.78it/s, reward=-0.827, num_turns=1.69, num_tools=0.693, failed=0.264, completion_tokens=45.3]validation:  70%|██████▉   | 278/400 [00:18<00:06, 17.78it/s, reward=-0.81, num_turns=1.69, num_tools=0.694, failed=0.263, completion_tokens=45.2] validation:  70%|██████▉   | 279/400 [00:18<00:06, 17.78it/s, reward=-0.793, num_turns=1.7, num_tools=0.695, failed=0.262, completion_tokens=45.2]validation:  70%|███████   | 280/400 [00:18<00:06, 17.78it/s, reward=-0.776, num_turns=1.7, num_tools=0.696, failed=0.261, completion_tokens=45.1]validation:  70%|███████   | 281/400 [00:18<00:06, 17.78it/s, reward=-0.759, num_turns=1.7, num_tools=0.698, failed=0.26, completion_tokens=45]   validation:  70%|███████   | 282/400 [00:18<00:06, 17.78it/s, reward=-0.742, num_turns=1.7, num_tools=0.699, failed=0.259, completion_tokens=44.9]validation:  71%|███████   | 283/400 [00:18<00:06, 17.78it/s, reward=-0.725, num_turns=1.7, num_tools=0.7, failed=0.258, completion_tokens=44.9]  validation:  71%|███████   | 284/400 [00:18<00:06, 17.78it/s, reward=-0.708, num_turns=1.7, num_tools=0.701, failed=0.257, 
completion_tokens=44.8]validation:  71%|███████▏  | 285/400 [00:18<00:06, 17.78it/s, reward=-0.692, num_turns=1.7, num_tools=0.702, failed=0.256, completion_tokens=44.7]validation:  72%|███████▏  | 286/400 [00:18<00:06, 17.78it/s, reward=-0.675, num_turns=1.7, num_tools=0.703, failed=0.255, completion_tokens=44.6]validation:  72%|███████▏  | 287/400 [00:18<00:06, 17.78it/s, reward=-0.659, num_turns=1.7, num_tools=0.704, failed=0.254, completion_tokens=44.5]validation:  72%|███████▏  | 288/400 [00:18<00:06, 17.78it/s, reward=-0.653, num_turns=1.7, num_tools=0.705, failed=0.253, completion_tokens=44.5]validation:  72%|███████▏  | 289/400 [00:18<00:06, 17.78it/s, reward=-0.648, num_turns=1.71, num_tools=0.706, failed=0.253, completion_tokens=44.4][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  72%|███████▎  | 290/400 [00:18<00:06, 17.78it/s, reward=-0.656, num_turns=1.7, num_tools=0.703, failed=0.255, completion_tokens=44.4] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  73%|███████▎  | 291/400 [00:18<00:06, 17.78it/s, reward=-0.664, num_turns=1.7, num_tools=0.701, failed=0.258, completion_tokens=44.4][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  73%|███████▎  | 292/400 [00:18<00:06, 17.78it/s, reward=-0.672, num_turns=1.7, num_tools=0.699, failed=0.26, completion_tokens=44.4] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  73%|███████▎  | 293/400 [00:18<00:02, 44.50it/s, reward=-0.672, num_turns=1.7, num_tools=0.699, failed=0.26, completion_tokens=44.4]validation:  73%|███████▎  | 293/400 [00:18<00:02, 44.50it/s, reward=-0.68, num_turns=1.7, num_tools=0.696, failed=0.263, completion_tokens=44.4]validation:  74%|███████▎  | 294/400 [00:18<00:02, 44.50it/s, reward=-0.684, num_turns=1.7, num_tools=0.697, failed=0.262, completion_tokens=44.3]validation:  74%|███████▍  | 295/400 [00:18<00:02, 44.50it/s, reward=-0.689, num_turns=1.7, num_tools=0.698, failed=0.261, completion_tokens=44.3]validation:  74%|███████▍  | 296/400 [00:18<00:02, 44.50it/s, reward=-0.693, num_turns=1.7, num_tools=0.699, failed=0.26, completion_tokens=44.2] validation:  74%|███████▍  | 297/400 [00:18<00:02, 44.50it/s, reward=-0.687, num_turns=1.7, num_tools=0.7, failed=0.259, completion_tokens=44.2] validation:  74%|███████▍  | 298/400 [00:18<00:02, 44.50it/s, reward=-0.692, num_turns=1.7, num_tools=0.701, failed=0.258, completion_tokens=44.2]validation:  75%|███████▍  | 299/400 [00:18<00:02, 44.50it/s, reward=-0.696, num_turns=1.7, num_tools=0.702, failed=0.258, completion_tokens=44.1][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  75%|███████▌  | 300/400 [00:18<00:02, 44.50it/s, reward=-0.704, num_turns=1.7, num_tools=0.703, failed=0.26, completion_tokens=44.1] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  75%|███████▌  | 301/400 [00:18<00:02, 44.50it/s, reward=-0.712, num_turns=1.7, num_tools=0.704, failed=0.262, completion_tokens=44.1][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  76%|███████▌  | 302/400 [00:18<00:02, 44.50it/s, reward=-0.719, num_turns=1.71, num_tools=0.705, failed=0.265, completion_tokens=44] validation:  76%|███████▌  | 303/400 [00:18<00:02, 44.50it/s, reward=-0.727, num_turns=1.71, num_tools=0.706, failed=0.267, completion_tokens=44][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  76%|███████▌  | 304/400 [00:18<00:02, 44.50it/s, reward=-0.734, num_turns=1.71, num_tools=0.707, failed=0.27, completion_tokens=44] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  76%|███████▋  | 305/400 [00:18<00:02, 44.50it/s, reward=-0.742, num_turns=1.7, num_tools=0.705, failed=0.272, completion_tokens=44]validation:  76%|███████▋  | 306/400 [00:18<00:02, 44.50it/s, reward=-0.749, num_turns=1.71, num_tools=0.706, failed=0.275, completion_tokens=43.9][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  77%|███████▋  | 307/400 [00:18<00:01, 53.12it/s, reward=-0.749, num_turns=1.71, num_tools=0.706, failed=0.275, completion_tokens=43.9]validation:  77%|███████▋  | 307/400 [00:18<00:01, 53.12it/s, reward=-0.756, num_turns=1.71, num_tools=0.707, failed=0.277, completion_tokens=43.9]validation:  77%|███████▋  | 308/400 [00:18<00:01, 53.12it/s, reward=-0.764, num_turns=1.7, num_tools=0.705, failed=0.279, completion_tokens=43.9] validation:  77%|███████▋  | 309/400 [00:18<00:01, 53.12it/s, reward=-0.756, num_turns=1.71, num_tools=0.706, failed=0.278, completion_tokens=43.9]validation:  78%|███████▊  | 310/400 [00:18<00:01, 53.12it/s, reward=-0.76, num_turns=1.71, num_tools=0.706, failed=0.277, completion_tokens=43.9] validation:  78%|███████▊  | 311/400 [00:18<00:01, 53.12it/s, reward=-0.75, num_turns=1.71, num_tools=0.707, failed=0.277, completion_tokens=43.8]validation:  78%|███████▊  | 312/400 [00:18<00:01, 53.12it/s, reward=-0.743, num_turns=1.71, num_tools=0.708, failed=0.276, completion_tokens=43.9]validation:  78%|███████▊  | 313/400 [00:18<00:01, 53.12it/s, reward=-0.728, num_turns=1.71, num_tools=0.709, failed=0.275, completion_tokens=43.8]validation:  78%|███████▊  | 314/400 [00:18<00:01, 53.12it/s, reward=-0.718, num_turns=1.71, num_tools=0.71, failed=0.274, completion_tokens=43.8] validation:  79%|███████▉  | 315/400 [00:18<00:01, 53.12it/s, reward=-0.722, num_turns=1.71, num_tools=0.711, failed=0.273, completion_tokens=43.8]validation:  79%|███████▉  | 316/400 [00:18<00:01, 53.12it/s, reward=-0.716, num_turns=1.71, num_tools=0.712, failed=0.272, completion_tokens=43.7]validation:  79%|███████▉  | 317/400 [00:18<00:01, 53.12it/s, reward=-0.708, num_turns=1.71, num_tools=0.713, failed=0.271, completion_tokens=43.6]validation:  80%|███████▉  | 318/400 [00:18<00:01, 53.12it/s, reward=-0.699, num_turns=1.71, num_tools=0.714, failed=0.27, completion_tokens=43.5] validation:  80%|███████▉  | 319/400 [00:18<00:01, 53.12it/s, 
reward=-0.691, num_turns=1.71, num_tools=0.715, failed=0.27, completion_tokens=43.5]validation:  80%|████████  | 320/400 [00:18<00:01, 47.62it/s, reward=-0.691, num_turns=1.71, num_tools=0.715, failed=0.27, completion_tokens=43.5]validation:  80%|████████  | 320/400 [00:18<00:01, 47.62it/s, reward=-0.682, num_turns=1.72, num_tools=0.716, failed=0.269, completion_tokens=43.4]validation:  80%|████████  | 321/400 [00:18<00:01, 47.62it/s, reward=-0.686, num_turns=1.72, num_tools=0.717, failed=0.268, completion_tokens=43.3]validation:  80%|████████  | 322/400 [00:18<00:01, 47.62it/s, reward=-0.69, num_turns=1.72, num_tools=0.717, failed=0.267, completion_tokens=43.2] validation:  81%|████████  | 323/400 [00:18<00:01, 47.62it/s, reward=-0.684, num_turns=1.72, num_tools=0.718, failed=0.266, completion_tokens=43.2]validation:  81%|████████  | 324/400 [00:18<00:01, 47.62it/s, reward=-0.688, num_turns=1.72, num_tools=0.719, failed=0.265, completion_tokens=43.1][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  81%|████████▏ | 325/400 [00:19<00:01, 47.62it/s, reward=-0.68, num_turns=1.72, num_tools=0.72, failed=0.265, completion_tokens=43]    validation:  82%|████████▏ | 326/400 [00:19<00:01, 47.62it/s, reward=-0.676, num_turns=1.72, num_tools=0.721, failed=0.264, completion_tokens=43.1]validation:  82%|████████▏ | 327/400 [00:19<00:01, 47.62it/s, reward=-0.68, num_turns=1.72, num_tools=0.722, failed=0.263, completion_tokens=43]   validation:  82%|████████▏ | 328/400 [00:19<00:01, 47.62it/s, reward=-0.666, num_turns=1.72, num_tools=0.723, failed=0.262, completion_tokens=43]validation:  82%|████████▏ | 329/400 [00:19<00:01, 47.62it/s, reward=-0.673, num_turns=1.72, num_tools=0.72, failed=0.264, completion_tokens=43] validation:  82%|████████▎ | 330/400 [00:19<00:01, 47.62it/s, reward=-0.668, num_turns=1.72, num_tools=0.721, failed=0.264, completion_tokens=42.9]validation:  83%|████████▎ | 331/400 [00:19<00:01, 47.62it/s, reward=-0.672, num_turns=1.72, num_tools=0.722, failed=0.263, completion_tokens=42.9]validation:  83%|████████▎ | 332/400 [00:19<00:01, 47.62it/s, reward=-0.676, num_turns=1.72, num_tools=0.723, failed=0.262, completion_tokens=42.8]validation:  83%|████████▎ | 333/400 [00:19<00:01, 47.62it/s, reward=-0.68, num_turns=1.72, num_tools=0.724, failed=0.261, completion_tokens=42.8] validation:  84%|████████▎ | 334/400 [00:19<00:01, 47.62it/s, reward=-0.673, num_turns=1.72, num_tools=0.725, failed=0.26, completion_tokens=42.8]validation:  84%|████████▍ | 335/400 [00:19<00:01, 47.62it/s, reward=-0.677, num_turns=1.73, num_tools=0.725, failed=0.26, completion_tokens=42.9]validation:  84%|████████▍ | 336/400 [00:19<00:01, 47.62it/s, reward=-0.673, num_turns=1.73, num_tools=0.726, failed=0.259, completion_tokens=42.9]validation:  84%|████████▍ | 337/400 [00:19<00:01, 47.62it/s, reward=-0.665, num_turns=1.73, num_tools=0.727, failed=0.258, completion_tokens=42.8]validation:  84%|████████▍ | 338/400 [00:19<00:01, 47.62it/s, reward=-0.657, 
num_turns=1.73, num_tools=0.728, failed=0.257, completion_tokens=42.7]validation:  85%|████████▍ | 339/400 [00:19<00:01, 47.62it/s, reward=-0.661, num_turns=1.73, num_tools=0.729, failed=0.257, completion_tokens=42.7][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  85%|████████▌ | 340/400 [00:19<00:01, 47.62it/s, reward=-0.668, num_turns=1.73, num_tools=0.729, failed=0.259, completion_tokens=42.6]validation:  85%|████████▌ | 341/400 [00:19<00:01, 47.62it/s, reward=-0.675, num_turns=1.73, num_tools=0.727, failed=0.261, completion_tokens=42.6]validation:  86%|████████▌ | 342/400 [00:19<00:01, 47.62it/s, reward=-0.682, num_turns=1.73, num_tools=0.728, failed=0.263, completion_tokens=42.6]validation:  86%|████████▌ | 343/400 [00:19<00:01, 47.62it/s, reward=-0.689, num_turns=1.73, num_tools=0.729, failed=0.265, completion_tokens=42.5]validation:  86%|████████▌ | 344/400 [00:19<00:01, 47.62it/s, reward=-0.695, num_turns=1.73, num_tools=0.73, failed=0.267, completion_tokens=42.5] validation:  86%|████████▋ | 345/400 [00:19<00:01, 47.62it/s, reward=-0.702, num_turns=1.73, num_tools=0.73, failed=0.27, completion_tokens=42.5] validation:  86%|████████▋ | 346/400 [00:19<00:01, 47.62it/s, reward=-0.688, num_turns=1.73, num_tools=0.731, failed=0.269, completion_tokens=42.4]validation:  87%|████████▋ | 347/400 [00:19<00:01, 47.62it/s, reward=-0.692, num_turns=1.73, num_tools=0.732, failed=0.268, completion_tokens=42.4]validation:  87%|████████▋ | 348/400 [00:19<00:01, 47.62it/s, reward=-0.679, num_turns=1.73, num_tools=0.733, failed=0.267, completion_tokens=42.4]validation:  87%|████████▋ | 349/400 [00:19<00:01, 47.62it/s, reward=-0.685, num_turns=1.73, num_tools=0.734, failed=0.269, completion_tokens=42.4]validation:  88%|████████▊ | 350/400 [00:19<00:01, 47.62it/s, reward=-0.68, num_turns=1.73, num_tools=0.734, failed=0.269, completion_tokens=42.3] validation:  88%|████████▊ | 351/400 [00:19<00:01, 47.62it/s, reward=-0.684, num_turns=1.74, num_tools=0.735, failed=0.268, completion_tokens=42.4][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  88%|████████▊ | 352/400 [00:19<00:01, 47.62it/s, reward=-0.691, num_turns=1.73, num_tools=0.733, failed=0.27, completion_tokens=42.4] [rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  88%|████████▊ | 353/400 [00:19<00:00, 47.62it/s, reward=-0.697, num_turns=1.73, num_tools=0.731, failed=0.272, completion_tokens=42.4][rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}}
+validation:  88%|████████▊ | 354/400 [00:19<00:00, 47.62it/s, reward=-0.704, num_turns=1.73, num_tools=0.732, failed=0.274, completion_tokens=42.3]validation:  89%|████████▉ | 355/400 [00:19<00:00, 75.91it/s, reward=-0.704, num_turns=1.73, num_tools=0.732, failed=0.274, completion_tokens=42.3]validation:  89%|████████▉ | 355/400 [00:19<00:00, 75.91it/s, reward=-0.699, num_turns=1.73, num_tools=0.732, failed=0.273, completion_tokens=42.3]validation:  89%|████████▉ | 356/400 [00:19<00:00, 75.91it/s, reward=-0.703, num_turns=1.73, num_tools=0.733, failed=0.272, completion_tokens=42.3]validation:  89%|████████▉ | 357/400 [00:19<00:00, 75.91it/s, reward=-0.706, num_turns=1.73, num_tools=0.734, failed=0.272, completion_tokens=42.4]validation:  90%|████████▉ | 358/400 [00:19<00:00, 75.91it/s, reward=-0.697, num_turns=1.73, num_tools=0.735, failed=0.271, completion_tokens=42.3]validation:  90%|████████▉ | 359/400 [00:19<00:00, 75.91it/s, reward=-0.701, num_turns=1.74, num_tools=0.735, failed=0.27, completion_tokens=42.2] validation:  90%|█████████ | 360/400 [00:19<00:00, 75.91it/s, reward=-0.694, num_turns=1.74, num_tools=0.736, failed=0.269, completion_tokens=42.1]validation:  90%|█████████ | 361/400 [00:19<00:00, 75.91it/s, reward=-0.698, num_turns=1.74, num_tools=0.737, failed=0.269, completion_tokens=42.1]validation:  90%|█████████ | 362/400 [00:19<00:00, 75.91it/s, reward=-0.693, num_turns=1.74, num_tools=0.738, failed=0.268, completion_tokens=42]  validation:  91%|█████████ | 363/400 [00:19<00:00, 75.91it/s, reward=-0.68, num_turns=1.74, num_tools=0.738, failed=0.267, completion_tokens=41.9]validation:  91%|█████████ | 364/400 [00:19<00:00, 75.91it/s, reward=-0.673, num_turns=1.74, num_tools=0.739, failed=0.266, completion_tokens=41.9]validation:  91%|█████████▏| 365/400 [00:19<00:00, 75.91it/s, reward=-0.666, num_turns=1.74, num_tools=0.74, failed=0.266, completion_tokens=41.8] validation:  92%|█████████▏| 366/400 [00:19<00:00, 75.91it/s, 
reward=-0.658, num_turns=1.74, num_tools=0.74, failed=0.265, completion_tokens=41.7]validation:  92%|█████████▏| 367/400 [00:19<00:00, 75.91it/s, reward=-0.651, num_turns=1.74, num_tools=0.741, failed=0.264, completion_tokens=41.6]validation:  92%|█████████▏| 368/400 [00:19<00:00, 75.91it/s, reward=-0.644, num_turns=1.74, num_tools=0.742, failed=0.264, completion_tokens=41.6]validation:  92%|█████████▏| 369/400 [00:19<00:00, 75.91it/s, reward=-0.64, num_turns=1.74, num_tools=0.743, failed=0.263, completion_tokens=41.5] validation:  92%|█████████▎| 370/400 [00:19<00:00, 85.29it/s, reward=-0.64, num_turns=1.74, num_tools=0.743, failed=0.263, completion_tokens=41.5]validation:  92%|█████████▎| 370/400 [00:19<00:00, 85.29it/s, reward=-0.635, num_turns=1.74, num_tools=0.743, failed=0.262, completion_tokens=41.5]validation:  93%|█████████▎| 371/400 [00:19<00:00, 85.29it/s, reward=-0.639, num_turns=1.74, num_tools=0.744, failed=0.261, completion_tokens=41.4]validation:  93%|█████████▎| 372/400 [00:19<00:00, 85.29it/s, reward=-0.626, num_turns=1.74, num_tools=0.745, failed=0.261, completion_tokens=41.4]validation:  93%|█████████▎| 373/400 [00:19<00:00, 85.29it/s, reward=-0.614, num_turns=1.75, num_tools=0.745, failed=0.26, completion_tokens=41.3] validation:  94%|█████████▎| 374/400 [00:19<00:00, 85.29it/s, reward=-0.607, num_turns=1.75, num_tools=0.746, failed=0.259, completion_tokens=41.3]validation:  94%|█████████▍| 375/400 [00:19<00:00, 85.29it/s, reward=-0.611, num_turns=1.75, num_tools=0.747, failed=0.259, completion_tokens=41.3]validation:  94%|█████████▍| 376/400 [00:19<00:00, 85.29it/s, reward=-0.604, num_turns=1.75, num_tools=0.747, failed=0.258, completion_tokens=41.3]validation:  94%|█████████▍| 377/400 [00:19<00:00, 85.29it/s, reward=-0.607, num_turns=1.75, num_tools=0.748, failed=0.257, completion_tokens=41.3]validation:  94%|█████████▍| 378/400 [00:19<00:00, 85.29it/s, reward=-0.611, num_turns=1.75, num_tools=0.749, failed=0.257, 
completion_tokens=41.3]validation:  95%|█████████▍| 379/400 [00:19<00:00, 85.29it/s, reward=-0.615, num_turns=1.75, num_tools=0.749, failed=0.256, completion_tokens=41.3]validation:  95%|█████████▌| 380/400 [00:19<00:00, 85.29it/s, reward=-0.618, num_turns=1.75, num_tools=0.75, failed=0.255, completion_tokens=41.3] validation:  95%|█████████▌| 381/400 [00:19<00:00, 85.29it/s, reward=-0.622, num_turns=1.75, num_tools=0.751, failed=0.255, completion_tokens=41.5]validation:  96%|█████████▌| 382/400 [00:19<00:00, 85.29it/s, reward=-0.626, num_turns=1.75, num_tools=0.754, failed=0.254, completion_tokens=41.5]validation:  96%|█████████▌| 383/400 [00:19<00:00, 85.29it/s, reward=-0.62, num_turns=1.75, num_tools=0.755, failed=0.253, completion_tokens=41.6] validation:  96%|█████████▌| 384/400 [00:19<00:00, 81.11it/s, reward=-0.62, num_turns=1.75, num_tools=0.755, failed=0.253, completion_tokens=41.6]validation:  96%|█████████▌| 384/400 [00:19<00:00, 81.11it/s, reward=-0.613, num_turns=1.76, num_tools=0.755, failed=0.253, completion_tokens=41.5]validation:  96%|█████████▋| 385/400 [00:19<00:00, 81.11it/s, reward=-0.617, num_turns=1.76, num_tools=0.756, failed=0.252, completion_tokens=41.5]validation:  96%|█████████▋| 386/400 [00:19<00:00, 81.11it/s, reward=-0.62, num_turns=1.76, num_tools=0.756, failed=0.251, completion_tokens=41.5] validation:  97%|█████████▋| 387/400 [00:19<00:00, 81.11it/s, reward=-0.615, num_turns=1.76, num_tools=0.757, failed=0.251, completion_tokens=41.5]validation:  97%|█████████▋| 388/400 [00:19<00:00, 81.11it/s, reward=-0.619, num_turns=1.76, num_tools=0.758, failed=0.25, completion_tokens=41.4] validation:  97%|█████████▋| 389/400 [00:19<00:00, 81.11it/s, reward=-0.623, num_turns=1.76, num_tools=0.758, failed=0.249, completion_tokens=41.4]validation:  98%|█████████▊| 390/400 [00:19<00:00, 81.11it/s, reward=-0.626, num_turns=1.76, num_tools=0.759, failed=0.249, completion_tokens=41.5]validation:  98%|█████████▊| 391/400 [00:19<00:00, 
81.11it/s, reward=-0.623, num_turns=1.76, num_tools=0.76, failed=0.248, completion_tokens=41.6] validation:  98%|█████████▊| 392/400 [00:19<00:00, 81.11it/s, reward=-0.619, num_turns=1.76, num_tools=0.76, failed=0.247, completion_tokens=41.7]validation:  98%|█████████▊| 393/400 [00:19<00:00, 81.11it/s, reward=-0.623, num_turns=1.76, num_tools=0.763, failed=0.247, completion_tokens=41.6]validation:  98%|█████████▊| 394/400 [00:19<00:00, 81.11it/s, reward=-0.616, num_turns=1.76, num_tools=0.764, failed=0.246, completion_tokens=41.7]validation:  99%|█████████▉| 395/400 [00:19<00:00, 81.11it/s, reward=-0.622, num_turns=1.76, num_tools=0.762, failed=0.246, completion_tokens=42.1]validation:  99%|█████████▉| 396/400 [00:19<00:00, 61.24it/s, reward=-0.622, num_turns=1.76, num_tools=0.762, failed=0.246, completion_tokens=42.1]validation:  99%|█████████▉| 396/400 [00:19<00:00, 61.24it/s, reward=-0.618, num_turns=1.76, num_tools=0.763, failed=0.245, completion_tokens=42.1]validation:  99%|█████████▉| 397/400 [00:19<00:00, 61.24it/s, reward=-0.622, num_turns=1.76, num_tools=0.763, failed=0.244, completion_tokens=42.4]validation: 100%|█████████▉| 398/400 [00:20<00:00, 61.24it/s, reward=-0.625, num_turns=1.76, num_tools=0.764, failed=0.244, completion_tokens=43.2]validation: 100%|█████████▉| 399/400 [00:20<00:00, 61.24it/s, reward=-0.629, num_turns=1.76, num_tools=0.764, failed=0.243, completion_tokens=43.6]validation: 100%|██████████| 400/400 [00:20<00:00, 61.24it/s, reward=-0.632, num_turns=1.76, num_tools=0.765, failed=0.242, completion_tokens=44.1]validation: 100%|██████████| 400/400 [00:20<00:00, 19.37it/s, reward=-0.632, num_turns=1.76, num_tools=0.765, failed=0.242, completion_tokens=44.1]
+  Val avg reward: -0.632
+
+============================================================
+Step 16/50
+============================================================
+step 16:   0%|          | 0/32 [00:00<?, ?it/s]step 16:   3%|▎         | 1/32 [00:01<00:31,  1.00s/it]step 16:   3%|▎         | 1/32 [00:01<00:31,  1.00s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=21]step 16:   6%|▋         | 2/32 [00:01<00:30,  1.00s/it, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=30]step 16:   9%|▉         | 3/32 [00:01<00:09,  3.15it/s, reward=-3, num_turns=1, num_tools=0, failed=0, completion_tokens=30]step 16:   9%|▉         | 3/32 [00:01<00:09,  3.15it/s, reward=-0.667, num_turns=1.33, num_tools=0.333, failed=0, completion_tokens=26.3]step 16:  12%|█▎        | 4/32 [00:01<00:08,  3.15it/s, reward=-1, num_turns=1.5, num_tools=0.5, failed=0, completion_tokens=25.8]       step 16:  16%|█▌        | 5/32 [00:01<00:08,  3.15it/s, reward=0, num_turns=1.6, num_tools=0.6, failed=0, completion_tokens=25]   step 16:  19%|█▉        | 6/32 [00:01<00:08,  3.15it/s, reward=0.667, num_turns=1.67, num_tools=0.667, failed=0, completion_tokens=24.5]step 16:  22%|██▏       | 7/32 [00:01<00:07,  3.15it/s, reward=1.14, num_turns=1.71, num_tools=0.714, failed=0, completion_tokens=24.1] step 16:  25%|██▌       | 8/32 [00:01<00:07,  3.15it/s, reward=1.5, num_turns=1.75, num_tools=0.75, failed=0, completion_tokens=23.9]  step 16:  28%|██▊       | 9/32 [00:01<00:07,  3.15it/s, reward=1.78, num_turns=1.78, num_tools=0.778, failed=0, completion_tokens=24.3]step 16:  31%|███▏      | 10/32 [00:01<00:06,  3.15it/s, reward=1.7, num_turns=1.8, num_tools=0.8, failed=0, completion_tokens=24.8]   step 16:  34%|███▍      | 11/32 [00:01<00:06,  3.15it/s, reward=1.91, num_turns=1.82, num_tools=0.818, failed=0, completion_tokens=25]step 16:  38%|███▊      | 12/32 [00:01<00:06,  3.15it/s, reward=2.08, num_turns=1.83, num_tools=0.833, failed=0, completion_tokens=24.9]step 16:  41%|████      | 13/32 [00:01<00:06,  3.15it/s, reward=2.23, num_turns=1.85, num_tools=0.846, failed=0, completion_tokens=25.1]step 16:  44%|████▍  
   | 14/32 [00:01<00:05,  3.15it/s, reward=2.36, num_turns=1.86, num_tools=0.857, failed=0, completion_tokens=25.2]step 16:  47%|████▋     | 15/32 [00:01<00:05,  3.15it/s, reward=2.47, num_turns=1.87, num_tools=0.867, failed=0, completion_tokens=25.4]step 16:  50%|█████     | 16/32 [00:01<00:05,  3.15it/s, reward=2.56, num_turns=1.88, num_tools=0.875, failed=0, completion_tokens=25.6]step 16:  53%|█████▎    | 17/32 [00:01<00:04,  3.15it/s, reward=2.65, num_turns=1.88, num_tools=0.882, failed=0, completion_tokens=25.3]step 16:  56%|█████▋    | 18/32 [00:01<00:04,  3.15it/s, reward=2.72, num_turns=1.89, num_tools=0.889, failed=0, completion_tokens=25.1]step 16:  59%|█████▉    | 19/32 [00:01<00:00, 24.85it/s, reward=2.72, num_turns=1.89, num_tools=0.889, failed=0, completion_tokens=25.1]step 16:  59%|█████▉    | 19/32 [00:01<00:00, 24.85it/s, reward=2.79, num_turns=1.89, num_tools=0.895, failed=0, completion_tokens=25.2]step 16:  62%|██████▎   | 20/32 [00:01<00:00, 24.85it/s, reward=2.85, num_turns=1.9, num_tools=0.9, failed=0, completion_tokens=25.4]   step 16:  66%|██████▌   | 21/32 [00:01<00:00, 24.85it/s, reward=2.75, num_turns=1.9, num_tools=0.905, failed=0, completion_tokens=26]step 16:  69%|██████▉   | 22/32 [00:01<00:00, 24.85it/s, reward=2.48, num_turns=1.86, num_tools=0.864, failed=0, completion_tokens=28.9]step 16:  72%|███████▏  | 23/32 [00:01<00:00, 24.85it/s, reward=2.29, num_turns=1.87, num_tools=0.87, failed=0, completion_tokens=29.2] step 16:  75%|███████▌  | 24/32 [00:01<00:00, 24.85it/s, reward=2.11, num_turns=1.88, num_tools=0.875, failed=0, completion_tokens=29.4]step 16:  78%|███████▊  | 25/32 [00:01<00:00, 24.85it/s, reward=1.91, num_turns=1.84, num_tools=0.84, failed=0, completion_tokens=32.6] step 16:  81%|████████▏ | 26/32 [00:01<00:00, 30.29it/s, reward=1.91, num_turns=1.84, num_tools=0.84, failed=0, completion_tokens=32.6]step 16:  81%|████████▏ | 26/32 [00:01<00:00, 30.29it/s, reward=1.88, num_turns=1.85, num_tools=0.846, 
failed=0, completion_tokens=32.9]step 16:  84%|████████▍ | 27/32 [00:01<00:00, 30.29it/s, reward=1.86, num_turns=1.85, num_tools=0.852, failed=0, completion_tokens=33.4]step 16:  88%|████████▊ | 28/32 [00:01<00:00, 30.29it/s, reward=1.82, num_turns=1.86, num_tools=0.857, failed=0, completion_tokens=34.2]step 16:  91%|█████████ | 29/32 [00:01<00:00, 30.29it/s, reward=1.79, num_turns=1.86, num_tools=0.862, failed=0, completion_tokens=35.1]step 16:  94%|█████████▍| 30/32 [00:01<00:00, 30.29it/s, reward=1.67, num_turns=1.87, num_tools=0.867, failed=0, completion_tokens=35.1]step 16:  97%|█████████▋| 31/32 [00:01<00:00, 30.29it/s, reward=1.65, num_turns=1.87, num_tools=0.871, failed=0, completion_tokens=36.4]step 16: 100%|██████████| 32/32 [00:01<00:00, 26.13it/s, reward=1.65, num_turns=1.87, num_tools=0.871, failed=0, completion_tokens=36.4]step 16: 100%|██████████| 32/32 [00:01<00:00, 26.13it/s, reward=1.61, num_turns=1.88, num_tools=0.875, failed=0, completion_tokens=38]  step 16: 100%|██████████| 32/32 [00:01<00:00, 18.60it/s, reward=1.61, num_turns=1.88, num_tools=0.875, failed=0, completion_tokens=38]
+  group 0: mean=+4.00 std=0.000 min=+4.0 max=+4.0 | What's the weather like in London?
+  group 1: mean=-1.12 std=1.699 min=-3.0 max=+1.3 | What is Japan's population density in people per s
+  group 2: mean=+4.00 std=0.000 min=+4.0 max=+4.0 | Convert 23 kg to lbs.
+  group 3: mean=-0.42 std=1.778 min=-3.0 max=+1.3 | What is India's population density in people per s
+  Avg reward: 1.615 | Avg tools/rollout: 0.9 | groups with variance: 2/4
+Deleted checkpoint ./.art/rl-tool-use/models/qwen-0.5b-tool-agent/checkpoints/0014
+Deleted checkpoint ./.art/rl-tool-use/models/qwen-0.5b-tool-agent/checkpoints/0010
+Packed 16 trajectories into 2 sequences of length 2048
+train:   0%|          | 0/2 [00:00<?, ?it/s][2026-04-13 02:45:31] INFO _base_client.py:1693: Retrying request to /completions in 0.489393 seconds
+[2026-04-13 02:45:36] INFO _base_client.py:1693: Retrying request to /completions in 0.916879 seconds
+[2026-04-13 02:46:17] INFO _base_client.py:1693: Retrying request to /completions in 0.386873 seconds
+[2026-04-13 02:46:23] INFO _base_client.py:1693: Retrying request to /completions in 0.894120 seconds
+[train.py] suppressed _monitor_openai_server crash: APITimeoutError: Request timed out.
+Traceback (most recent call last):
+  File "<frozen runpy>", line 198, in _run_module_as_main
+  File "<frozen runpy>", line 88, in _run_code
+  File "/workspace/RL-Trained-Tool-Use-Agent/src/train.py", line 211, in <module>
+    main()
+  File "/workspace/RL-Trained-Tool-Use-Agent/src/train.py", line 207, in main
+    asyncio.run(train(**kwargs))
+  File "/usr/local/lib/python3.12/dist-packages/nest_asyncio.py", line 30, in run
+    return loop.run_until_complete(task)
+           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/usr/local/lib/python3.12/dist-packages/nest_asyncio.py", line 98, in run_until_complete
+    return f.result()
+           ^^^^^^^^^^
+  File "/usr/lib/python3.12/asyncio/futures.py", line 203, in result
+    raise self._exception.with_traceback(self._exception_tb)
+  File "/usr/lib/python3.12/asyncio/tasks.py", line 316, in __step_run_and_handle_result
+    result = coro.throw(exc)
+             ^^^^^^^^^^^^^^^
+  File "/workspace/RL-Trained-Tool-Use-Agent/src/train.py", line 153, in train
+    result = await backend.train(model, train_groups, learning_rate=learning_rate)
+             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/usr/local/lib/python3.12/dist-packages/art/local/backend.py", line 644, in train
+    async for metrics in self._train_model(
+  File "/usr/local/lib/python3.12/dist-packages/art/local/backend.py", line 783, in _train_model
+    async for result in service.train(
+  File "/usr/local/lib/python3.12/dist-packages/mp_actors/move.py", line 226, in async_gen_wrapper
+    send_value = yield await asyncio.wrap_future(
+                       ^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/usr/lib/python3.12/asyncio/futures.py", line 287, in __await__
+    yield self  # This tells Task to wait for completion.
+    ^^^^^^^^^^
+  File "/usr/lib/python3.12/asyncio/tasks.py", line 385, in __wakeup
+    future.result()
+  File "/usr/lib/python3.12/asyncio/futures.py", line 203, in result
+    raise self._exception.with_traceback(self._exception_tb)
+RuntimeError: Proxy is closing
+train:   0%|          | 0/2 [01:06<?, ?it/s]
diff --git a/chat_template.jinja b/chat_template.jinja
new file mode 100644
index 0000000..bdf7919
--- /dev/null
+++ b/chat_template.jinja
@@ -0,0 +1,63 @@
+{#- Renders `messages` as <|im_start|>role ... <|im_end|> turns; when `tools` is given, the system turn also lists the tool signatures and the expected <tool_call> reply format. #}
+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0]['role'] == 'system' %}
+        {{- messages[0]['content'] }}
+    {%- else %}
+        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
+    {%- endif %}
+    {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0]['role'] == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
+    {%- else %}
+        {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{#- A leading system message was already emitted above, so no branch of this loop renders it again. #}
+{%- for message in messages %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
+        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {{- '<|im_start|>' + message.role }}
+        {%- if message.content %}
+            {{- '\n' + message.content }}
+        {%- endif %}
+        {%- for tool_call in message.tool_calls %}
+            {#- Accept both a tool call wrapped in a `function` field and a bare name/arguments object. #}
+            {%- if tool_call.function is defined %}
+                {%- set tool_call = tool_call.function %}
+            {%- endif %}
+            {{- '\n<tool_call>\n{"name": "' }}
+            {{- tool_call.name }}
+            {{- '", "arguments": ' }}
+            {#- Arguments that already arrive as a JSON string are emitted verbatim; tojson would quote and escape them a second time. #}
+            {%- if tool_call.arguments is string %}
+                {{- tool_call.arguments }}
+            {%- else %}
+                {{- tool_call.arguments | tojson }}
+            {%- endif %}
+            {{- '}\n</tool_call>' }}
+        {%- endfor %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {#- Consecutive tool messages share one user turn: it is opened on the first and closed after the last. #}
+        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- message.content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+{%- endif %}
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..e2f4dfb
--- /dev/null
+++ b/config.json
@@ -0,0 +1,57 @@
+{
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "dtype": "bfloat16",
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 896,
+  "initializer_range": 0.02,
+  "intermediate_size": 4864,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 21,
+  "model_type": "qwen2",
+  "num_attention_heads": 14,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 2,
+  "pad_token_id": null,
+  "rms_norm_eps": 1e-06,
+  "rope_parameters": {
+    "rope_theta": 1000000.0,
+    "rope_type": "default"
+  },
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "transformers_version": "5.2.0",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000..8d23c88
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,14 @@
+{
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "repetition_penalty": 1.1,
+  "temperature": 0.7,
+  "top_k": 20,
+  "top_p": 0.8,
+  "transformers_version": "5.2.0"
+}
diff --git a/model.safetensors b/model.safetensors
new file mode 100644
index 0000000..9a10cc7
--- /dev/null
+++ b/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1ce74637a68f2305f02771cfbf5336782b145f388fae4f87b1ef1b1f84b08c6
+size 988097824
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000..5340d81
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd5948af71b4f56cf697f7580814c7ce8b80595ef985544efcacf716126a2e31
+size 11422356
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000..26510ce
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,15 @@
+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "is_local": true,
+  "model_max_length": 32768,
+  "pad_token": "<|PAD_TOKEN|>",
+  "padding_side": "right",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}