commit 67c081bc2ec6615ed6214a3e1d0e2e8bba2ba585
Author: ModelHub XC
Date:   Fri May 1 18:43:25 2026 +0800

    Initialize the project; model provided by the ModelHub XC community
    Model: alwaysgood/QWEN3-4B-CPT
    Source: Original Platform

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..4769e9f
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,38 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+checkpoint-1477/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3f53990
--- /dev/null
+++ b/README.md
@@ -0,0 +1,59 @@
+---
+base_model: unsloth/Qwen3-4B-Base
+library_name: transformers
+model_name: checkpoints
+tags:
+- generated_from_trainer
+- sft
+- unsloth
+- trl
+license: license
+---
+
+# Model Card for checkpoints
+
+This model is a fine-tuned version of [unsloth/Qwen3-4B-Base](https://huggingface.co/unsloth/Qwen3-4B-Base).
+It has been trained using [TRL](https://github.com/huggingface/trl).
+
+## Quick start
+
+```python
+from transformers import pipeline
+
+question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
+# The generated card left the model id as "None"; "alwaysgood/QWEN3-4B-CPT" is the repo id recorded in this commit.
+# Note: this is a CPT/base checkpoint, so the chat-style input below assumes a chat template is available.
+generator = pipeline("text-generation", model="alwaysgood/QWEN3-4B-CPT", device="cuda")
+output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
+print(output["generated_text"])
+```
+
+## Training procedure
+
+[Visualize in Weights & Biases](https://wandb.ai/hiloong/mono-cpt/runs/sxp4zkdr)
+
+
+This model was trained with SFT.
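+
+Since the training objective is a per-token cross-entropy, the final `eval_loss` of ~1.70 recorded in `all_results.json` corresponds to a held-out perplexity of roughly exp(1.70) ≈ 5.5. A minimal sketch of that conversion, using the value from this repo's `all_results.json`:
+
+```python
+import math
+
+eval_loss = 1.7002116441726685  # final "eval_loss" from all_results.json
+perplexity = math.exp(eval_loss)  # per-token perplexity on the eval split
+print(round(perplexity, 2))  # ~5.48
+```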
+
+### Framework versions
+
+- TRL: 0.24.0
+- Transformers: 5.5.3
+- PyTorch: 2.9.0+cu128
+- Datasets: 4.3.0
+- Tokenizers: 0.22.2
+
+## Citations
+
+
+
+Cite TRL as:
+
+```bibtex
+@misc{vonwerra2022trl,
+    title = {{TRL: Transformer Reinforcement Learning}},
+    author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
+    year = 2020,
+    journal = {GitHub repository},
+    publisher = {GitHub},
+    howpublished = {\url{https://github.com/huggingface/trl}}
+}
+```
\ No newline at end of file
diff --git a/all_results.json b/all_results.json
new file mode 100644
index 0000000..409268f
--- /dev/null
+++ b/all_results.json
@@ -0,0 +1,12 @@
+{
+    "epoch": 1.0,
+    "eval_loss": 1.7002116441726685,
+    "eval_runtime": 173.1669,
+    "eval_samples_per_second": 5.526,
+    "eval_steps_per_second": 0.693,
+    "total_flos": 2.103177196962902e+18,
+    "train_loss": 1.7256613558986822,
+    "train_runtime": 29239.084,
+    "train_samples_per_second": 1.616,
+    "train_steps_per_second": 0.051
+}
\ No newline at end of file
diff --git a/checkpoint-1477/config.json b/checkpoint-1477/config.json
new file mode 100644
index 0000000..cbfe8ec
--- /dev/null
+++ b/checkpoint-1477/config.json
@@ -0,0 +1,74 @@
+{
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": null,
+  "dtype": "bfloat16",
+  "eos_token_id": 151643,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 36,
+  "model_name": "unsloth/Qwen3-4B-Base",
+  "model_type": "qwen3",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "pad_token_id": 151669,
+  "rms_norm_eps": 1e-06,
+  "rope_parameters": {
+    "rope_theta": 1000000,
+    "rope_type": "default"
+  },
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "transformers_version": "5.5.3",
+  "unsloth_fixed": true,
+  "unsloth_version": "2026.4.4",
+  "use_cache": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}
diff --git a/checkpoint-1477/generation_config.json b/checkpoint-1477/generation_config.json
new file mode 100644
index 0000000..43f602b
--- /dev/null
+++ b/checkpoint-1477/generation_config.json
@@ -0,0 +1,9 @@
+{
+  "eos_token_id": [
+    151643
+  ],
+  "max_length": 32768,
+  "max_new_tokens": 2048,
+  "pad_token_id": 151669,
+  "transformers_version": "5.5.3"
+}
diff --git a/checkpoint-1477/model.safetensors b/checkpoint-1477/model.safetensors
new file mode 100644
index 0000000..3e5ceb1
--- /dev/null
+++ b/checkpoint-1477/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid 
sha256:a3a4a2abfa757af075d0fa804b2093ad46c4d9bc4a227a070907a885eea69e97 +size 8044982080 diff --git a/checkpoint-1477/optimizer.pt b/checkpoint-1477/optimizer.pt new file mode 100644 index 0000000..0469685 --- /dev/null +++ b/checkpoint-1477/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa38e6bbd0738ba63086bd0369ac5ef53d96f94c1d3bf63cd8286ec8d324f12e +size 14534393422 diff --git a/checkpoint-1477/rng_state.pth b/checkpoint-1477/rng_state.pth new file mode 100644 index 0000000..435e005 --- /dev/null +++ b/checkpoint-1477/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8e2011629d8bed3ef560fa11175cac55684c4e12a72634bb24abf767b6c7399 +size 14645 diff --git a/checkpoint-1477/scheduler.pt b/checkpoint-1477/scheduler.pt new file mode 100644 index 0000000..5f84341 --- /dev/null +++ b/checkpoint-1477/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41b1610c26267a8ba984a88363b2c05d3a4b232638c92e564e21a007ac9f4fc1 +size 1465 diff --git a/checkpoint-1477/tokenizer.json b/checkpoint-1477/tokenizer.json new file mode 100644 index 0000000..73037fe --- /dev/null +++ b/checkpoint-1477/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45c4ffda6666cf6d75d0b1f961f25964e2a52a62e78aaecb2f458e9ba9824112 +size 11422840 diff --git a/checkpoint-1477/tokenizer_config.json b/checkpoint-1477/tokenizer_config.json new file mode 100644 index 0000000..d450540 --- /dev/null +++ b/checkpoint-1477/tokenizer_config.json @@ -0,0 +1,15 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "is_local": false, + "model_max_length": 32768, + "pad_token": "<|PAD_TOKEN|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/checkpoint-1477/trainer_state.json b/checkpoint-1477/trainer_state.json new file mode 100644 index 0000000..a7c293f --- /dev/null +++ b/checkpoint-1477/trainer_state.json @@ -0,0 +1,1087 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 1477, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.006771626883358727, + "grad_norm": 1.5234375, + "learning_rate": 6.081081081081082e-07, + "loss": 1.8358331680297852, + "step": 10 + }, + { + "epoch": 0.013543253766717453, + "grad_norm": 1.5078125, + "learning_rate": 1.2837837837837838e-06, + "loss": 1.840726089477539, + "step": 20 + }, + { + "epoch": 0.02031488065007618, + "grad_norm": 1.0859375, + "learning_rate": 1.9594594594594595e-06, + "loss": 1.8267410278320313, + "step": 30 + }, + { + "epoch": 0.027086507533434907, + "grad_norm": 1.1640625, + "learning_rate": 2.6351351351351353e-06, + "loss": 1.8383310317993165, + "step": 40 + }, + { + "epoch": 0.03385813441679363, + "grad_norm": 1.0859375, + "learning_rate": 3.310810810810811e-06, + "loss": 1.8384885787963867, + "step": 50 + }, + { + "epoch": 0.04062976130015236, + "grad_norm": 1.03125, + "learning_rate": 3.986486486486487e-06, + "loss": 1.8087802886962892, + "step": 60 + }, + { + "epoch": 0.04740138818351109, + "grad_norm": 1.015625, + "learning_rate": 4.6621621621621625e-06, + "loss": 1.8259227752685547, + "step": 70 + }, + { + "epoch": 0.05417301506686981, + "grad_norm": 1.046875, + 
"learning_rate": 5.337837837837838e-06, + "loss": 1.8241001129150392, + "step": 80 + }, + { + "epoch": 0.06094464195022854, + "grad_norm": 0.96484375, + "learning_rate": 6.013513513513514e-06, + "loss": 1.82220516204834, + "step": 90 + }, + { + "epoch": 0.06771626883358726, + "grad_norm": 0.953125, + "learning_rate": 6.689189189189191e-06, + "loss": 1.7921783447265625, + "step": 100 + }, + { + "epoch": 0.074487895716946, + "grad_norm": 0.9296875, + "learning_rate": 7.3648648648648655e-06, + "loss": 1.797548484802246, + "step": 110 + }, + { + "epoch": 0.08125952260030472, + "grad_norm": 0.89453125, + "learning_rate": 8.040540540540541e-06, + "loss": 1.7889528274536133, + "step": 120 + }, + { + "epoch": 0.08803114948366345, + "grad_norm": 0.90234375, + "learning_rate": 8.716216216216217e-06, + "loss": 1.7663179397583009, + "step": 130 + }, + { + "epoch": 0.09480277636702218, + "grad_norm": 0.89453125, + "learning_rate": 9.391891891891893e-06, + "loss": 1.7635225296020507, + "step": 140 + }, + { + "epoch": 0.1015744032503809, + "grad_norm": 0.91015625, + "learning_rate": 9.999986030219255e-06, + "loss": 1.7774492263793946, + "step": 150 + }, + { + "epoch": 0.10834603013373963, + "grad_norm": 0.91796875, + "learning_rate": 9.998309750982693e-06, + "loss": 1.7622718811035156, + "step": 160 + }, + { + "epoch": 0.11511765701709836, + "grad_norm": 0.890625, + "learning_rate": 9.993840588849743e-06, + "loss": 1.7750001907348634, + "step": 170 + }, + { + "epoch": 0.12188928390045708, + "grad_norm": 0.890625, + "learning_rate": 9.986581041033881e-06, + "loss": 1.767216110229492, + "step": 180 + }, + { + "epoch": 0.1286609107838158, + "grad_norm": 0.921875, + "learning_rate": 9.976535163919757e-06, + "loss": 1.7609657287597655, + "step": 190 + }, + { + "epoch": 0.13543253766717453, + "grad_norm": 0.87109375, + "learning_rate": 9.96370857079661e-06, + "loss": 1.7535722732543946, + "step": 200 + }, + { + "epoch": 0.14220416455053325, + "grad_norm": 0.86328125, + "learning_rate": 9.948108428721782e-06, + "loss": 1.7395360946655274, + "step": 210 + }, + { + "epoch": 0.148975791433892, + "grad_norm": 0.88671875, + "learning_rate": 9.92974345451598e-06, + "loss": 1.7465991973876953, + "step": 220 + }, + { + "epoch": 0.15574741831725072, + "grad_norm": 0.87890625, + "learning_rate": 9.908623909892651e-06, + "loss": 1.7506902694702149, + "step": 230 + }, + { + "epoch": 0.16251904520060945, + "grad_norm": 0.8984375, + "learning_rate": 9.884761595724068e-06, + "loss": 1.7368896484375, + "step": 240 + }, + { + "epoch": 0.16929067208396817, + "grad_norm": 0.8671875, + "learning_rate": 9.858169845447417e-06, + "loss": 1.7515613555908203, + "step": 250 + }, + { + "epoch": 0.1760622989673269, + "grad_norm": 0.85546875, + "learning_rate": 9.828863517614533e-06, + "loss": 1.7509956359863281, + "step": 260 + }, + { + "epoch": 0.1828339258506856, + "grad_norm": 0.9140625, + "learning_rate": 9.796858987589462e-06, + "loss": 1.753628921508789, + "step": 270 + }, + { + "epoch": 0.18960555273404436, + "grad_norm": 0.85546875, + "learning_rate": 9.762174138398456e-06, + "loss": 1.7379936218261718, + "step": 280 + }, + { + "epoch": 0.19637717961740309, + "grad_norm": 0.88671875, + "learning_rate": 9.724828350737574e-06, + "loss": 1.7442964553833007, + "step": 290 + }, + { + "epoch": 0.2031488065007618, + "grad_norm": 0.87109375, + "learning_rate": 9.684842492143399e-06, + "loss": 1.7366142272949219, + "step": 300 + }, + { + "epoch": 0.20992043338412053, + "grad_norm": 0.84765625, + "learning_rate": 9.642238905333e-06, + 
"loss": 1.7396051406860351, + "step": 310 + }, + { + "epoch": 0.21669206026747925, + "grad_norm": 0.87109375, + "learning_rate": 9.597041395719573e-06, + "loss": 1.732611083984375, + "step": 320 + }, + { + "epoch": 0.22346368715083798, + "grad_norm": 0.8828125, + "learning_rate": 9.549275218110818e-06, + "loss": 1.7453182220458985, + "step": 330 + }, + { + "epoch": 0.23023531403419673, + "grad_norm": 0.875, + "learning_rate": 9.498967062597403e-06, + "loss": 1.7297761917114258, + "step": 340 + }, + { + "epoch": 0.23700694091755545, + "grad_norm": 0.875, + "learning_rate": 9.446145039639486e-06, + "loss": 1.728118324279785, + "step": 350 + }, + { + "epoch": 0.24377856780091417, + "grad_norm": 0.890625, + "learning_rate": 9.390838664359539e-06, + "loss": 1.7387624740600587, + "step": 360 + }, + { + "epoch": 0.2505501946842729, + "grad_norm": 0.85546875, + "learning_rate": 9.333078840050331e-06, + "loss": 1.7364713668823242, + "step": 370 + }, + { + "epoch": 0.2573218215676316, + "grad_norm": 0.8828125, + "learning_rate": 9.27289784090723e-06, + "loss": 1.7236080169677734, + "step": 380 + }, + { + "epoch": 0.26409344845099036, + "grad_norm": 0.890625, + "learning_rate": 9.210329293994495e-06, + "loss": 1.7224924087524414, + "step": 390 + }, + { + "epoch": 0.27086507533434906, + "grad_norm": 0.8671875, + "learning_rate": 9.145408160455642e-06, + "loss": 1.7099193572998046, + "step": 400 + }, + { + "epoch": 0.2776367022177078, + "grad_norm": 0.8515625, + "learning_rate": 9.078170715978353e-06, + "loss": 1.737176513671875, + "step": 410 + }, + { + "epoch": 0.2844083291010665, + "grad_norm": 0.9140625, + "learning_rate": 9.008654530524883e-06, + "loss": 1.73763427734375, + "step": 420 + }, + { + "epoch": 0.29117995598442525, + "grad_norm": 0.85546875, + "learning_rate": 8.936898447339257e-06, + "loss": 1.7290821075439453, + "step": 430 + }, + { + "epoch": 0.297951582867784, + "grad_norm": 0.8984375, + "learning_rate": 8.86294256124301e-06, + "loss": 1.7403568267822265, + "step": 440 + }, + { + "epoch": 0.3047232097511427, + "grad_norm": 0.859375, + "learning_rate": 8.786828196231584e-06, + "loss": 1.7217792510986327, + "step": 450 + }, + { + "epoch": 0.31149483663450145, + "grad_norm": 0.87109375, + "learning_rate": 8.708597882383908e-06, + "loss": 1.7103708267211915, + "step": 460 + }, + { + "epoch": 0.31826646351786014, + "grad_norm": 0.91796875, + "learning_rate": 8.62829533209805e-06, + "loss": 1.7208784103393555, + "step": 470 + }, + { + "epoch": 0.3250380904012189, + "grad_norm": 0.859375, + "learning_rate": 8.545965415666254e-06, + "loss": 1.7223230361938477, + "step": 480 + }, + { + "epoch": 0.33180971728457764, + "grad_norm": 0.8671875, + "learning_rate": 8.46165413620295e-06, + "loss": 1.719701385498047, + "step": 490 + }, + { + "epoch": 0.33858134416793634, + "grad_norm": 0.85546875, + "learning_rate": 8.375408603939827e-06, + "loss": 1.721092987060547, + "step": 500 + }, + { + "epoch": 0.33858134416793634, + "eval_loss": 1.7143864631652832, + "eval_runtime": 177.179, + "eval_samples_per_second": 5.401, + "eval_steps_per_second": 0.677, + "step": 500 + }, + { + "epoch": 0.3453529710512951, + "grad_norm": 0.859375, + "learning_rate": 8.287277009902237e-06, + "loss": 1.7325265884399415, + "step": 510 + }, + { + "epoch": 0.3521245979346538, + "grad_norm": 0.83984375, + "learning_rate": 8.197308598981731e-06, + "loss": 1.7298921585083007, + "step": 520 + }, + { + "epoch": 0.35889622481801253, + "grad_norm": 0.8828125, + "learning_rate": 8.105553642419708e-06, + "loss": 1.6982412338256836, 
+ "step": 530 + }, + { + "epoch": 0.3656678517013712, + "grad_norm": 0.91015625, + "learning_rate": 8.012063409717578e-06, + "loss": 1.7173789978027343, + "step": 540 + }, + { + "epoch": 0.37243947858473, + "grad_norm": 0.875, + "learning_rate": 7.916890139989147e-06, + "loss": 1.724541473388672, + "step": 550 + }, + { + "epoch": 0.3792111054680887, + "grad_norm": 0.859375, + "learning_rate": 7.820087012771184e-06, + "loss": 1.701674461364746, + "step": 560 + }, + { + "epoch": 0.3859827323514474, + "grad_norm": 0.85546875, + "learning_rate": 7.721708118308556e-06, + "loss": 1.7177881240844726, + "step": 570 + }, + { + "epoch": 0.39275435923480617, + "grad_norm": 0.87890625, + "learning_rate": 7.621808427330447e-06, + "loss": 1.6985021591186524, + "step": 580 + }, + { + "epoch": 0.39952598611816487, + "grad_norm": 0.87109375, + "learning_rate": 7.5204437603346224e-06, + "loss": 1.709127426147461, + "step": 590 + }, + { + "epoch": 0.4062976130015236, + "grad_norm": 0.88671875, + "learning_rate": 7.417670756396863e-06, + "loss": 1.7201419830322267, + "step": 600 + }, + { + "epoch": 0.41306923988488237, + "grad_norm": 0.8984375, + "learning_rate": 7.313546841522998e-06, + "loss": 1.7153247833251952, + "step": 610 + }, + { + "epoch": 0.41984086676824106, + "grad_norm": 0.875, + "learning_rate": 7.2081301965612435e-06, + "loss": 1.707881546020508, + "step": 620 + }, + { + "epoch": 0.4266124936515998, + "grad_norm": 0.87109375, + "learning_rate": 7.10147972469275e-06, + "loss": 1.7271339416503906, + "step": 630 + }, + { + "epoch": 0.4333841205349585, + "grad_norm": 1.3515625, + "learning_rate": 6.993655018518541e-06, + "loss": 1.7222976684570312, + "step": 640 + }, + { + "epoch": 0.44015574741831726, + "grad_norm": 0.85546875, + "learning_rate": 6.884716326761218e-06, + "loss": 1.7006675720214843, + "step": 650 + }, + { + "epoch": 0.44692737430167595, + "grad_norm": 0.87109375, + "learning_rate": 6.774724520600069e-06, + "loss": 1.6978439331054687, + "step": 660 + }, + { + "epoch": 0.4536990011850347, + "grad_norm": 0.87890625, + "learning_rate": 6.663741059658337e-06, + "loss": 1.7124168395996093, + "step": 670 + }, + { + "epoch": 0.46047062806839345, + "grad_norm": 0.87890625, + "learning_rate": 6.551827957661722e-06, + "loss": 1.7023361206054688, + "step": 680 + }, + { + "epoch": 0.46724225495175215, + "grad_norm": 0.86328125, + "learning_rate": 6.439047747787242e-06, + "loss": 1.700748825073242, + "step": 690 + }, + { + "epoch": 0.4740138818351109, + "grad_norm": 0.85546875, + "learning_rate": 6.325463447721852e-06, + "loss": 1.6977190017700194, + "step": 700 + }, + { + "epoch": 0.4807855087184696, + "grad_norm": 0.8984375, + "learning_rate": 6.211138524450347e-06, + "loss": 1.7250362396240235, + "step": 710 + }, + { + "epoch": 0.48755713560182834, + "grad_norm": 0.90234375, + "learning_rate": 6.096136858792193e-06, + "loss": 1.7249008178710938, + "step": 720 + }, + { + "epoch": 0.4943287624851871, + "grad_norm": 0.8671875, + "learning_rate": 5.980522709707132e-06, + "loss": 1.7153186798095703, + "step": 730 + }, + { + "epoch": 0.5011003893685458, + "grad_norm": 0.8828125, + "learning_rate": 5.864360678389497e-06, + "loss": 1.6841873168945312, + "step": 740 + }, + { + "epoch": 0.5078720162519045, + "grad_norm": 0.8515625, + "learning_rate": 5.747715672171295e-06, + "loss": 1.7151117324829102, + "step": 750 + }, + { + "epoch": 0.5146436431352632, + "grad_norm": 0.95703125, + "learning_rate": 5.630652868254229e-06, + "loss": 1.704267692565918, + "step": 760 + }, + { + "epoch": 
0.521415270018622, + "grad_norm": 0.88671875, + "learning_rate": 5.51323767729093e-06, + "loss": 1.7240329742431642, + "step": 770 + }, + { + "epoch": 0.5281868969019807, + "grad_norm": 0.87890625, + "learning_rate": 5.395535706835744e-06, + "loss": 1.7058921813964845, + "step": 780 + }, + { + "epoch": 0.5349585237853395, + "grad_norm": 0.8828125, + "learning_rate": 5.27761272468549e-06, + "loss": 1.6999113082885742, + "step": 790 + }, + { + "epoch": 0.5417301506686981, + "grad_norm": 0.9140625, + "learning_rate": 5.159534622130695e-06, + "loss": 1.7173538208007812, + "step": 800 + }, + { + "epoch": 0.5485017775520569, + "grad_norm": 0.85546875, + "learning_rate": 5.04136737713781e-06, + "loss": 1.706464958190918, + "step": 810 + }, + { + "epoch": 0.5552734044354156, + "grad_norm": 0.84765625, + "learning_rate": 4.923177017483002e-06, + "loss": 1.7123580932617188, + "step": 820 + }, + { + "epoch": 0.5620450313187744, + "grad_norm": 0.84765625, + "learning_rate": 4.805029583858115e-06, + "loss": 1.7076505661010741, + "step": 830 + }, + { + "epoch": 0.568816658202133, + "grad_norm": 0.87109375, + "learning_rate": 4.686991092969408e-06, + "loss": 1.7007432937622071, + "step": 840 + }, + { + "epoch": 0.5755882850854918, + "grad_norm": 0.83984375, + "learning_rate": 4.569127500649701e-06, + "loss": 1.7156892776489259, + "step": 850 + }, + { + "epoch": 0.5823599119688505, + "grad_norm": 0.85546875, + "learning_rate": 4.4515046650045316e-06, + "loss": 1.6989547729492187, + "step": 860 + }, + { + "epoch": 0.5891315388522093, + "grad_norm": 0.859375, + "learning_rate": 4.334188309612923e-06, + "loss": 1.701683235168457, + "step": 870 + }, + { + "epoch": 0.595903165735568, + "grad_norm": 0.875, + "learning_rate": 4.217243986803315e-06, + "loss": 1.7004409790039063, + "step": 880 + }, + { + "epoch": 0.6026747926189266, + "grad_norm": 0.88671875, + "learning_rate": 4.100737041025188e-06, + "loss": 1.727794075012207, + "step": 890 + }, + { + "epoch": 0.6094464195022854, + "grad_norm": 0.89453125, + "learning_rate": 3.984732572336837e-06, + "loss": 1.6976716995239258, + "step": 900 + }, + { + "epoch": 0.6162180463856441, + "grad_norm": 0.89453125, + "learning_rate": 3.869295400029714e-06, + "loss": 1.6927717208862305, + "step": 910 + }, + { + "epoch": 0.6229896732690029, + "grad_norm": 0.84375, + "learning_rate": 3.754490026409637e-06, + "loss": 1.6997186660766601, + "step": 920 + }, + { + "epoch": 0.6297613001523616, + "grad_norm": 0.93359375, + "learning_rate": 3.6403806007551373e-06, + "loss": 1.7196897506713866, + "step": 930 + }, + { + "epoch": 0.6365329270357203, + "grad_norm": 0.83203125, + "learning_rate": 3.527030883473055e-06, + "loss": 1.7054462432861328, + "step": 940 + }, + { + "epoch": 0.643304553919079, + "grad_norm": 0.890625, + "learning_rate": 3.414504210471421e-06, + "loss": 1.7200759887695312, + "step": 950 + }, + { + "epoch": 0.6500761808024378, + "grad_norm": 0.890625, + "learning_rate": 3.302863457769544e-06, + "loss": 1.6951274871826172, + "step": 960 + }, + { + "epoch": 0.6568478076857965, + "grad_norm": 0.90625, + "learning_rate": 3.192171006365061e-06, + "loss": 1.7151849746704102, + "step": 970 + }, + { + "epoch": 0.6636194345691553, + "grad_norm": 0.8984375, + "learning_rate": 3.0824887073775877e-06, + "loss": 1.713322067260742, + "step": 980 + }, + { + "epoch": 0.6703910614525139, + "grad_norm": 0.83984375, + "learning_rate": 2.973877847488451e-06, + "loss": 1.7172536849975586, + "step": 990 + }, + { + "epoch": 0.6771626883358727, + "grad_norm": 0.859375, + "learning_rate": 
2.8663991146958064e-06, + "loss": 1.7149576187133788, + "step": 1000 + }, + { + "epoch": 0.6771626883358727, + "eval_loss": 1.7007688283920288, + "eval_runtime": 165.432, + "eval_samples_per_second": 5.785, + "eval_steps_per_second": 0.725, + "step": 1000 + }, + { + "epoch": 0.6839343152192314, + "grad_norm": 0.90625, + "learning_rate": 2.7601125644042777e-06, + "loss": 1.714142417907715, + "step": 1010 + }, + { + "epoch": 0.6907059421025902, + "grad_norm": 0.859375, + "learning_rate": 2.6550775858680793e-06, + "loss": 1.7104360580444335, + "step": 1020 + }, + { + "epoch": 0.6974775689859489, + "grad_norm": 0.90234375, + "learning_rate": 2.551352869006338e-06, + "loss": 1.7032684326171874, + "step": 1030 + }, + { + "epoch": 0.7042491958693076, + "grad_norm": 0.86328125, + "learning_rate": 2.4489963716092096e-06, + "loss": 1.701323890686035, + "step": 1040 + }, + { + "epoch": 0.7110208227526663, + "grad_norm": 0.890625, + "learning_rate": 2.348065286953048e-06, + "loss": 1.7169862747192384, + "step": 1050 + }, + { + "epoch": 0.7177924496360251, + "grad_norm": 0.87890625, + "learning_rate": 2.2486160118427958e-06, + "loss": 1.701096534729004, + "step": 1060 + }, + { + "epoch": 0.7245640765193838, + "grad_norm": 0.88671875, + "learning_rate": 2.1507041150993813e-06, + "loss": 1.700172233581543, + "step": 1070 + }, + { + "epoch": 0.7313357034027425, + "grad_norm": 0.859375, + "learning_rate": 2.054384306509794e-06, + "loss": 1.7045093536376954, + "step": 1080 + }, + { + "epoch": 0.7381073302861012, + "grad_norm": 0.859375, + "learning_rate": 1.9597104062571337e-06, + "loss": 1.7091920852661133, + "step": 1090 + }, + { + "epoch": 0.74487895716946, + "grad_norm": 0.86328125, + "learning_rate": 1.8667353148477547e-06, + "loss": 1.7001871109008788, + "step": 1100 + }, + { + "epoch": 0.7516505840528187, + "grad_norm": 0.85546875, + "learning_rate": 1.7755109835522938e-06, + "loss": 1.7016315460205078, + "step": 1110 + }, + { + "epoch": 0.7584222109361775, + "grad_norm": 0.87890625, + "learning_rate": 1.6860883853770848e-06, + "loss": 1.7196449279785155, + "step": 1120 + }, + { + "epoch": 0.7651938378195361, + "grad_norm": 0.89453125, + "learning_rate": 1.5985174865822146e-06, + "loss": 1.701955223083496, + "step": 1130 + }, + { + "epoch": 0.7719654647028948, + "grad_norm": 0.85546875, + "learning_rate": 1.5128472187620886e-06, + "loss": 1.703407096862793, + "step": 1140 + }, + { + "epoch": 0.7787370915862536, + "grad_norm": 0.875, + "learning_rate": 1.4291254515041592e-06, + "loss": 1.7057323455810547, + "step": 1150 + }, + { + "epoch": 0.7855087184696123, + "grad_norm": 0.8828125, + "learning_rate": 1.3473989656410413e-06, + "loss": 1.6963571548461913, + "step": 1160 + }, + { + "epoch": 0.7922803453529711, + "grad_norm": 0.8671875, + "learning_rate": 1.2677134271110082e-06, + "loss": 1.7136796951293944, + "step": 1170 + }, + { + "epoch": 0.7990519722363297, + "grad_norm": 0.89453125, + "learning_rate": 1.1901133614414352e-06, + "loss": 1.7095062255859375, + "step": 1180 + }, + { + "epoch": 0.8058235991196885, + "grad_norm": 0.875, + "learning_rate": 1.114642128869473e-06, + "loss": 1.7052017211914063, + "step": 1190 + }, + { + "epoch": 0.8125952260030472, + "grad_norm": 0.8984375, + "learning_rate": 1.0413419001138525e-06, + "loss": 1.7166055679321288, + "step": 1200 + }, + { + "epoch": 0.819366852886406, + "grad_norm": 0.87890625, + "learning_rate": 9.702536328113305e-07, + "loss": 1.7042055130004883, + "step": 1210 + }, + { + "epoch": 0.8261384797697647, + "grad_norm": 0.8671875, + 
"learning_rate": 9.014170486309875e-07, + "loss": 1.6885286331176759, + "step": 1220 + }, + { + "epoch": 0.8329101066531234, + "grad_norm": 0.84375, + "learning_rate": 8.348706110791238e-07, + "loss": 1.7065910339355468, + "step": 1230 + }, + { + "epoch": 0.8396817335364821, + "grad_norm": 0.87109375, + "learning_rate": 7.706515040071854e-07, + "loss": 1.6999498367309571, + "step": 1240 + }, + { + "epoch": 0.8464533604198409, + "grad_norm": 0.8828125, + "learning_rate": 7.08795610834706e-07, + "loss": 1.7021600723266601, + "step": 1250 + }, + { + "epoch": 0.8532249873031996, + "grad_norm": 0.87890625, + "learning_rate": 6.493374944988984e-07, + "loss": 1.722920799255371, + "step": 1260 + }, + { + "epoch": 0.8599966141865584, + "grad_norm": 0.8671875, + "learning_rate": 5.923103781420708e-07, + "loss": 1.7148597717285157, + "step": 1270 + }, + { + "epoch": 0.866768241069917, + "grad_norm": 0.890625, + "learning_rate": 5.377461265476868e-07, + "loss": 1.7151250839233398, + "step": 1280 + }, + { + "epoch": 0.8735398679532758, + "grad_norm": 0.8671875, + "learning_rate": 4.856752283354277e-07, + "loss": 1.7023918151855468, + "step": 1290 + }, + { + "epoch": 0.8803114948366345, + "grad_norm": 0.8671875, + "learning_rate": 4.3612677892519496e-07, + "loss": 1.7045417785644532, + "step": 1300 + }, + { + "epoch": 0.8870831217199933, + "grad_norm": 0.86328125, + "learning_rate": 3.891284642796045e-07, + "loss": 1.7008039474487304, + "step": 1310 + }, + { + "epoch": 0.8938547486033519, + "grad_norm": 0.8671875, + "learning_rate": 3.447065454340198e-07, + "loss": 1.7126380920410156, + "step": 1320 + }, + { + "epoch": 0.9006263754867107, + "grad_norm": 0.88671875, + "learning_rate": 3.028858438227966e-07, + "loss": 1.7127569198608399, + "step": 1330 + }, + { + "epoch": 0.9073980023700694, + "grad_norm": 0.86328125, + "learning_rate": 2.636897274099187e-07, + "loss": 1.7151193618774414, + "step": 1340 + }, + { + "epoch": 0.9141696292534282, + "grad_norm": 0.8515625, + "learning_rate": 2.2714009763178945e-07, + "loss": 1.704157829284668, + "step": 1350 + }, + { + "epoch": 0.9209412561367869, + "grad_norm": 0.87890625, + "learning_rate": 1.932573771594648e-07, + "loss": 1.7036989212036133, + "step": 1360 + }, + { + "epoch": 0.9277128830201455, + "grad_norm": 0.8671875, + "learning_rate": 1.6206049848716765e-07, + "loss": 1.7044996261596679, + "step": 1370 + }, + { + "epoch": 0.9344845099035043, + "grad_norm": 1.109375, + "learning_rate": 1.3356689335346728e-07, + "loss": 1.7029462814331056, + "step": 1380 + }, + { + "epoch": 0.941256136786863, + "grad_norm": 0.91015625, + "learning_rate": 1.0779248300102352e-07, + "loss": 1.7133670806884767, + "step": 1390 + }, + { + "epoch": 0.9480277636702218, + "grad_norm": 0.859375, + "learning_rate": 8.475166928034684e-08, + "loss": 1.6992549896240234, + "step": 1400 + }, + { + "epoch": 0.9547993905535805, + "grad_norm": 0.85546875, + "learning_rate": 6.445732660254056e-08, + "loss": 1.7066579818725587, + "step": 1410 + }, + { + "epoch": 0.9615710174369392, + "grad_norm": 0.9140625, + "learning_rate": 4.692079474552691e-08, + "loss": 1.6963106155395509, + "step": 1420 + }, + { + "epoch": 0.9683426443202979, + "grad_norm": 0.8515625, + "learning_rate": 3.2151872517767194e-08, + "loss": 1.7118385314941407, + "step": 1430 + }, + { + "epoch": 0.9751142712036567, + "grad_norm": 0.84375, + "learning_rate": 2.0158812283030403e-08, + "loss": 1.6870197296142577, + "step": 1440 + }, + { + "epoch": 0.9818858980870154, + "grad_norm": 0.87109375, + "learning_rate": 
1.094831534925289e-08, + "loss": 1.7051671981811523, + "step": 1450 + }, + { + "epoch": 0.9886575249703742, + "grad_norm": 0.86328125, + "learning_rate": 4.5255282240802554e-09, + "loss": 1.7082006454467773, + "step": 1460 + }, + { + "epoch": 0.9954291518537328, + "grad_norm": 0.8828125, + "learning_rate": 8.940397391787869e-10, + "loss": 1.707107162475586, + "step": 1470 + }, + { + "epoch": 1.0, + "eval_loss": 1.7002202272415161, + "eval_runtime": 169.1979, + "eval_samples_per_second": 5.656, + "eval_steps_per_second": 0.709, + "step": 1477 + } + ], + "logging_steps": 10, + "max_steps": 1477, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.103177196962902e+18, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1477/training_args.bin b/checkpoint-1477/training_args.bin new file mode 100644 index 0000000..70b9526 --- /dev/null +++ b/checkpoint-1477/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:021e20fabb8f12442e13effbcc63f0a47b25ed87f82c678b87ee5792f87ef9bc +size 5777 diff --git a/config.json b/config.json new file mode 100644 index 0000000..cbfe8ec --- /dev/null +++ b/config.json @@ -0,0 +1,74 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": null, + "dtype": "bfloat16", + "eos_token_id": 151643, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2560, + "initializer_range": 0.02, + "intermediate_size": 9728, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 36, + "model_name": "unsloth/Qwen3-4B-Base", + "model_type": "qwen3", + "num_attention_heads": 32, + "num_hidden_layers": 36, + "num_key_value_heads": 8, + "pad_token_id": 151669, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 1000000, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.5.3", + "unsloth_fixed": true, + "unsloth_version": "2026.4.4", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/eval/eval_results_final.json b/eval/eval_results_final.json new file mode 100644 index 0000000..6243158 --- /dev/null +++ b/eval/eval_results_final.json @@ -0,0 +1,15657 @@ +{ + "model_path": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "ppl": null, + "base_ppl": null, + "benchmarks": { + "cpt": { + "results": { + "arc_challenge": { + "alias": "arc_challenge", + "acc,none": 0.4825, + "acc_stderr,none": 
0.025015972341295333, + "acc_norm,none": 0.5325, + "acc_norm_stderr,none": 0.024978374105060028 + }, + "arc_easy": { + "alias": "arc_easy", + "acc,none": 0.78, + "acc_stderr,none": 0.020738254217024313, + "acc_norm,none": 0.795, + "acc_norm_stderr,none": 0.020210359883399975 + }, + "hellaswag": { + "alias": "hellaswag", + "acc,none": 0.4975, + "acc_stderr,none": 0.025030995822773405, + "acc_norm,none": 0.63, + "acc_norm_stderr,none": 0.024170447375168467 + }, + "kmmlu": { + "acc,none": 0.4692806221646144, + "acc_stderr,none": 0.0039182515413587, + "alias": "kmmlu" + }, + "kmmlu_applied_science": { + "acc,none": 0.45375, + "acc_stderr,none": 0.007111885914543827, + "alias": " - kmmlu_applied_science" + }, + "kmmlu_aviation_engineering_and_maintenance": { + "alias": " - kmmlu_aviation_engineering_and_maintenance", + "acc,none": 0.46, + "acc_stderr,none": 0.024951079956135092 + }, + "kmmlu_electronics_engineering": { + "alias": " - kmmlu_electronics_engineering", + "acc,none": 0.6275, + "acc_stderr,none": 0.0242038000082031 + }, + "kmmlu_energy_management": { + "alias": " - kmmlu_energy_management", + "acc,none": 0.395, + "acc_stderr,none": 0.0244731452227279 + }, + "kmmlu_environmental_science": { + "alias": " - kmmlu_environmental_science", + "acc,none": 0.37, + "acc_stderr,none": 0.024170447375168453 + }, + "kmmlu_gas_technology_and_engineering": { + "alias": " - kmmlu_gas_technology_and_engineering", + "acc,none": 0.405, + "acc_stderr,none": 0.024575340657273674 + }, + "kmmlu_geomatics": { + "alias": " - kmmlu_geomatics", + "acc,none": 0.425, + "acc_stderr,none": 0.024748104405776187 + }, + "kmmlu_industrial_engineer": { + "alias": " - kmmlu_industrial_engineer", + "acc,none": 0.4275, + "acc_stderr,none": 0.024766769210836766 + }, + "kmmlu_machine_design_and_manufacturing": { + "alias": " - kmmlu_machine_design_and_manufacturing", + "acc,none": 0.4975, + "acc_stderr,none": 0.025030995822773395 + }, + "kmmlu_maritime_engineering": { + "alias": " - kmmlu_maritime_engineering", + "acc,none": 0.4075, + "acc_stderr,none": 0.02459923129797198 + }, + "kmmlu_nondestructive_testing": { + "alias": " - kmmlu_nondestructive_testing", + "acc,none": 0.475, + "acc_stderr,none": 0.024999999999999994 + }, + "kmmlu_railway_and_automotive_engineering": { + "alias": " - kmmlu_railway_and_automotive_engineering", + "acc,none": 0.3825, + "acc_stderr,none": 0.024330316186072946 + }, + "kmmlu_telecommunications_and_wireless_technology": { + "alias": " - kmmlu_telecommunications_and_wireless_technology", + "acc,none": 0.5725, + "acc_stderr,none": 0.02476676921083677 + }, + "kmmlu_humss": { + "acc,none": 0.4776556776556777, + "acc_stderr,none": 0.00943997794327789, + "alias": " - kmmlu_humss" + }, + "kmmlu_accounting": { + "alias": " - kmmlu_accounting", + "acc,none": 0.5, + "acc_stderr,none": 0.050251890762960605 + }, + "kmmlu_criminal_law": { + "alias": " - kmmlu_criminal_law", + "acc,none": 0.39, + "acc_stderr,none": 0.03457567623250012 + }, + "kmmlu_economics": { + "alias": " - kmmlu_economics", + "acc,none": 0.5461538461538461, + "acc_stderr,none": 0.04383459241436368 + }, + "kmmlu_education": { + "alias": " - kmmlu_education", + "acc,none": 0.64, + "acc_stderr,none": 0.048241815132442176 + }, + "kmmlu_korean_history": { + "alias": " - kmmlu_korean_history", + "acc,none": 0.3, + "acc_stderr,none": 0.046056618647183814 + }, + "kmmlu_law": { + "alias": " - kmmlu_law", + "acc,none": 0.375, + "acc_stderr,none": 0.02423646044779629 + }, + "kmmlu_management": { + "alias": " - kmmlu_management", + "acc,none": 
0.5225, + "acc_stderr,none": 0.02500595167250431 + }, + "kmmlu_political_science_and_sociology": { + "alias": " - kmmlu_political_science_and_sociology", + "acc,none": 0.55, + "acc_stderr,none": 0.02877080459987894 + }, + "kmmlu_psychology": { + "alias": " - kmmlu_psychology", + "acc,none": 0.45, + "acc_stderr,none": 0.024905837706844923 + }, + "kmmlu_social_welfare": { + "alias": " - kmmlu_social_welfare", + "acc,none": 0.57, + "acc_stderr,none": 0.02478478796128207 + }, + "kmmlu_taxation": { + "alias": " - kmmlu_taxation", + "acc,none": 0.395, + "acc_stderr,none": 0.03465370682892271 + }, + "kmmlu_other": { + "acc,none": 0.4697222222222222, + "acc_stderr,none": 0.008043980393376315, + "alias": " - kmmlu_other" + }, + "kmmlu_agricultural_sciences": { + "alias": " - kmmlu_agricultural_sciences", + "acc,none": 0.3625, + "acc_stderr,none": 0.024066207238097735 + }, + "kmmlu_construction": { + "alias": " - kmmlu_construction", + "acc,none": 0.4, + "acc_stderr,none": 0.024525573579398552 + }, + "kmmlu_fashion": { + "alias": " - kmmlu_fashion", + "acc,none": 0.45, + "acc_stderr,none": 0.024905837706844923 + }, + "kmmlu_food_processing": { + "alias": " - kmmlu_food_processing", + "acc,none": 0.3675, + "acc_stderr,none": 0.024136399679191744 + }, + "kmmlu_health": { + "alias": " - kmmlu_health", + "acc,none": 0.58, + "acc_stderr,none": 0.049604496374885836 + }, + "kmmlu_interior_architecture_and_design": { + "alias": " - kmmlu_interior_architecture_and_design", + "acc,none": 0.6175, + "acc_stderr,none": 0.024330316186072936 + }, + "kmmlu_marketing": { + "alias": " - kmmlu_marketing", + "acc,none": 0.765, + "acc_stderr,none": 0.021226490755055 + }, + "kmmlu_patent": { + "alias": " - kmmlu_patent", + "acc,none": 0.42, + "acc_stderr,none": 0.049604496374885836 + }, + "kmmlu_public_safety": { + "alias": " - kmmlu_public_safety", + "acc,none": 0.38, + "acc_stderr,none": 0.024299715851758236 + }, + "kmmlu_real_estate": { + "alias": " - kmmlu_real_estate", + "acc,none": 0.45, + "acc_stderr,none": 0.03526639466921485 + }, + "kmmlu_refrigerating_machinery": { + "alias": " - kmmlu_refrigerating_machinery", + "acc,none": 0.41, + "acc_stderr,none": 0.02462246259333947 + }, + "kmmlu_stem": { + "acc,none": 0.48093023255813955, + "acc_stderr,none": 0.007306868046626305, + "alias": " - kmmlu_stem" + }, + "kmmlu_biology": { + "alias": " - kmmlu_biology", + "acc,none": 0.3125, + "acc_stderr,none": 0.023204644228784484 + }, + "kmmlu_chemical_engineering": { + "alias": " - kmmlu_chemical_engineering", + "acc,none": 0.4875, + "acc_stderr,none": 0.025023485209500245 + }, + "kmmlu_chemistry": { + "alias": " - kmmlu_chemistry", + "acc,none": 0.5175, + "acc_stderr,none": 0.025015972341295323 + }, + "kmmlu_civil_engineering": { + "alias": " - kmmlu_civil_engineering", + "acc,none": 0.3925, + "acc_stderr,none": 0.024445927747963322 + }, + "kmmlu_computer_science": { + "alias": " - kmmlu_computer_science", + "acc,none": 0.74, + "acc_stderr,none": 0.021959178349484305 + }, + "kmmlu_ecology": { + "alias": " - kmmlu_ecology", + "acc,none": 0.505, + "acc_stderr,none": 0.02503005711936146 + }, + "kmmlu_electrical_engineering": { + "alias": " - kmmlu_electrical_engineering", + "acc,none": 0.3425, + "acc_stderr,none": 0.02375700661717548 + }, + "kmmlu_information_technology": { + "alias": " - kmmlu_information_technology", + "acc,none": 0.7525, + "acc_stderr,none": 0.021605006729678956 + }, + "kmmlu_materials_engineering": { + "alias": " - kmmlu_materials_engineering", + "acc,none": 0.475, + "acc_stderr,none": 0.025 + }, + 
"kmmlu_math": { + "alias": " - kmmlu_math", + "acc,none": 0.3333333333333333, + "acc_stderr,none": 0.027262027336984393 + }, + "kmmlu_mechanical_engineering": { + "alias": " - kmmlu_mechanical_engineering", + "acc,none": 0.395, + "acc_stderr,none": 0.0244731452227279 + }, + "kobest_boolq": { + "alias": "kobest_boolq", + "acc,none": 0.755, + "acc_stderr,none": 0.02153129097913247, + "f1,none": 0.7379609080456697, + "f1_stderr,none": "N/A" + }, + "kobest_copa": { + "alias": "kobest_copa", + "acc,none": 0.6525, + "acc_stderr,none": 0.023838625698390636, + "f1,none": 0.6523935455233165, + "f1_stderr,none": "N/A" + }, + "kobest_hellaswag": { + "alias": "kobest_hellaswag", + "acc,none": 0.4325, + "acc_stderr,none": 0.024802162065186355, + "f1,none": 0.4264529493583016, + "f1_stderr,none": "N/A", + "acc_norm,none": 0.565, + "acc_norm_stderr,none": 0.024818892876375884 + }, + "mmlu": { + "acc,none": 0.7352865587252634, + "acc_stderr,none": 0.003887849176172822, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.6862808842652796, + "acc_stderr,none": 0.0077616777391173045, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.5873015873015873, + "acc_stderr,none": 0.04403438954768177 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.7818181818181819, + "acc_stderr,none": 0.03225078108306289 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.8186274509803921, + "acc_stderr,none": 0.02704462171947408 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.8481012658227848, + "acc_stderr,none": 0.023363878096632453 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.8264462809917356, + "acc_stderr,none": 0.0345727283691767 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.8148148148148148, + "acc_stderr,none": 0.03755265865037183 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.8466257668711656, + "acc_stderr,none": 0.02831160144143859 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.7543352601156069, + "acc_stderr,none": 0.023176298203992005 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.3225, + "acc_stderr,none": 0.023400926978618716 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.7331189710610932, + "acc_stderr,none": 0.025122637608816636 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.7870370370370371, + "acc_stderr,none": 0.02277971908873339 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.5075, + "acc_stderr,none": 0.02502849253543831 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.8070175438596491, + "acc_stderr,none": 0.030267457554898458 + }, + "mmlu_other": { + "acc,none": 0.7415565345080763, + "acc_stderr,none": 0.008104267812218218, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.76, + "acc_stderr,none": 0.04292346959909282 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.769811320754717, + "acc_stderr,none": 0.025907897122408173 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.7456647398843931, + "acc_stderr,none": 0.0332055644308557 + }, + "mmlu_global_facts": { + "alias": " - 
global_facts", + "acc,none": 0.44, + "acc_stderr,none": 0.0498887651569859 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.7399103139013453, + "acc_stderr,none": 0.029442495585857473 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.8640776699029126, + "acc_stderr,none": 0.0339329572976101 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.8931623931623932, + "acc_stderr,none": 0.020237149008990932 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.8, + "acc_stderr,none": 0.04020151261036846 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.8225, + "acc_stderr,none": 0.019128489820344343 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.7777777777777778, + "acc_stderr,none": 0.02380518652488816 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.574468085106383, + "acc_stderr,none": 0.029494827600144366 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.7757352941176471, + "acc_stderr,none": 0.02533684856333236 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.5060240963855421, + "acc_stderr,none": 0.038922121953330446 + }, + "mmlu_social_sciences": { + "acc,none": 0.8158088235294118, + "acc_stderr,none": 0.007306038192044323, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.6578947368421053, + "acc_stderr,none": 0.04462917535336937 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.8585858585858586, + "acc_stderr,none": 0.02482590979334335 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.8704663212435233, + "acc_stderr,none": 0.024233532297758716 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.8076923076923077, + "acc_stderr,none": 0.019982347208637296 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.8991596638655462, + "acc_stderr,none": 0.019559663430480802 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.905, + "acc_stderr,none": 0.014679107277903242 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.7786259541984732, + "acc_stderr,none": 0.03641297081313729 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.74, + "acc_stderr,none": 0.02195917834948431 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.6727272727272727, + "acc_stderr,none": 0.0449429086625209 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.7428571428571429, + "acc_stderr,none": 0.027979823538744546 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.8557213930348259, + "acc_stderr,none": 0.02484575321230605 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.89, + "acc_stderr,none": 0.03144660377352203 + }, + "mmlu_stem": { + "acc,none": 0.7082143989850935, + "acc_stderr,none": 0.007816574368205405, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.46, + "acc_stderr,none": 0.05009082659620333 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.7111111111111111, + 
"acc_stderr,none": 0.0391545063041425 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.8486842105263158, + "acc_stderr,none": 0.029162631596843975 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.8263888888888888, + "acc_stderr,none": 0.03167473383795717 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.52, + "acc_stderr,none": 0.050211673156867795 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.68, + "acc_stderr,none": 0.04688261722621504 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.53, + "acc_stderr,none": 0.05016135580465919 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.5784313725490197, + "acc_stderr,none": 0.049135952012745045 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.83, + "acc_stderr,none": 0.03775251680686371 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.8, + "acc_stderr,none": 0.026148818018424506 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.7586206896551724, + "acc_stderr,none": 0.03565998174135302 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.6746031746031746, + "acc_stderr,none": 0.024130158299762613 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.9, + "acc_stderr,none": 0.017066403719657258 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.729064039408867, + "acc_stderr,none": 0.03127090713297698 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.85, + "acc_stderr,none": 0.0358870281282637 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.5296296296296297, + "acc_stderr,none": 0.030431963547936584 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.6754966887417219, + "acc_stderr,none": 0.03822746937658752 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.7037037037037037, + "acc_stderr,none": 0.031141447823536044 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.5892857142857143, + "acc_stderr,none": 0.04669510663875191 + }, + "winogrande": { + "alias": "winogrande", + "acc,none": 0.7225, + "acc_stderr,none": 0.022416302137144652 + } + }, + "groups": { + "kmmlu": { + "acc,none": 0.4692806221646144, + "acc_stderr,none": 0.0039182515413587, + "alias": "kmmlu" + }, + "kmmlu_applied_science": { + "acc,none": 0.45375, + "acc_stderr,none": 0.007111885914543827, + "alias": " - kmmlu_applied_science" + }, + "kmmlu_humss": { + "acc,none": 0.4776556776556777, + "acc_stderr,none": 0.00943997794327789, + "alias": " - kmmlu_humss" + }, + "kmmlu_other": { + "acc,none": 0.4697222222222222, + "acc_stderr,none": 0.008043980393376315, + "alias": " - kmmlu_other" + }, + "kmmlu_stem": { + "acc,none": 0.48093023255813955, + "acc_stderr,none": 0.007306868046626305, + "alias": " - kmmlu_stem" + }, + "mmlu": { + "acc,none": 0.7352865587252634, + "acc_stderr,none": 0.003887849176172822, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.6862808842652796, + "acc_stderr,none": 0.0077616777391173045, + "alias": " - humanities" + }, + "mmlu_other": { + "acc,none": 0.7415565345080763, + 
"acc_stderr,none": 0.008104267812218218, + "alias": " - other" + }, + "mmlu_social_sciences": { + "acc,none": 0.8158088235294118, + "acc_stderr,none": 0.007306038192044323, + "alias": " - social sciences" + }, + "mmlu_stem": { + "acc,none": 0.7082143989850935, + "acc_stderr,none": 0.007816574368205405, + "alias": " - stem" + } + }, + "group_subtasks": { + "mmlu_humanities": [ + "mmlu_formal_logic", + "mmlu_high_school_european_history", + "mmlu_high_school_us_history", + "mmlu_high_school_world_history", + "mmlu_international_law", + "mmlu_jurisprudence", + "mmlu_logical_fallacies", + "mmlu_moral_disputes", + "mmlu_moral_scenarios", + "mmlu_philosophy", + "mmlu_prehistory", + "mmlu_professional_law", + "mmlu_world_religions" + ], + "mmlu_social_sciences": [ + "mmlu_econometrics", + "mmlu_high_school_geography", + "mmlu_high_school_government_and_politics", + "mmlu_high_school_macroeconomics", + "mmlu_high_school_microeconomics", + "mmlu_high_school_psychology", + "mmlu_human_sexuality", + "mmlu_professional_psychology", + "mmlu_public_relations", + "mmlu_security_studies", + "mmlu_sociology", + "mmlu_us_foreign_policy" + ], + "mmlu_other": [ + "mmlu_business_ethics", + "mmlu_clinical_knowledge", + "mmlu_college_medicine", + "mmlu_global_facts", + "mmlu_human_aging", + "mmlu_management", + "mmlu_marketing", + "mmlu_medical_genetics", + "mmlu_miscellaneous", + "mmlu_nutrition", + "mmlu_professional_accounting", + "mmlu_professional_medicine", + "mmlu_virology" + ], + "mmlu_stem": [ + "mmlu_abstract_algebra", + "mmlu_anatomy", + "mmlu_astronomy", + "mmlu_college_biology", + "mmlu_college_chemistry", + "mmlu_college_computer_science", + "mmlu_college_mathematics", + "mmlu_college_physics", + "mmlu_computer_security", + "mmlu_conceptual_physics", + "mmlu_electrical_engineering", + "mmlu_elementary_mathematics", + "mmlu_high_school_biology", + "mmlu_high_school_chemistry", + "mmlu_high_school_computer_science", + "mmlu_high_school_mathematics", + "mmlu_high_school_physics", + "mmlu_high_school_statistics", + "mmlu_machine_learning" + ], + "mmlu": [ + "mmlu_stem", + "mmlu_other", + "mmlu_social_sciences", + "mmlu_humanities" + ], + "hellaswag": [], + "arc_easy": [], + "arc_challenge": [], + "winogrande": [], + "kmmlu_humss": [ + "kmmlu_accounting", + "kmmlu_criminal_law", + "kmmlu_economics", + "kmmlu_education", + "kmmlu_korean_history", + "kmmlu_law", + "kmmlu_management", + "kmmlu_political_science_and_sociology", + "kmmlu_psychology", + "kmmlu_social_welfare", + "kmmlu_taxation" + ], + "kmmlu_applied_science": [ + "kmmlu_aviation_engineering_and_maintenance", + "kmmlu_electronics_engineering", + "kmmlu_energy_management", + "kmmlu_environmental_science", + "kmmlu_gas_technology_and_engineering", + "kmmlu_geomatics", + "kmmlu_industrial_engineer", + "kmmlu_machine_design_and_manufacturing", + "kmmlu_maritime_engineering", + "kmmlu_nondestructive_testing", + "kmmlu_railway_and_automotive_engineering", + "kmmlu_telecommunications_and_wireless_technology" + ], + "kmmlu_other": [ + "kmmlu_agricultural_sciences", + "kmmlu_construction", + "kmmlu_fashion", + "kmmlu_food_processing", + "kmmlu_health", + "kmmlu_interior_architecture_and_design", + "kmmlu_marketing", + "kmmlu_patent", + "kmmlu_public_safety", + "kmmlu_real_estate", + "kmmlu_refrigerating_machinery" + ], + "kmmlu_stem": [ + "kmmlu_biology", + "kmmlu_chemical_engineering", + "kmmlu_chemistry", + "kmmlu_civil_engineering", + "kmmlu_computer_science", + "kmmlu_ecology", + "kmmlu_electrical_engineering", + "kmmlu_information_technology", + 
"kmmlu_materials_engineering", + "kmmlu_math", + "kmmlu_mechanical_engineering" + ], + "kmmlu": [ + "kmmlu_stem", + "kmmlu_other", + "kmmlu_applied_science", + "kmmlu_humss" + ], + "kobest_boolq": [], + "kobest_copa": [], + "kobest_hellaswag": [] + }, + "configs": { + "arc_challenge": { + "task": "arc_challenge", + "tag": [ + "ai2_arc" + ], + "dataset_path": "allenai/ai2_arc", + "dataset_name": "ARC-Challenge", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": "Question: {{question}}\nAnswer:", + "doc_to_target": "{{choices.label.index(answerKey)}}", + "unsafe_code": false, + "doc_to_choice": "{{choices.text}}", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "Question: {{question}}\nAnswer:", + "doc_to_choice": "{{choices.text}}", + "doc_to_target": "{{choices.label.index(answerKey)}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": true, + "doc_to_decontamination_query": "Question: {{question}}\nAnswer:", + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "arc_easy": { + "task": "arc_easy", + "tag": [ + "ai2_arc" + ], + "dataset_path": "allenai/ai2_arc", + "dataset_name": "ARC-Easy", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": "Question: {{question}}\nAnswer:", + "doc_to_target": "{{choices.label.index(answerKey)}}", + "unsafe_code": false, + "doc_to_choice": "{{choices.text}}", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "Question: {{question}}\nAnswer:", + "doc_to_choice": "{{choices.text}}", + "doc_to_target": "{{choices.label.index(answerKey)}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": true, + "doc_to_decontamination_query": "Question: {{question}}\nAnswer:", + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "hellaswag": { + "task": "hellaswag", + "tag": [ + "multiple_choice" + ], + "dataset_path": "Rowan/hellaswag", + "training_split": "train", + "validation_split": "validation", + "process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n def _process_doc(doc):\n ctx = doc[\"ctx_a\"] + \" \" + doc[\"ctx_b\"].capitalize()\n out_doc = {\n \"query\": preprocess(doc[\"activity_label\"] + \": \" + ctx),\n \"choices\": [preprocess(ending) for ending in doc[\"endings\"]],\n \"gold\": int(doc[\"label\"]),\n }\n return 
out_doc\n\n return dataset.map(_process_doc)\n", + "doc_to_text": "{{query}}", + "doc_to_target": "{{label}}", + "unsafe_code": false, + "doc_to_choice": "choices", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": "", + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{query}}", + "doc_to_choice": "choices", + "doc_to_target": "{{label}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_accounting": { + "task": "kmmlu_accounting", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Accounting", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_agricultural_sciences": { + "task": "kmmlu_agricultural_sciences", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Agricultural-Sciences", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_aviation_engineering_and_maintenance": { + "task": "kmmlu_aviation_engineering_and_maintenance", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Aviation-Engineering-and-Maintenance", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_biology": { + "task": "kmmlu_biology", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Biology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_chemical_engineering": { + "task": "kmmlu_chemical_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Chemical-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_chemistry": { + "task": "kmmlu_chemistry", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Chemistry", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_civil_engineering": { + "task": "kmmlu_civil_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Civil-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_computer_science": { + "task": "kmmlu_computer_science", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Computer-Science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_construction": { + "task": "kmmlu_construction", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Construction", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_criminal_law": { + "task": "kmmlu_criminal_law", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Criminal-Law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_ecology": { + "task": "kmmlu_ecology", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Ecology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_economics": { + "task": "kmmlu_economics", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Economics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_education": { + "task": "kmmlu_education", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Education", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_electrical_engineering": { + "task": "kmmlu_electrical_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Electrical-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_electronics_engineering": { + "task": "kmmlu_electronics_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Electronics-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_energy_management": { + "task": "kmmlu_energy_management", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Energy-Management", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_environmental_science": { + "task": "kmmlu_environmental_science", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Environmental-Science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_fashion": { + "task": "kmmlu_fashion", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Fashion", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_food_processing": { + "task": "kmmlu_food_processing", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Food-Processing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_gas_technology_and_engineering": { + "task": "kmmlu_gas_technology_and_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Gas-Technology-and-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_geomatics": { + "task": "kmmlu_geomatics", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Geomatics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_health": { + "task": "kmmlu_health", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Health", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_industrial_engineer": { + "task": "kmmlu_industrial_engineer", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Industrial-Engineer", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_information_technology": { + "task": "kmmlu_information_technology", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Information-Technology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_interior_architecture_and_design": { + "task": "kmmlu_interior_architecture_and_design", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Interior-Architecture-and-Design", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_korean_history": { + "task": "kmmlu_korean_history", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Korean-History", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_law": { + "task": "kmmlu_law", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_machine_design_and_manufacturing": { + "task": "kmmlu_machine_design_and_manufacturing", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Machine-Design-and-Manufacturing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_management": { + "task": "kmmlu_management", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Management", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_maritime_engineering": { + "task": "kmmlu_maritime_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Maritime-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_marketing": { + "task": "kmmlu_marketing", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Marketing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_materials_engineering": { + "task": "kmmlu_materials_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Materials-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_math": { + "task": "kmmlu_math", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Math", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_mechanical_engineering": { + "task": "kmmlu_mechanical_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Mechanical-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_nondestructive_testing": { + "task": "kmmlu_nondestructive_testing", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Nondestructive-Testing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_patent": { + "task": "kmmlu_patent", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Patent", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_political_science_and_sociology": { + "task": "kmmlu_political_science_and_sociology", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Political-Science-and-Sociology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_psychology": { + "task": "kmmlu_psychology", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Psychology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_public_safety": { + "task": "kmmlu_public_safety", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Public-Safety", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_railway_and_automotive_engineering": { + "task": "kmmlu_railway_and_automotive_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Railway-and-Automotive-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_real_estate": { + "task": "kmmlu_real_estate", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Real-Estate", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_refrigerating_machinery": { + "task": "kmmlu_refrigerating_machinery", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Refrigerating-Machinery", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_social_welfare": { + "task": "kmmlu_social_welfare", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Social-Welfare", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_taxation": { + "task": "kmmlu_taxation", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Taxation", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_telecommunications_and_wireless_technology": { + "task": "kmmlu_telecommunications_and_wireless_technology", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Telecommunications-and-Wireless-Technology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kobest_boolq": { + "task": "kobest_boolq", + "dataset_path": "skt/kobest_v1", + "dataset_name": "boolq", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": "{{paragraph}} 질문: {{question}} 답변: ", + "doc_to_target": "{{label}}", + "unsafe_code": false, + "doc_to_choice": [ + "아니오", + "예" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{paragraph}} 질문: {{question}} 답변: ", + "doc_to_choice": [ + "아니오", + "예" + ], + "doc_to_target": "{{label}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "f1", + "aggregation": "def macro_f1_score(items):\n from sklearn.metrics import f1_score\n\n unzipped_list = list(zip(*items))\n golds = unzipped_list[0]\n preds = unzipped_list[1]\n fscore = f1_score(golds, preds, average=\"macro\")\n return fscore\n", + "average": "macro", + "hf_evaluate": true, + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kobest_copa": { + "task": "kobest_copa", + "dataset_path": "skt/kobest_v1", + "dataset_name": "copa", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": "def copa_doc_to_text(doc: dict) -> str:\n connector = {\"원인\": \" 왜냐하면\", \"결과\": \" 그래서\"}[doc[\"question\"].strip()]\n return f\"\"\"{doc[\"premise\"]} {connector}\"\"\"\n", + "doc_to_target": "def copa_doc_to_target(doc: dict) -> str:\n correct_choice = doc[\"alternative_1\"] if doc[\"label\"] == 0 else doc[\"alternative_2\"]\n return f\"\"\"{correct_choice}\"\"\"\n", + "unsafe_code": false, + "doc_to_choice": "def copa_doc_to_choice(doc: dict) -> list:\n return [f\"\"\"{doc[\"alternative_1\"]}\"\"\", f\"\"\"{doc[\"alternative_2\"]}\"\"\"]\n", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "", + "doc_to_choice": "", + "doc_to_target": "", + "gen_prefix": null, + "fewshot_delimiter": 
"\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "f1", + "aggregation": "def macro_f1_score(items):\n from sklearn.metrics import f1_score\n\n unzipped_list = list(zip(*items))\n golds = unzipped_list[0]\n preds = unzipped_list[1]\n fscore = f1_score(golds, preds, average=\"macro\")\n return fscore\n", + "average": "macro", + "hf_evaluate": true, + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kobest_hellaswag": { + "task": "kobest_hellaswag", + "dataset_path": "skt/kobest_v1", + "dataset_name": "hellaswag", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "process_docs": "def hellaswag_process_doc(doc: Dataset) -> Dataset:\n def preprocessor(dataset):\n return {\n \"query\": f\"\"\"문장: {dataset[\"context\"]}\"\"\",\n \"choices\": [\n dataset[\"ending_1\"],\n dataset[\"ending_2\"],\n dataset[\"ending_3\"],\n dataset[\"ending_4\"],\n ],\n \"gold\": int(dataset[\"label\"]),\n }\n\n return doc.map(preprocessor)\n", + "doc_to_text": "{{query}}", + "doc_to_target": "{{label}}", + "unsafe_code": false, + "doc_to_choice": "choices", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": "", + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{query}}", + "doc_to_choice": "choices", + "doc_to_target": "{{label}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "f1", + "aggregation": "def macro_f1_score(items):\n from sklearn.metrics import f1_score\n\n unzipped_list = list(zip(*items))\n golds = unzipped_list[0]\n preds = unzipped_list[1]\n fscore = f1_score(golds, preds, average=\"macro\")\n return fscore\n", + "average": "macro", + "hf_evaluate": true, + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_abstract_algebra": { + "task": "mmlu_abstract_algebra", + "task_alias": "abstract_algebra", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "abstract_algebra", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_anatomy": { + "task": "mmlu_anatomy", + "task_alias": "anatomy", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "anatomy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about anatomy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_astronomy": { + "task": "mmlu_astronomy", + "task_alias": "astronomy", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "astronomy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about astronomy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_business_ethics": { + "task": "mmlu_business_ethics", + "task_alias": "business_ethics", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "business_ethics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about business ethics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_clinical_knowledge": { + "task": "mmlu_clinical_knowledge", + "task_alias": "clinical_knowledge", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "clinical_knowledge", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_biology": { + "task": "mmlu_college_biology", + "task_alias": "college_biology", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_biology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college biology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_chemistry": { + "task": "mmlu_college_chemistry", + "task_alias": "college_chemistry", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_chemistry", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college chemistry.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_computer_science": { + "task": "mmlu_college_computer_science", + "task_alias": "college_computer_science", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_computer_science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college computer science.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_mathematics": { + "task": "mmlu_college_mathematics", + "task_alias": "college_mathematics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_mathematics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college mathematics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_medicine": { + "task": "mmlu_college_medicine", + "task_alias": "college_medicine", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_medicine", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college medicine.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_physics": { + "task": "mmlu_college_physics", + "task_alias": "college_physics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_physics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college physics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_computer_security": { + "task": "mmlu_computer_security", + "task_alias": "computer_security", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "computer_security", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about computer security.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_conceptual_physics": { + "task": "mmlu_conceptual_physics", + "task_alias": "conceptual_physics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "conceptual_physics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_econometrics": { + "task": "mmlu_econometrics", + "task_alias": "econometrics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "econometrics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about econometrics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_electrical_engineering": { + "task": "mmlu_electrical_engineering", + "task_alias": "electrical_engineering", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "electrical_engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_elementary_mathematics": { + "task": "mmlu_elementary_mathematics", + "task_alias": "elementary_mathematics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "elementary_mathematics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_formal_logic": { + "task": "mmlu_formal_logic", + "task_alias": "formal_logic", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "formal_logic", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about formal logic.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_global_facts": { + "task": "mmlu_global_facts", + "task_alias": "global_facts", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "global_facts", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about global facts.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_biology": { + "task": "mmlu_high_school_biology", + "task_alias": "high_school_biology", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_biology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school biology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_chemistry": { + "task": "mmlu_high_school_chemistry", + "task_alias": "high_school_chemistry", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_chemistry", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_computer_science": { + "task": "mmlu_high_school_computer_science", + "task_alias": "high_school_computer_science", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_computer_science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school computer science.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_european_history": { + "task": "mmlu_high_school_european_history", + "task_alias": "high_school_european_history", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_european_history", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school european history.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_geography": { + "task": "mmlu_high_school_geography", + "task_alias": "high_school_geography", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_geography", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school geography.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_government_and_politics": { + "task": "mmlu_high_school_government_and_politics", + "task_alias": "high_school_government_and_politics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_government_and_politics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_macroeconomics": { + "task": "mmlu_high_school_macroeconomics", + "task_alias": "high_school_macroeconomics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_macroeconomics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_mathematics": { + "task": "mmlu_high_school_mathematics", + "task_alias": "high_school_mathematics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_mathematics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_microeconomics": { + "task": "mmlu_high_school_microeconomics", + "task_alias": "high_school_microeconomics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_microeconomics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_physics": { + "task": "mmlu_high_school_physics", + "task_alias": "high_school_physics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_physics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school physics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_psychology": { + "task": "mmlu_high_school_psychology", + "task_alias": "high_school_psychology", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_psychology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school psychology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_statistics": { + "task": "mmlu_high_school_statistics", + "task_alias": "high_school_statistics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_statistics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school statistics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_us_history": { + "task": "mmlu_high_school_us_history", + "task_alias": "high_school_us_history", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_us_history", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school us history.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_world_history": { + "task": "mmlu_high_school_world_history", + "task_alias": "high_school_world_history", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_world_history", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school world history.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_human_aging": { + "task": "mmlu_human_aging", + "task_alias": "human_aging", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "human_aging", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about human aging.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_human_sexuality": { + "task": "mmlu_human_sexuality", + "task_alias": "human_sexuality", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "human_sexuality", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about human sexuality.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_international_law": { + "task": "mmlu_international_law", + "task_alias": "international_law", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "international_law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about international law.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_jurisprudence": { + "task": "mmlu_jurisprudence", + "task_alias": "jurisprudence", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "jurisprudence", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_logical_fallacies": { + "task": "mmlu_logical_fallacies", + "task_alias": "logical_fallacies", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "logical_fallacies", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_machine_learning": { + "task": "mmlu_machine_learning", + "task_alias": "machine_learning", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "machine_learning", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about machine learning.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_management": { + "task": "mmlu_management", + "task_alias": "management", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "management", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about management.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_marketing": { + "task": "mmlu_marketing", + "task_alias": "marketing", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "marketing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about marketing.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_medical_genetics": { + "task": "mmlu_medical_genetics", + "task_alias": "medical_genetics", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "medical_genetics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about medical genetics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_miscellaneous": { + "task": "mmlu_miscellaneous", + "task_alias": "miscellaneous", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "miscellaneous", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_moral_disputes": { + "task": "mmlu_moral_disputes", + "task_alias": "moral_disputes", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "moral_disputes", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about moral disputes.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_moral_scenarios": { + "task": "mmlu_moral_scenarios", + "task_alias": "moral_scenarios", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "moral_scenarios", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_nutrition": { + "task": "mmlu_nutrition", + "task_alias": "nutrition", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "nutrition", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about nutrition.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_philosophy": { + "task": "mmlu_philosophy", + "task_alias": "philosophy", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "philosophy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about philosophy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_prehistory": { + "task": "mmlu_prehistory", + "task_alias": "prehistory", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "prehistory", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about prehistory.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_professional_accounting": { + "task": "mmlu_professional_accounting", + "task_alias": "professional_accounting", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_accounting", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional accounting.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_professional_law": { + "task": "mmlu_professional_law", + "task_alias": "professional_law", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional law.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_professional_medicine": { + "task": "mmlu_professional_medicine", + "task_alias": "professional_medicine", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_medicine", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional medicine.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_professional_psychology": { + "task": "mmlu_professional_psychology", + "task_alias": "professional_psychology", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_psychology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional psychology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_public_relations": { + "task": "mmlu_public_relations", + "task_alias": "public_relations", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "public_relations", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about public relations.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_security_studies": { + "task": "mmlu_security_studies", + "task_alias": "security_studies", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "security_studies", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about security studies.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_sociology": { + "task": "mmlu_sociology", + "task_alias": "sociology", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "sociology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about sociology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_us_foreign_policy": { + "task": "mmlu_us_foreign_policy", + "task_alias": "us_foreign_policy", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "us_foreign_policy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_virology": { + "task": "mmlu_virology", + "task_alias": "virology", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "virology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about virology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_world_religions": { + "task": "mmlu_world_religions", + "task_alias": "world_religions", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "world_religions", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about world religions.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "winogrande": { + "task": "winogrande", + "dataset_path": "allenai/winogrande", + "dataset_name": "winogrande_xl", + "training_split": "train", + "validation_split": "validation", + "doc_to_text": "def doc_to_text(doc):\n answer_to_num = {\"1\": 0, \"2\": 1}\n return answer_to_num[doc[\"answer\"]]\n", + "doc_to_target": "def doc_to_target(doc):\n idx = doc[\"sentence\"].index(\"_\") + 1\n return doc[\"sentence\"][idx:].strip()\n", + "unsafe_code": false, + "doc_to_choice": "def doc_to_choice(doc):\n idx = doc[\"sentence\"].index(\"_\")\n options = [doc[\"option1\"], doc[\"option2\"]]\n return [doc[\"sentence\"][:idx] + opt for opt in options]\n", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "", + "doc_to_choice": "", + "doc_to_target": "", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": true, + "doc_to_decontamination_query": "sentence", + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + } + }, + "versions": { + "arc_challenge": 1.0, + "arc_easy": 1.0, + "hellaswag": 1.0, + "kmmlu": 2.0, + "kmmlu_accounting": 2.0, + "kmmlu_agricultural_sciences": 2.0, + "kmmlu_applied_science": 2.0, + "kmmlu_aviation_engineering_and_maintenance": 2.0, + "kmmlu_biology": 2.0, + "kmmlu_chemical_engineering": 2.0, + "kmmlu_chemistry": 2.0, + "kmmlu_civil_engineering": 2.0, + "kmmlu_computer_science": 2.0, + "kmmlu_construction": 2.0, + "kmmlu_criminal_law": 2.0, + "kmmlu_ecology": 2.0, + "kmmlu_economics": 2.0, + "kmmlu_education": 2.0, + "kmmlu_electrical_engineering": 2.0, + "kmmlu_electronics_engineering": 2.0, + "kmmlu_energy_management": 2.0, + "kmmlu_environmental_science": 2.0, + "kmmlu_fashion": 2.0, + "kmmlu_food_processing": 2.0, + "kmmlu_gas_technology_and_engineering": 2.0, + "kmmlu_geomatics": 2.0, + "kmmlu_health": 2.0, + "kmmlu_humss": 2.0, + "kmmlu_industrial_engineer": 2.0, + "kmmlu_information_technology": 2.0, + "kmmlu_interior_architecture_and_design": 2.0, + "kmmlu_korean_history": 2.0, + "kmmlu_law": 2.0, + "kmmlu_machine_design_and_manufacturing": 2.0, + "kmmlu_management": 2.0, + "kmmlu_maritime_engineering": 2.0, + "kmmlu_marketing": 2.0, + "kmmlu_materials_engineering": 2.0, + "kmmlu_math": 2.0, + "kmmlu_mechanical_engineering": 2.0, + "kmmlu_nondestructive_testing": 2.0, + "kmmlu_other": 2.0, + "kmmlu_patent": 2.0, + "kmmlu_political_science_and_sociology": 2.0, + "kmmlu_psychology": 2.0, + "kmmlu_public_safety": 2.0, + "kmmlu_railway_and_automotive_engineering": 2.0, + "kmmlu_real_estate": 2.0, + 
"kmmlu_refrigerating_machinery": 2.0, + "kmmlu_social_welfare": 2.0, + "kmmlu_stem": 2.0, + "kmmlu_taxation": 2.0, + "kmmlu_telecommunications_and_wireless_technology": 2.0, + "kobest_boolq": 1.0, + "kobest_copa": 1.0, + "kobest_hellaswag": 1.0, + "mmlu": 2, + "mmlu_abstract_algebra": 1.0, + "mmlu_anatomy": 1.0, + "mmlu_astronomy": 1.0, + "mmlu_business_ethics": 1.0, + "mmlu_clinical_knowledge": 1.0, + "mmlu_college_biology": 1.0, + "mmlu_college_chemistry": 1.0, + "mmlu_college_computer_science": 1.0, + "mmlu_college_mathematics": 1.0, + "mmlu_college_medicine": 1.0, + "mmlu_college_physics": 1.0, + "mmlu_computer_security": 1.0, + "mmlu_conceptual_physics": 1.0, + "mmlu_econometrics": 1.0, + "mmlu_electrical_engineering": 1.0, + "mmlu_elementary_mathematics": 1.0, + "mmlu_formal_logic": 1.0, + "mmlu_global_facts": 1.0, + "mmlu_high_school_biology": 1.0, + "mmlu_high_school_chemistry": 1.0, + "mmlu_high_school_computer_science": 1.0, + "mmlu_high_school_european_history": 1.0, + "mmlu_high_school_geography": 1.0, + "mmlu_high_school_government_and_politics": 1.0, + "mmlu_high_school_macroeconomics": 1.0, + "mmlu_high_school_mathematics": 1.0, + "mmlu_high_school_microeconomics": 1.0, + "mmlu_high_school_physics": 1.0, + "mmlu_high_school_psychology": 1.0, + "mmlu_high_school_statistics": 1.0, + "mmlu_high_school_us_history": 1.0, + "mmlu_high_school_world_history": 1.0, + "mmlu_human_aging": 1.0, + "mmlu_human_sexuality": 1.0, + "mmlu_humanities": 2, + "mmlu_international_law": 1.0, + "mmlu_jurisprudence": 1.0, + "mmlu_logical_fallacies": 1.0, + "mmlu_machine_learning": 1.0, + "mmlu_management": 1.0, + "mmlu_marketing": 1.0, + "mmlu_medical_genetics": 1.0, + "mmlu_miscellaneous": 1.0, + "mmlu_moral_disputes": 1.0, + "mmlu_moral_scenarios": 1.0, + "mmlu_nutrition": 1.0, + "mmlu_other": 2, + "mmlu_philosophy": 1.0, + "mmlu_prehistory": 1.0, + "mmlu_professional_accounting": 1.0, + "mmlu_professional_law": 1.0, + "mmlu_professional_medicine": 1.0, + "mmlu_professional_psychology": 1.0, + "mmlu_public_relations": 1.0, + "mmlu_security_studies": 1.0, + "mmlu_social_sciences": 2, + "mmlu_sociology": 1.0, + "mmlu_stem": 2, + "mmlu_us_foreign_policy": 1.0, + "mmlu_virology": 1.0, + "mmlu_world_religions": 1.0, + "winogrande": 1.0 + }, + "n-shot": { + "arc_challenge": 0, + "arc_easy": 0, + "hellaswag": 0, + "kmmlu_accounting": 0, + "kmmlu_agricultural_sciences": 0, + "kmmlu_aviation_engineering_and_maintenance": 0, + "kmmlu_biology": 0, + "kmmlu_chemical_engineering": 0, + "kmmlu_chemistry": 0, + "kmmlu_civil_engineering": 0, + "kmmlu_computer_science": 0, + "kmmlu_construction": 0, + "kmmlu_criminal_law": 0, + "kmmlu_ecology": 0, + "kmmlu_economics": 0, + "kmmlu_education": 0, + "kmmlu_electrical_engineering": 0, + "kmmlu_electronics_engineering": 0, + "kmmlu_energy_management": 0, + "kmmlu_environmental_science": 0, + "kmmlu_fashion": 0, + "kmmlu_food_processing": 0, + "kmmlu_gas_technology_and_engineering": 0, + "kmmlu_geomatics": 0, + "kmmlu_health": 0, + "kmmlu_industrial_engineer": 0, + "kmmlu_information_technology": 0, + "kmmlu_interior_architecture_and_design": 0, + "kmmlu_korean_history": 0, + "kmmlu_law": 0, + "kmmlu_machine_design_and_manufacturing": 0, + "kmmlu_management": 0, + "kmmlu_maritime_engineering": 0, + "kmmlu_marketing": 0, + "kmmlu_materials_engineering": 0, + "kmmlu_math": 0, + "kmmlu_mechanical_engineering": 0, + "kmmlu_nondestructive_testing": 0, + "kmmlu_patent": 0, + "kmmlu_political_science_and_sociology": 0, + "kmmlu_psychology": 0, + "kmmlu_public_safety": 0, + 
"kmmlu_railway_and_automotive_engineering": 0, + "kmmlu_real_estate": 0, + "kmmlu_refrigerating_machinery": 0, + "kmmlu_social_welfare": 0, + "kmmlu_taxation": 0, + "kmmlu_telecommunications_and_wireless_technology": 0, + "kobest_boolq": 0, + "kobest_copa": 0, + "kobest_hellaswag": 0, + "mmlu_abstract_algebra": 0, + "mmlu_anatomy": 0, + "mmlu_astronomy": 0, + "mmlu_business_ethics": 0, + "mmlu_clinical_knowledge": 0, + "mmlu_college_biology": 0, + "mmlu_college_chemistry": 0, + "mmlu_college_computer_science": 0, + "mmlu_college_mathematics": 0, + "mmlu_college_medicine": 0, + "mmlu_college_physics": 0, + "mmlu_computer_security": 0, + "mmlu_conceptual_physics": 0, + "mmlu_econometrics": 0, + "mmlu_electrical_engineering": 0, + "mmlu_elementary_mathematics": 0, + "mmlu_formal_logic": 0, + "mmlu_global_facts": 0, + "mmlu_high_school_biology": 0, + "mmlu_high_school_chemistry": 0, + "mmlu_high_school_computer_science": 0, + "mmlu_high_school_european_history": 0, + "mmlu_high_school_geography": 0, + "mmlu_high_school_government_and_politics": 0, + "mmlu_high_school_macroeconomics": 0, + "mmlu_high_school_mathematics": 0, + "mmlu_high_school_microeconomics": 0, + "mmlu_high_school_physics": 0, + "mmlu_high_school_psychology": 0, + "mmlu_high_school_statistics": 0, + "mmlu_high_school_us_history": 0, + "mmlu_high_school_world_history": 0, + "mmlu_human_aging": 0, + "mmlu_human_sexuality": 0, + "mmlu_international_law": 0, + "mmlu_jurisprudence": 0, + "mmlu_logical_fallacies": 0, + "mmlu_machine_learning": 0, + "mmlu_management": 0, + "mmlu_marketing": 0, + "mmlu_medical_genetics": 0, + "mmlu_miscellaneous": 0, + "mmlu_moral_disputes": 0, + "mmlu_moral_scenarios": 0, + "mmlu_nutrition": 0, + "mmlu_philosophy": 0, + "mmlu_prehistory": 0, + "mmlu_professional_accounting": 0, + "mmlu_professional_law": 0, + "mmlu_professional_medicine": 0, + "mmlu_professional_psychology": 0, + "mmlu_public_relations": 0, + "mmlu_security_studies": 0, + "mmlu_sociology": 0, + "mmlu_us_foreign_policy": 0, + "mmlu_virology": 0, + "mmlu_world_religions": 0, + "winogrande": 0 + }, + "higher_is_better": { + "arc_challenge": { + "acc": true, + "acc_norm": true + }, + "arc_easy": { + "acc": true, + "acc_norm": true + }, + "hellaswag": { + "acc": true, + "acc_norm": true + }, + "kmmlu": { + "acc": true + }, + "kmmlu_accounting": { + "acc": true + }, + "kmmlu_agricultural_sciences": { + "acc": true + }, + "kmmlu_applied_science": { + "acc": true + }, + "kmmlu_aviation_engineering_and_maintenance": { + "acc": true + }, + "kmmlu_biology": { + "acc": true + }, + "kmmlu_chemical_engineering": { + "acc": true + }, + "kmmlu_chemistry": { + "acc": true + }, + "kmmlu_civil_engineering": { + "acc": true + }, + "kmmlu_computer_science": { + "acc": true + }, + "kmmlu_construction": { + "acc": true + }, + "kmmlu_criminal_law": { + "acc": true + }, + "kmmlu_ecology": { + "acc": true + }, + "kmmlu_economics": { + "acc": true + }, + "kmmlu_education": { + "acc": true + }, + "kmmlu_electrical_engineering": { + "acc": true + }, + "kmmlu_electronics_engineering": { + "acc": true + }, + "kmmlu_energy_management": { + "acc": true + }, + "kmmlu_environmental_science": { + "acc": true + }, + "kmmlu_fashion": { + "acc": true + }, + "kmmlu_food_processing": { + "acc": true + }, + "kmmlu_gas_technology_and_engineering": { + "acc": true + }, + "kmmlu_geomatics": { + "acc": true + }, + "kmmlu_health": { + "acc": true + }, + "kmmlu_humss": { + "acc": true + }, + "kmmlu_industrial_engineer": { + "acc": true + }, + "kmmlu_information_technology": { + 
"acc": true + }, + "kmmlu_interior_architecture_and_design": { + "acc": true + }, + "kmmlu_korean_history": { + "acc": true + }, + "kmmlu_law": { + "acc": true + }, + "kmmlu_machine_design_and_manufacturing": { + "acc": true + }, + "kmmlu_management": { + "acc": true + }, + "kmmlu_maritime_engineering": { + "acc": true + }, + "kmmlu_marketing": { + "acc": true + }, + "kmmlu_materials_engineering": { + "acc": true + }, + "kmmlu_math": { + "acc": true + }, + "kmmlu_mechanical_engineering": { + "acc": true + }, + "kmmlu_nondestructive_testing": { + "acc": true + }, + "kmmlu_other": { + "acc": true + }, + "kmmlu_patent": { + "acc": true + }, + "kmmlu_political_science_and_sociology": { + "acc": true + }, + "kmmlu_psychology": { + "acc": true + }, + "kmmlu_public_safety": { + "acc": true + }, + "kmmlu_railway_and_automotive_engineering": { + "acc": true + }, + "kmmlu_real_estate": { + "acc": true + }, + "kmmlu_refrigerating_machinery": { + "acc": true + }, + "kmmlu_social_welfare": { + "acc": true + }, + "kmmlu_stem": { + "acc": true + }, + "kmmlu_taxation": { + "acc": true + }, + "kmmlu_telecommunications_and_wireless_technology": { + "acc": true + }, + "kobest_boolq": { + "acc": true, + "f1": true + }, + "kobest_copa": { + "acc": true, + "f1": true + }, + "kobest_hellaswag": { + "acc": true, + "acc_norm": true, + "f1": true + }, + "mmlu": { + "acc": true + }, + "mmlu_abstract_algebra": { + "acc": true + }, + "mmlu_anatomy": { + "acc": true + }, + "mmlu_astronomy": { + "acc": true + }, + "mmlu_business_ethics": { + "acc": true + }, + "mmlu_clinical_knowledge": { + "acc": true + }, + "mmlu_college_biology": { + "acc": true + }, + "mmlu_college_chemistry": { + "acc": true + }, + "mmlu_college_computer_science": { + "acc": true + }, + "mmlu_college_mathematics": { + "acc": true + }, + "mmlu_college_medicine": { + "acc": true + }, + "mmlu_college_physics": { + "acc": true + }, + "mmlu_computer_security": { + "acc": true + }, + "mmlu_conceptual_physics": { + "acc": true + }, + "mmlu_econometrics": { + "acc": true + }, + "mmlu_electrical_engineering": { + "acc": true + }, + "mmlu_elementary_mathematics": { + "acc": true + }, + "mmlu_formal_logic": { + "acc": true + }, + "mmlu_global_facts": { + "acc": true + }, + "mmlu_high_school_biology": { + "acc": true + }, + "mmlu_high_school_chemistry": { + "acc": true + }, + "mmlu_high_school_computer_science": { + "acc": true + }, + "mmlu_high_school_european_history": { + "acc": true + }, + "mmlu_high_school_geography": { + "acc": true + }, + "mmlu_high_school_government_and_politics": { + "acc": true + }, + "mmlu_high_school_macroeconomics": { + "acc": true + }, + "mmlu_high_school_mathematics": { + "acc": true + }, + "mmlu_high_school_microeconomics": { + "acc": true + }, + "mmlu_high_school_physics": { + "acc": true + }, + "mmlu_high_school_psychology": { + "acc": true + }, + "mmlu_high_school_statistics": { + "acc": true + }, + "mmlu_high_school_us_history": { + "acc": true + }, + "mmlu_high_school_world_history": { + "acc": true + }, + "mmlu_human_aging": { + "acc": true + }, + "mmlu_human_sexuality": { + "acc": true + }, + "mmlu_humanities": { + "acc": true + }, + "mmlu_international_law": { + "acc": true + }, + "mmlu_jurisprudence": { + "acc": true + }, + "mmlu_logical_fallacies": { + "acc": true + }, + "mmlu_machine_learning": { + "acc": true + }, + "mmlu_management": { + "acc": true + }, + "mmlu_marketing": { + "acc": true + }, + "mmlu_medical_genetics": { + "acc": true + }, + "mmlu_miscellaneous": { + "acc": true + }, + "mmlu_moral_disputes": { + 
"acc": true + }, + "mmlu_moral_scenarios": { + "acc": true + }, + "mmlu_nutrition": { + "acc": true + }, + "mmlu_other": { + "acc": true + }, + "mmlu_philosophy": { + "acc": true + }, + "mmlu_prehistory": { + "acc": true + }, + "mmlu_professional_accounting": { + "acc": true + }, + "mmlu_professional_law": { + "acc": true + }, + "mmlu_professional_medicine": { + "acc": true + }, + "mmlu_professional_psychology": { + "acc": true + }, + "mmlu_public_relations": { + "acc": true + }, + "mmlu_security_studies": { + "acc": true + }, + "mmlu_social_sciences": { + "acc": true + }, + "mmlu_sociology": { + "acc": true + }, + "mmlu_stem": { + "acc": true + }, + "mmlu_us_foreign_policy": { + "acc": true + }, + "mmlu_virology": { + "acc": true + }, + "mmlu_world_religions": { + "acc": true + }, + "winogrande": { + "acc": true + } + }, + "n-samples": { + "kobest_hellaswag": { + "original": 500, + "effective": 400 + }, + "kobest_copa": { + "original": 1000, + "effective": 400 + }, + "kobest_boolq": { + "original": 1404, + "effective": 400 + }, + "kmmlu_biology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_chemical_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_chemistry": { + "original": 600, + "effective": 400 + }, + "kmmlu_civil_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_computer_science": { + "original": 1000, + "effective": 400 + }, + "kmmlu_ecology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_electrical_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_information_technology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_materials_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_math": { + "original": 300, + "effective": 300 + }, + "kmmlu_mechanical_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_agricultural_sciences": { + "original": 1000, + "effective": 400 + }, + "kmmlu_construction": { + "original": 1000, + "effective": 400 + }, + "kmmlu_fashion": { + "original": 1000, + "effective": 400 + }, + "kmmlu_food_processing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_health": { + "original": 100, + "effective": 100 + }, + "kmmlu_interior_architecture_and_design": { + "original": 1000, + "effective": 400 + }, + "kmmlu_marketing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_patent": { + "original": 100, + "effective": 100 + }, + "kmmlu_public_safety": { + "original": 1000, + "effective": 400 + }, + "kmmlu_real_estate": { + "original": 200, + "effective": 200 + }, + "kmmlu_refrigerating_machinery": { + "original": 1000, + "effective": 400 + }, + "kmmlu_aviation_engineering_and_maintenance": { + "original": 1000, + "effective": 400 + }, + "kmmlu_electronics_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_energy_management": { + "original": 1000, + "effective": 400 + }, + "kmmlu_environmental_science": { + "original": 1000, + "effective": 400 + }, + "kmmlu_gas_technology_and_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_geomatics": { + "original": 1000, + "effective": 400 + }, + "kmmlu_industrial_engineer": { + "original": 1000, + "effective": 400 + }, + "kmmlu_machine_design_and_manufacturing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_maritime_engineering": { + "original": 600, + "effective": 400 + }, + "kmmlu_nondestructive_testing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_railway_and_automotive_engineering": { + "original": 1000, + "effective": 400 + }, + 
"kmmlu_telecommunications_and_wireless_technology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_accounting": { + "original": 100, + "effective": 100 + }, + "kmmlu_criminal_law": { + "original": 200, + "effective": 200 + }, + "kmmlu_economics": { + "original": 130, + "effective": 130 + }, + "kmmlu_education": { + "original": 100, + "effective": 100 + }, + "kmmlu_korean_history": { + "original": 100, + "effective": 100 + }, + "kmmlu_law": { + "original": 1000, + "effective": 400 + }, + "kmmlu_management": { + "original": 1000, + "effective": 400 + }, + "kmmlu_political_science_and_sociology": { + "original": 300, + "effective": 300 + }, + "kmmlu_psychology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_social_welfare": { + "original": 1000, + "effective": 400 + }, + "kmmlu_taxation": { + "original": 200, + "effective": 200 + }, + "winogrande": { + "original": 1267, + "effective": 400 + }, + "arc_challenge": { + "original": 1172, + "effective": 400 + }, + "arc_easy": { + "original": 2376, + "effective": 400 + }, + "hellaswag": { + "original": 10042, + "effective": 400 + }, + "mmlu_abstract_algebra": { + "original": 100, + "effective": 100 + }, + "mmlu_anatomy": { + "original": 135, + "effective": 135 + }, + "mmlu_astronomy": { + "original": 152, + "effective": 152 + }, + "mmlu_college_biology": { + "original": 144, + "effective": 144 + }, + "mmlu_college_chemistry": { + "original": 100, + "effective": 100 + }, + "mmlu_college_computer_science": { + "original": 100, + "effective": 100 + }, + "mmlu_college_mathematics": { + "original": 100, + "effective": 100 + }, + "mmlu_college_physics": { + "original": 102, + "effective": 102 + }, + "mmlu_computer_security": { + "original": 100, + "effective": 100 + }, + "mmlu_conceptual_physics": { + "original": 235, + "effective": 235 + }, + "mmlu_electrical_engineering": { + "original": 145, + "effective": 145 + }, + "mmlu_elementary_mathematics": { + "original": 378, + "effective": 378 + }, + "mmlu_high_school_biology": { + "original": 310, + "effective": 310 + }, + "mmlu_high_school_chemistry": { + "original": 203, + "effective": 203 + }, + "mmlu_high_school_computer_science": { + "original": 100, + "effective": 100 + }, + "mmlu_high_school_mathematics": { + "original": 270, + "effective": 270 + }, + "mmlu_high_school_physics": { + "original": 151, + "effective": 151 + }, + "mmlu_high_school_statistics": { + "original": 216, + "effective": 216 + }, + "mmlu_machine_learning": { + "original": 112, + "effective": 112 + }, + "mmlu_business_ethics": { + "original": 100, + "effective": 100 + }, + "mmlu_clinical_knowledge": { + "original": 265, + "effective": 265 + }, + "mmlu_college_medicine": { + "original": 173, + "effective": 173 + }, + "mmlu_global_facts": { + "original": 100, + "effective": 100 + }, + "mmlu_human_aging": { + "original": 223, + "effective": 223 + }, + "mmlu_management": { + "original": 103, + "effective": 103 + }, + "mmlu_marketing": { + "original": 234, + "effective": 234 + }, + "mmlu_medical_genetics": { + "original": 100, + "effective": 100 + }, + "mmlu_miscellaneous": { + "original": 783, + "effective": 400 + }, + "mmlu_nutrition": { + "original": 306, + "effective": 306 + }, + "mmlu_professional_accounting": { + "original": 282, + "effective": 282 + }, + "mmlu_professional_medicine": { + "original": 272, + "effective": 272 + }, + "mmlu_virology": { + "original": 166, + "effective": 166 + }, + "mmlu_econometrics": { + "original": 114, + "effective": 114 + }, + "mmlu_high_school_geography": { + "original": 198, + 
"effective": 198 + }, + "mmlu_high_school_government_and_politics": { + "original": 193, + "effective": 193 + }, + "mmlu_high_school_macroeconomics": { + "original": 390, + "effective": 390 + }, + "mmlu_high_school_microeconomics": { + "original": 238, + "effective": 238 + }, + "mmlu_high_school_psychology": { + "original": 545, + "effective": 400 + }, + "mmlu_human_sexuality": { + "original": 131, + "effective": 131 + }, + "mmlu_professional_psychology": { + "original": 612, + "effective": 400 + }, + "mmlu_public_relations": { + "original": 110, + "effective": 110 + }, + "mmlu_security_studies": { + "original": 245, + "effective": 245 + }, + "mmlu_sociology": { + "original": 201, + "effective": 201 + }, + "mmlu_us_foreign_policy": { + "original": 100, + "effective": 100 + }, + "mmlu_formal_logic": { + "original": 126, + "effective": 126 + }, + "mmlu_high_school_european_history": { + "original": 165, + "effective": 165 + }, + "mmlu_high_school_us_history": { + "original": 204, + "effective": 204 + }, + "mmlu_high_school_world_history": { + "original": 237, + "effective": 237 + }, + "mmlu_international_law": { + "original": 121, + "effective": 121 + }, + "mmlu_jurisprudence": { + "original": 108, + "effective": 108 + }, + "mmlu_logical_fallacies": { + "original": 163, + "effective": 163 + }, + "mmlu_moral_disputes": { + "original": 346, + "effective": 346 + }, + "mmlu_moral_scenarios": { + "original": 895, + "effective": 400 + }, + "mmlu_philosophy": { + "original": 311, + "effective": 311 + }, + "mmlu_prehistory": { + "original": 324, + "effective": 324 + }, + "mmlu_professional_law": { + "original": 1534, + "effective": 400 + }, + "mmlu_world_religions": { + "original": 171, + "effective": 171 + } + }, + "config": { + "model": "hf", + "model_args": { + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + }, + "model_num_parameters": 4022468096, + "model_dtype": "torch.bfloat16", + "model_revision": "main", + "model_sha": "", + "batch_size": "12", + "batch_sizes": [], + "device": "cuda:0", + "use_cache": null, + "limit": 400.0, + "bootstrap_iters": 100000, + "gen_kwargs": {}, + "random_seed": 0, + "numpy_seed": 1234, + "torch_seed": 1234, + "fewshot_seed": 1234 + }, + "git_hash": "0ce43af", + "date": 1775962096.959724, + "pretty_env_info": "PyTorch version: 2.9.0+cu128\nIs debug build: False\nCUDA used to build PyTorch: 12.8\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.5 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: version 4.1.0\nLibc version: glibc-2.35\n\nPython version: 3.11.14 | packaged by conda-forge | (main, Oct 13 2025, 14:09:32) [GCC 14.3.0] (64-bit runtime)\nPython platform: Linux-6.8.0-90-generic-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: 12.8.93\nCUDA_MODULE_LOADING set to: \nGPU models and configuration: GPU 0: NVIDIA RTX PRO 6000 Blackwell Workstation Edition\nNvidia driver version: 590.48.01\ncuDNN version: Probably one of the following:\n/usr/lib/x86_64-linux-gnu/libcudnn.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_adv.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_cnn.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_engines_precompiled.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_engines_runtime_compiled.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_graph.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_heuristic.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_ops.so.9.8.0\nIs XPU available: False\nHIP runtime 
version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 43 bits physical, 48 bits virtual\nByte Order: Little Endian\nCPU(s): 192\nOn-line CPU(s) list: 0-191\nVendor ID: AuthenticAMD\nModel name: AMD EPYC 7642 48-Core Processor\nCPU family: 23\nModel: 49\nThread(s) per core: 2\nCore(s) per socket: 48\nSocket(s): 2\nStepping: 0\nFrequency boost: enabled\nCPU max MHz: 2300.0000\nCPU min MHz: 1500.0000\nBogoMIPS: 4600.15\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd amd_ppin arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid overflow_recov succor smca sev sev_es ibpb_exit_to_user\nVirtualization: AMD-V\nL1d cache: 3 MiB (96 instances)\nL1i cache: 3 MiB (96 instances)\nL2 cache: 48 MiB (96 instances)\nL3 cache: 512 MiB (32 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-47,96-143\nNUMA node1 CPU(s): 48-95,144-191\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Reg file data sampling: Not affected\nVulnerability Retbleed: Mitigation; untrained return thunk; SMT enabled with STIBP protection\nVulnerability Spec rstack overflow: Mitigation; Safe RET\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines; IBPB conditional; STIBP always-on; RSB filling; PBRSB-eIBRS Not affected; BHI Not affected\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nVulnerability Vmscape: Mitigation; IBPB before exit to userspace\n\nVersions of relevant libraries:\n[pip3] executorch==1.0.1\n[pip3] numpy==2.2.6\n[pip3] nvidia-cublas-cu12==12.8.4.1\n[pip3] nvidia-cuda-cupti-cu12==12.8.90\n[pip3] nvidia-cuda-nvrtc-cu12==12.8.93\n[pip3] nvidia-cuda-runtime-cu12==12.8.90\n[pip3] nvidia-cudnn-cu12==9.10.2.21\n[pip3] nvidia-cudnn-frontend==1.17.0\n[pip3] nvidia-cufft-cu12==11.3.3.83\n[pip3] nvidia-curand-cu12==10.3.9.90\n[pip3] nvidia-cusolver-cu12==11.7.3.90\n[pip3] nvidia-cusparse-cu12==12.5.8.93\n[pip3] nvidia-cusparselt-cu12==0.7.1\n[pip3] nvidia-nccl-cu12==2.27.5\n[pip3] nvidia-nvjitlink-cu12==12.8.93\n[pip3] nvidia-nvtx-cu12==12.8.90\n[pip3] optree==0.17.0\n[pip3] pytorch_tokenizers==1.0.1\n[pip3] torch==2.9.0+cu128\n[pip3] torch_c_dlpack_ext==0.1.4\n[pip3] torch-stoi==0.2.3\n[pip3] torchao==0.14.0\n[pip3] torchaudio==2.9.0+cu128\n[pip3] torchcodec==0.9.1\n[pip3] torchelastic==0.2.2\n[pip3] torchvision==0.24.0+cu128\n[pip3] 
triton==3.5.0\n[pip3] triton_kernels==1.0.0\n[conda] No relevant packages", + "transformers_version": "5.5.3", + "lm_eval_version": "0.4.11", + "upper_git_hash": null, + "tokenizer_pad_token": [ + "<|PAD_TOKEN|>", + "151669" + ], + "tokenizer_eos_token": [ + "<|endoftext|>", + "151643" + ], + "tokenizer_bos_token": [ + null, + "None" + ], + "eot_token_id": 151643, + "max_length": 32768, + "task_hashes": {}, + "model_source": "hf", + "model_name": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "model_name_sanitized": "__home__unsloth__scp_stage1_cpt__artifacts__cpt_full_96gb_qwen3_4b__checkpoints", + "system_instruction": null, + "system_instruction_sha": null, + "fewshot_as_multiturn": null, + "chat_template": null, + "chat_template_sha": null, + "total_evaluation_time_seconds": "580.1511918641627" + }, + "base": { + "results": { + "arc_challenge": { + "alias": "arc_challenge", + "acc,none": 0.4525, + "acc_stderr,none": 0.024918098926991643, + "acc_norm,none": 0.4975, + "acc_norm_stderr,none": 0.0250309958227734 + }, + "arc_easy": { + "alias": "arc_easy", + "acc,none": 0.7625, + "acc_stderr,none": 0.02130420258115865, + "acc_norm,none": 0.755, + "acc_norm_stderr,none": 0.02153129097913246 + }, + "hellaswag": { + "alias": "hellaswag", + "acc,none": 0.4925, + "acc_stderr,none": 0.025028492535438325, + "acc_norm,none": 0.6225, + "acc_norm_stderr,none": 0.024268431488608636 + }, + "kmmlu": { + "acc,none": 0.47556707712248864, + "acc_stderr,none": 0.003918983222456166, + "alias": "kmmlu" + }, + "kmmlu_applied_science": { + "acc,none": 0.45875, + "acc_stderr,none": 0.007101063857525891, + "alias": " - kmmlu_applied_science" + }, + "kmmlu_aviation_engineering_and_maintenance": { + "alias": " - kmmlu_aviation_engineering_and_maintenance", + "acc,none": 0.4475, + "acc_stderr,none": 0.024892941194307603 + }, + "kmmlu_electronics_engineering": { + "alias": " - kmmlu_electronics_engineering", + "acc,none": 0.65, + "acc_stderr,none": 0.023878346647046 + }, + "kmmlu_energy_management": { + "alias": " - kmmlu_energy_management", + "acc,none": 0.4, + "acc_stderr,none": 0.02452557357939856 + }, + "kmmlu_environmental_science": { + "alias": " - kmmlu_environmental_science", + "acc,none": 0.3875, + "acc_stderr,none": 0.02438947500927543 + }, + "kmmlu_gas_technology_and_engineering": { + "alias": " - kmmlu_gas_technology_and_engineering", + "acc,none": 0.3775, + "acc_stderr,none": 0.02426843148860864 + }, + "kmmlu_geomatics": { + "alias": " - kmmlu_geomatics", + "acc,none": 0.4325, + "acc_stderr,none": 0.024802162065186362 + }, + "kmmlu_industrial_engineer": { + "alias": " - kmmlu_industrial_engineer", + "acc,none": 0.4275, + "acc_stderr,none": 0.024766769210836766 + }, + "kmmlu_machine_design_and_manufacturing": { + "alias": " - kmmlu_machine_design_and_manufacturing", + "acc,none": 0.52, + "acc_stderr,none": 0.025011275652681887 + }, + "kmmlu_maritime_engineering": { + "alias": " - kmmlu_maritime_engineering", + "acc,none": 0.405, + "acc_stderr,none": 0.024575340657273674 + }, + "kmmlu_nondestructive_testing": { + "alias": " - kmmlu_nondestructive_testing", + "acc,none": 0.4825, + "acc_stderr,none": 0.025015972341295333 + }, + "kmmlu_railway_and_automotive_engineering": { + "alias": " - kmmlu_railway_and_automotive_engineering", + "acc,none": 0.3875, + "acc_stderr,none": 0.02438947500927542 + }, + "kmmlu_telecommunications_and_wireless_technology": { + "alias": " - kmmlu_telecommunications_and_wireless_technology", + "acc,none": 0.5875, + "acc_stderr,none": 
0.024645036407943802 + }, + "kmmlu_humss": { + "acc,none": 0.4805860805860806, + "acc_stderr,none": 0.009419825503999339, + "alias": " - kmmlu_humss" + }, + "kmmlu_accounting": { + "alias": " - kmmlu_accounting", + "acc,none": 0.49, + "acc_stderr,none": 0.05024183937956912 + }, + "kmmlu_criminal_law": { + "alias": " - kmmlu_criminal_law", + "acc,none": 0.39, + "acc_stderr,none": 0.03457567623250011 + }, + "kmmlu_economics": { + "alias": " - kmmlu_economics", + "acc,none": 0.5615384615384615, + "acc_stderr,none": 0.04368784779071991 + }, + "kmmlu_education": { + "alias": " - kmmlu_education", + "acc,none": 0.65, + "acc_stderr,none": 0.047937248544110196 + }, + "kmmlu_korean_history": { + "alias": " - kmmlu_korean_history", + "acc,none": 0.24, + "acc_stderr,none": 0.04292346959909284 + }, + "kmmlu_law": { + "alias": " - kmmlu_law", + "acc,none": 0.3875, + "acc_stderr,none": 0.024389475009275435 + }, + "kmmlu_management": { + "alias": " - kmmlu_management", + "acc,none": 0.53, + "acc_stderr,none": 0.02498621173652297 + }, + "kmmlu_political_science_and_sociology": { + "alias": " - kmmlu_political_science_and_sociology", + "acc,none": 0.5466666666666666, + "acc_stderr,none": 0.028789526978043094 + }, + "kmmlu_psychology": { + "alias": " - kmmlu_psychology", + "acc,none": 0.4275, + "acc_stderr,none": 0.02476676921083677 + }, + "kmmlu_social_welfare": { + "alias": " - kmmlu_social_welfare", + "acc,none": 0.585, + "acc_stderr,none": 0.02466695454685353 + }, + "kmmlu_taxation": { + "alias": " - kmmlu_taxation", + "acc,none": 0.435, + "acc_stderr,none": 0.03514328173714407 + }, + "kmmlu_other": { + "acc,none": 0.4772222222222222, + "acc_stderr,none": 0.008073884461069719, + "alias": " - kmmlu_other" + }, + "kmmlu_agricultural_sciences": { + "alias": " - kmmlu_agricultural_sciences", + "acc,none": 0.3625, + "acc_stderr,none": 0.024066207238097725 + }, + "kmmlu_construction": { + "alias": " - kmmlu_construction", + "acc,none": 0.3925, + "acc_stderr,none": 0.024445927747963316 + }, + "kmmlu_fashion": { + "alias": " - kmmlu_fashion", + "acc,none": 0.4575, + "acc_stderr,none": 0.024940719189394073 + }, + "kmmlu_food_processing": { + "alias": " - kmmlu_food_processing", + "acc,none": 0.39, + "acc_stderr,none": 0.024418038445046374 + }, + "kmmlu_health": { + "alias": " - kmmlu_health", + "acc,none": 0.63, + "acc_stderr,none": 0.048523658709391 + }, + "kmmlu_interior_architecture_and_design": { + "alias": " - kmmlu_interior_architecture_and_design", + "acc,none": 0.6025, + "acc_stderr,none": 0.024499693108404712 + }, + "kmmlu_marketing": { + "alias": " - kmmlu_marketing", + "acc,none": 0.76, + "acc_stderr,none": 0.021380899352993952 + }, + "kmmlu_patent": { + "alias": " - kmmlu_patent", + "acc,none": 0.46, + "acc_stderr,none": 0.05009082659620332 + }, + "kmmlu_public_safety": { + "alias": " - kmmlu_public_safety", + "acc,none": 0.4025, + "acc_stderr,none": 0.024550788746396206 + }, + "kmmlu_real_estate": { + "alias": " - kmmlu_real_estate", + "acc,none": 0.485, + "acc_stderr,none": 0.03542810683297719 + }, + "kmmlu_refrigerating_machinery": { + "alias": " - kmmlu_refrigerating_machinery", + "acc,none": 0.4125, + "acc_stderr,none": 0.024645036407943802 + }, + "kmmlu_stem": { + "acc,none": 0.4897674418604651, + "acc_stderr,none": 0.007312394370135803, + "alias": " - kmmlu_stem" + }, + "kmmlu_biology": { + "alias": " - kmmlu_biology", + "acc,none": 0.3225, + "acc_stderr,none": 0.023400926978618723 + }, + "kmmlu_chemical_engineering": { + "alias": " - kmmlu_chemical_engineering", + "acc,none": 0.4875, + 
"acc_stderr,none": 0.025023485209500245 + }, + "kmmlu_chemistry": { + "alias": " - kmmlu_chemistry", + "acc,none": 0.5175, + "acc_stderr,none": 0.02501597234129533 + }, + "kmmlu_civil_engineering": { + "alias": " - kmmlu_civil_engineering", + "acc,none": 0.3825, + "acc_stderr,none": 0.024330316186072946 + }, + "kmmlu_computer_science": { + "alias": " - kmmlu_computer_science", + "acc,none": 0.75, + "acc_stderr,none": 0.021677749238103 + }, + "kmmlu_ecology": { + "alias": " - kmmlu_ecology", + "acc,none": 0.5425, + "acc_stderr,none": 0.024940719189394077 + }, + "kmmlu_electrical_engineering": { + "alias": " - kmmlu_electrical_engineering", + "acc,none": 0.355, + "acc_stderr,none": 0.023955629410456463 + }, + "kmmlu_information_technology": { + "alias": " - kmmlu_information_technology", + "acc,none": 0.75, + "acc_stderr,none": 0.021677749238103 + }, + "kmmlu_materials_engineering": { + "alias": " - kmmlu_materials_engineering", + "acc,none": 0.495, + "acc_stderr,none": 0.025030057119361453 + }, + "kmmlu_math": { + "alias": " - kmmlu_math", + "acc,none": 0.3333333333333333, + "acc_stderr,none": 0.027262027336984396 + }, + "kmmlu_mechanical_engineering": { + "alias": " - kmmlu_mechanical_engineering", + "acc,none": 0.4125, + "acc_stderr,none": 0.024645036407943802 + }, + "kobest_boolq": { + "alias": "kobest_boolq", + "acc,none": 0.6675, + "acc_stderr,none": 0.023584952830141535, + "f1,none": 0.6247575383530242, + "f1_stderr,none": "N/A" + }, + "kobest_copa": { + "alias": "kobest_copa", + "acc,none": 0.6475, + "acc_stderr,none": 0.023917346710791564, + "f1,none": 0.6473920138042275, + "f1_stderr,none": "N/A" + }, + "kobest_hellaswag": { + "alias": "kobest_hellaswag", + "acc,none": 0.44, + "acc_stderr,none": 0.02485042976789583, + "f1,none": 0.4328647077786627, + "f1_stderr,none": "N/A", + "acc_norm,none": 0.5825, + "acc_norm_stderr,none": 0.024688218756390913 + }, + "mmlu": { + "acc,none": 0.7404266255461321, + "acc_stderr,none": 0.003869340083262106, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.6931079323797139, + "acc_stderr,none": 0.0077779673157217745, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.5793650793650794, + "acc_stderr,none": 0.04415438226743745 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.7818181818181819, + "acc_stderr,none": 0.03225078108306289 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.8284313725490197, + "acc_stderr,none": 0.02646056956124065 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.8438818565400844, + "acc_stderr,none": 0.023627159460318684 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.8016528925619835, + "acc_stderr,none": 0.03640118271990946 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.7962962962962963, + "acc_stderr,none": 0.03893542518824847 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.8404907975460123, + "acc_stderr,none": 0.02876748172598387 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.7543352601156069, + "acc_stderr,none": 0.023176298203992 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.3475, + "acc_stderr,none": 0.023838625698390636 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.7588424437299035, + 
"acc_stderr,none": 0.02429659403476343 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.7870370370370371, + "acc_stderr,none": 0.02277971908873339 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.53, + "acc_stderr,none": 0.02498621173652297 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.8070175438596491, + "acc_stderr,none": 0.030267457554898458 + }, + "mmlu_other": { + "acc,none": 0.7437591776798825, + "acc_stderr,none": 0.008056333552095894, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.75, + "acc_stderr,none": 0.04351941398892446 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.7773584905660378, + "acc_stderr,none": 0.0256042334708991 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.7341040462427746, + "acc_stderr,none": 0.03368762932259431 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.43, + "acc_stderr,none": 0.04975698519562429 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.7488789237668162, + "acc_stderr,none": 0.02910522083322461 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.8932038834951457, + "acc_stderr,none": 0.030581088928331356 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.9145299145299145, + "acc_stderr,none": 0.018315891685625862 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.8, + "acc_stderr,none": 0.04020151261036846 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.82, + "acc_stderr,none": 0.01923342954415769 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.7745098039215687, + "acc_stderr,none": 0.023929155517351277 + }, + "mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.5709219858156028, + "acc_stderr,none": 0.02952591430255856 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.7757352941176471, + "acc_stderr,none": 0.025336848563332365 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.5120481927710844, + "acc_stderr,none": 0.03891364495835817 + }, + "mmlu_social_sciences": { + "acc,none": 0.8202205882352941, + "acc_stderr,none": 0.007248431086566561, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.6578947368421053, + "acc_stderr,none": 0.04462917535336937 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.8737373737373737, + "acc_stderr,none": 0.02366435940288024 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.8756476683937824, + "acc_stderr,none": 0.023814477086593556 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.8076923076923077, + "acc_stderr,none": 0.019982347208637292 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.8991596638655462, + "acc_stderr,none": 0.019559663430480802 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.9025, + "acc_stderr,none": 0.0148504449187799 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.7862595419847328, + "acc_stderr,none": 0.035954616117746904 + }, + 
"mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.7475, + "acc_stderr,none": 0.0217495282695941 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.6818181818181818, + "acc_stderr,none": 0.04461272175910509 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.7673469387755102, + "acc_stderr,none": 0.02704925791589618 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.845771144278607, + "acc_stderr,none": 0.02553843336857833 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.89, + "acc_stderr,none": 0.03144660377352203 + }, + "mmlu_stem": { + "acc,none": 0.7148747224865207, + "acc_stderr,none": 0.007751851248299227, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.47, + "acc_stderr,none": 0.050161355804659205 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.6888888888888889, + "acc_stderr,none": 0.03999262876617723 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.8421052631578947, + "acc_stderr,none": 0.02967416752010141 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.8402777777777778, + "acc_stderr,none": 0.030635578972093267 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.56, + "acc_stderr,none": 0.049888765156985884 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.66, + "acc_stderr,none": 0.04760952285695237 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.52, + "acc_stderr,none": 0.050211673156867795 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.5686274509803921, + "acc_stderr,none": 0.04928099597287534 + }, + "mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.83, + "acc_stderr,none": 0.0377525168068637 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.7957446808510639, + "acc_stderr,none": 0.026355158413349428 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.7517241379310344, + "acc_stderr,none": 0.036001056927277716 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.7116402116402116, + "acc_stderr,none": 0.023330654054535903 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.9161290322580645, + "acc_stderr,none": 0.015769027496775653 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.7192118226600985, + "acc_stderr,none": 0.03161856335358611 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.87, + "acc_stderr,none": 0.03379976689896309 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.5222222222222223, + "acc_stderr,none": 0.030455413985678408 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.6754966887417219, + "acc_stderr,none": 0.038227469376587525 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.7222222222222222, + "acc_stderr,none": 0.030546745264953185 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.6160714285714286, + "acc_stderr,none": 0.04616143075028546 + }, + 
"winogrande": { + "alias": "winogrande", + "acc,none": 0.7375, + "acc_stderr,none": 0.022027196108925243 + } + }, + "groups": { + "kmmlu": { + "acc,none": 0.47556707712248864, + "acc_stderr,none": 0.003918983222456166, + "alias": "kmmlu" + }, + "kmmlu_applied_science": { + "acc,none": 0.45875, + "acc_stderr,none": 0.007101063857525891, + "alias": " - kmmlu_applied_science" + }, + "kmmlu_humss": { + "acc,none": 0.4805860805860806, + "acc_stderr,none": 0.009419825503999339, + "alias": " - kmmlu_humss" + }, + "kmmlu_other": { + "acc,none": 0.4772222222222222, + "acc_stderr,none": 0.008073884461069719, + "alias": " - kmmlu_other" + }, + "kmmlu_stem": { + "acc,none": 0.4897674418604651, + "acc_stderr,none": 0.007312394370135803, + "alias": " - kmmlu_stem" + }, + "mmlu": { + "acc,none": 0.7404266255461321, + "acc_stderr,none": 0.003869340083262106, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.6931079323797139, + "acc_stderr,none": 0.0077779673157217745, + "alias": " - humanities" + }, + "mmlu_other": { + "acc,none": 0.7437591776798825, + "acc_stderr,none": 0.008056333552095894, + "alias": " - other" + }, + "mmlu_social_sciences": { + "acc,none": 0.8202205882352941, + "acc_stderr,none": 0.007248431086566561, + "alias": " - social sciences" + }, + "mmlu_stem": { + "acc,none": 0.7148747224865207, + "acc_stderr,none": 0.007751851248299227, + "alias": " - stem" + } + }, + "group_subtasks": { + "mmlu_humanities": [ + "mmlu_formal_logic", + "mmlu_high_school_european_history", + "mmlu_high_school_us_history", + "mmlu_high_school_world_history", + "mmlu_international_law", + "mmlu_jurisprudence", + "mmlu_logical_fallacies", + "mmlu_moral_disputes", + "mmlu_moral_scenarios", + "mmlu_philosophy", + "mmlu_prehistory", + "mmlu_professional_law", + "mmlu_world_religions" + ], + "mmlu_social_sciences": [ + "mmlu_econometrics", + "mmlu_high_school_geography", + "mmlu_high_school_government_and_politics", + "mmlu_high_school_macroeconomics", + "mmlu_high_school_microeconomics", + "mmlu_high_school_psychology", + "mmlu_human_sexuality", + "mmlu_professional_psychology", + "mmlu_public_relations", + "mmlu_security_studies", + "mmlu_sociology", + "mmlu_us_foreign_policy" + ], + "mmlu_other": [ + "mmlu_business_ethics", + "mmlu_clinical_knowledge", + "mmlu_college_medicine", + "mmlu_global_facts", + "mmlu_human_aging", + "mmlu_management", + "mmlu_marketing", + "mmlu_medical_genetics", + "mmlu_miscellaneous", + "mmlu_nutrition", + "mmlu_professional_accounting", + "mmlu_professional_medicine", + "mmlu_virology" + ], + "mmlu_stem": [ + "mmlu_abstract_algebra", + "mmlu_anatomy", + "mmlu_astronomy", + "mmlu_college_biology", + "mmlu_college_chemistry", + "mmlu_college_computer_science", + "mmlu_college_mathematics", + "mmlu_college_physics", + "mmlu_computer_security", + "mmlu_conceptual_physics", + "mmlu_electrical_engineering", + "mmlu_elementary_mathematics", + "mmlu_high_school_biology", + "mmlu_high_school_chemistry", + "mmlu_high_school_computer_science", + "mmlu_high_school_mathematics", + "mmlu_high_school_physics", + "mmlu_high_school_statistics", + "mmlu_machine_learning" + ], + "mmlu": [ + "mmlu_stem", + "mmlu_other", + "mmlu_social_sciences", + "mmlu_humanities" + ], + "hellaswag": [], + "arc_easy": [], + "arc_challenge": [], + "winogrande": [], + "kmmlu_humss": [ + "kmmlu_accounting", + "kmmlu_criminal_law", + "kmmlu_economics", + "kmmlu_education", + "kmmlu_korean_history", + "kmmlu_law", + "kmmlu_management", + "kmmlu_political_science_and_sociology", + "kmmlu_psychology", + 
"kmmlu_social_welfare", + "kmmlu_taxation" + ], + "kmmlu_applied_science": [ + "kmmlu_aviation_engineering_and_maintenance", + "kmmlu_electronics_engineering", + "kmmlu_energy_management", + "kmmlu_environmental_science", + "kmmlu_gas_technology_and_engineering", + "kmmlu_geomatics", + "kmmlu_industrial_engineer", + "kmmlu_machine_design_and_manufacturing", + "kmmlu_maritime_engineering", + "kmmlu_nondestructive_testing", + "kmmlu_railway_and_automotive_engineering", + "kmmlu_telecommunications_and_wireless_technology" + ], + "kmmlu_other": [ + "kmmlu_agricultural_sciences", + "kmmlu_construction", + "kmmlu_fashion", + "kmmlu_food_processing", + "kmmlu_health", + "kmmlu_interior_architecture_and_design", + "kmmlu_marketing", + "kmmlu_patent", + "kmmlu_public_safety", + "kmmlu_real_estate", + "kmmlu_refrigerating_machinery" + ], + "kmmlu_stem": [ + "kmmlu_biology", + "kmmlu_chemical_engineering", + "kmmlu_chemistry", + "kmmlu_civil_engineering", + "kmmlu_computer_science", + "kmmlu_ecology", + "kmmlu_electrical_engineering", + "kmmlu_information_technology", + "kmmlu_materials_engineering", + "kmmlu_math", + "kmmlu_mechanical_engineering" + ], + "kmmlu": [ + "kmmlu_stem", + "kmmlu_other", + "kmmlu_applied_science", + "kmmlu_humss" + ], + "kobest_boolq": [], + "kobest_copa": [], + "kobest_hellaswag": [] + }, + "configs": { + "arc_challenge": { + "task": "arc_challenge", + "tag": [ + "ai2_arc" + ], + "dataset_path": "allenai/ai2_arc", + "dataset_name": "ARC-Challenge", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": "Question: {{question}}\nAnswer:", + "doc_to_target": "{{choices.label.index(answerKey)}}", + "unsafe_code": false, + "doc_to_choice": "{{choices.text}}", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "Question: {{question}}\nAnswer:", + "doc_to_choice": "{{choices.text}}", + "doc_to_target": "{{choices.label.index(answerKey)}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": true, + "doc_to_decontamination_query": "Question: {{question}}\nAnswer:", + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "arc_easy": { + "task": "arc_easy", + "tag": [ + "ai2_arc" + ], + "dataset_path": "allenai/ai2_arc", + "dataset_name": "ARC-Easy", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": "Question: {{question}}\nAnswer:", + "doc_to_target": "{{choices.label.index(answerKey)}}", + "unsafe_code": false, + "doc_to_choice": "{{choices.text}}", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "Question: {{question}}\nAnswer:", + "doc_to_choice": "{{choices.text}}", + "doc_to_target": "{{choices.label.index(answerKey)}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + 
"metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": true, + "doc_to_decontamination_query": "Question: {{question}}\nAnswer:", + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "hellaswag": { + "task": "hellaswag", + "tag": [ + "multiple_choice" + ], + "dataset_path": "Rowan/hellaswag", + "training_split": "train", + "validation_split": "validation", + "process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n def _process_doc(doc):\n ctx = doc[\"ctx_a\"] + \" \" + doc[\"ctx_b\"].capitalize()\n out_doc = {\n \"query\": preprocess(doc[\"activity_label\"] + \": \" + ctx),\n \"choices\": [preprocess(ending) for ending in doc[\"endings\"]],\n \"gold\": int(doc[\"label\"]),\n }\n return out_doc\n\n return dataset.map(_process_doc)\n", + "doc_to_text": "{{query}}", + "doc_to_target": "{{label}}", + "unsafe_code": false, + "doc_to_choice": "choices", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": "", + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{query}}", + "doc_to_choice": "choices", + "doc_to_target": "{{label}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_accounting": { + "task": "kmmlu_accounting", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Accounting", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_agricultural_sciences": { + "task": "kmmlu_agricultural_sciences", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Agricultural-Sciences", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_aviation_engineering_and_maintenance": { + "task": "kmmlu_aviation_engineering_and_maintenance", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Aviation-Engineering-and-Maintenance", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_biology": { + "task": "kmmlu_biology", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Biology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_chemical_engineering": { + "task": "kmmlu_chemical_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Chemical-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_chemistry": { + "task": "kmmlu_chemistry", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Chemistry", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_civil_engineering": { + "task": "kmmlu_civil_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Civil-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_computer_science": { + "task": "kmmlu_computer_science", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Computer-Science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_construction": { + "task": "kmmlu_construction", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Construction", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_criminal_law": { + "task": "kmmlu_criminal_law", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Criminal-Law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_ecology": { + "task": "kmmlu_ecology", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Ecology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_economics": { + "task": "kmmlu_economics", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Economics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. 
{{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_education": { + "task": "kmmlu_education", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Education", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_electrical_engineering": { + "task": "kmmlu_electrical_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Electrical-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_electronics_engineering": { + "task": "kmmlu_electronics_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Electronics-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_energy_management": { + "task": "kmmlu_energy_management", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Energy-Management", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_environmental_science": { + "task": "kmmlu_environmental_science", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Environmental-Science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_fashion": { + "task": "kmmlu_fashion", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Fashion", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_food_processing": { + "task": "kmmlu_food_processing", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Food-Processing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_gas_technology_and_engineering": { + "task": "kmmlu_gas_technology_and_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Gas-Technology-and-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_geomatics": { + "task": "kmmlu_geomatics", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Geomatics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_health": { + "task": "kmmlu_health", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Health", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_industrial_engineer": { + "task": "kmmlu_industrial_engineer", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Industrial-Engineer", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_information_technology": { + "task": "kmmlu_information_technology", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Information-Technology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_interior_architecture_and_design": { + "task": "kmmlu_interior_architecture_and_design", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Interior-Architecture-and-Design", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_korean_history": { + "task": "kmmlu_korean_history", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Korean-History", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_law": { + "task": "kmmlu_law", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_machine_design_and_manufacturing": { + "task": "kmmlu_machine_design_and_manufacturing", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Machine-Design-and-Manufacturing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_management": { + "task": "kmmlu_management", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Management", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_maritime_engineering": { + "task": "kmmlu_maritime_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Maritime-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_marketing": { + "task": "kmmlu_marketing", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Marketing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_materials_engineering": { + "task": "kmmlu_materials_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Materials-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_math": { + "task": "kmmlu_math", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Math", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_mechanical_engineering": { + "task": "kmmlu_mechanical_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Mechanical-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_nondestructive_testing": { + "task": "kmmlu_nondestructive_testing", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Nondestructive-Testing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_patent": { + "task": "kmmlu_patent", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Patent", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_political_science_and_sociology": { + "task": "kmmlu_political_science_and_sociology", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Political-Science-and-Sociology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_psychology": { + "task": "kmmlu_psychology", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Psychology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_public_safety": { + "task": "kmmlu_public_safety", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Public-Safety", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_railway_and_automotive_engineering": { + "task": "kmmlu_railway_and_automotive_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Railway-and-Automotive-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_real_estate": { + "task": "kmmlu_real_estate", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Real-Estate", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_refrigerating_machinery": { + "task": "kmmlu_refrigerating_machinery", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Refrigerating-Machinery", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_social_welfare": { + "task": "kmmlu_social_welfare", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Social-Welfare", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_taxation": { + "task": "kmmlu_taxation", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Taxation", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_telecommunications_and_wireless_technology": { + "task": "kmmlu_telecommunications_and_wireless_technology", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Telecommunications-and-Wireless-Technology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kobest_boolq": { + "task": "kobest_boolq", + "dataset_path": "skt/kobest_v1", + "dataset_name": "boolq", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": "{{paragraph}} 질문: {{question}} 답변: ", + "doc_to_target": "{{label}}", + "unsafe_code": false, + "doc_to_choice": [ + "아니오", + "예" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{paragraph}} 질문: {{question}} 답변: ", + "doc_to_choice": [ + "아니오", + "예" + ], + "doc_to_target": "{{label}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "f1", + "aggregation": "def macro_f1_score(items):\n from sklearn.metrics import f1_score\n\n unzipped_list = list(zip(*items))\n golds = unzipped_list[0]\n preds = unzipped_list[1]\n fscore = f1_score(golds, preds, average=\"macro\")\n return fscore\n", + "average": "macro", + "hf_evaluate": true, + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kobest_copa": { + "task": "kobest_copa", + "dataset_path": "skt/kobest_v1", + "dataset_name": "copa", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": "def copa_doc_to_text(doc: dict) -> str:\n connector = {\"원인\": \" 왜냐하면\", \"결과\": \" 그래서\"}[doc[\"question\"].strip()]\n return f\"\"\"{doc[\"premise\"]} {connector}\"\"\"\n", + "doc_to_target": "def copa_doc_to_target(doc: dict) -> str:\n correct_choice = doc[\"alternative_1\"] if doc[\"label\"] == 0 else doc[\"alternative_2\"]\n return f\"\"\"{correct_choice}\"\"\"\n", + "unsafe_code": false, + "doc_to_choice": "def copa_doc_to_choice(doc: dict) -> list:\n return [f\"\"\"{doc[\"alternative_1\"]}\"\"\", f\"\"\"{doc[\"alternative_2\"]}\"\"\"]\n", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "", + "doc_to_choice": "", + "doc_to_target": "", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + 
"aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "f1", + "aggregation": "def macro_f1_score(items):\n from sklearn.metrics import f1_score\n\n unzipped_list = list(zip(*items))\n golds = unzipped_list[0]\n preds = unzipped_list[1]\n fscore = f1_score(golds, preds, average=\"macro\")\n return fscore\n", + "average": "macro", + "hf_evaluate": true, + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kobest_hellaswag": { + "task": "kobest_hellaswag", + "dataset_path": "skt/kobest_v1", + "dataset_name": "hellaswag", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "process_docs": "def hellaswag_process_doc(doc: Dataset) -> Dataset:\n def preprocessor(dataset):\n return {\n \"query\": f\"\"\"문장: {dataset[\"context\"]}\"\"\",\n \"choices\": [\n dataset[\"ending_1\"],\n dataset[\"ending_2\"],\n dataset[\"ending_3\"],\n dataset[\"ending_4\"],\n ],\n \"gold\": int(dataset[\"label\"]),\n }\n\n return doc.map(preprocessor)\n", + "doc_to_text": "{{query}}", + "doc_to_target": "{{label}}", + "unsafe_code": false, + "doc_to_choice": "choices", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": "", + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{query}}", + "doc_to_choice": "choices", + "doc_to_target": "{{label}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "f1", + "aggregation": "def macro_f1_score(items):\n from sklearn.metrics import f1_score\n\n unzipped_list = list(zip(*items))\n golds = unzipped_list[0]\n preds = unzipped_list[1]\n fscore = f1_score(golds, preds, average=\"macro\")\n return fscore\n", + "average": "macro", + "hf_evaluate": true, + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_abstract_algebra": { + "task": "mmlu_abstract_algebra", + "task_alias": "abstract_algebra", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "abstract_algebra", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_anatomy": { + "task": "mmlu_anatomy", + "task_alias": "anatomy", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "anatomy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about anatomy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_astronomy": { + "task": "mmlu_astronomy", + "task_alias": "astronomy", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "astronomy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about astronomy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_business_ethics": { + "task": "mmlu_business_ethics", + "task_alias": "business_ethics", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "business_ethics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. 
{{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about business ethics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_clinical_knowledge": { + "task": "mmlu_clinical_knowledge", + "task_alias": "clinical_knowledge", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "clinical_knowledge", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_college_biology": { + "task": "mmlu_college_biology", + "task_alias": "college_biology", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_biology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college biology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_college_chemistry": { + "task": "mmlu_college_chemistry", + "task_alias": "college_chemistry", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_chemistry", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college chemistry.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_college_computer_science": { + "task": "mmlu_college_computer_science", + "task_alias": "college_computer_science", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_computer_science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college computer science.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_college_mathematics": { + "task": "mmlu_college_mathematics", + "task_alias": "college_mathematics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_mathematics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college mathematics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_college_medicine": { + "task": "mmlu_college_medicine", + "task_alias": "college_medicine", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_medicine", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college medicine.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_college_physics": { + "task": "mmlu_college_physics", + "task_alias": "college_physics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_physics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college physics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_computer_security": { + "task": "mmlu_computer_security", + "task_alias": "computer_security", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "computer_security", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about computer security.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_conceptual_physics": { + "task": "mmlu_conceptual_physics", + "task_alias": "conceptual_physics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "conceptual_physics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_econometrics": { + "task": "mmlu_econometrics", + "task_alias": "econometrics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "econometrics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about econometrics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_electrical_engineering": { + "task": "mmlu_electrical_engineering", + "task_alias": "electrical_engineering", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "electrical_engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_elementary_mathematics": { + "task": "mmlu_elementary_mathematics", + "task_alias": "elementary_mathematics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "elementary_mathematics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_formal_logic": { + "task": "mmlu_formal_logic", + "task_alias": "formal_logic", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "formal_logic", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about formal logic.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_global_facts": { + "task": "mmlu_global_facts", + "task_alias": "global_facts", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "global_facts", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about global facts.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_biology": { + "task": "mmlu_high_school_biology", + "task_alias": "high_school_biology", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_biology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school biology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_chemistry": { + "task": "mmlu_high_school_chemistry", + "task_alias": "high_school_chemistry", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_chemistry", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_computer_science": { + "task": "mmlu_high_school_computer_science", + "task_alias": "high_school_computer_science", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_computer_science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school computer science.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_european_history": { + "task": "mmlu_high_school_european_history", + "task_alias": "high_school_european_history", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_european_history", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school european history.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_geography": { + "task": "mmlu_high_school_geography", + "task_alias": "high_school_geography", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_geography", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school geography.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_government_and_politics": { + "task": "mmlu_high_school_government_and_politics", + "task_alias": "high_school_government_and_politics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_government_and_politics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_macroeconomics": { + "task": "mmlu_high_school_macroeconomics", + "task_alias": "high_school_macroeconomics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_macroeconomics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_mathematics": { + "task": "mmlu_high_school_mathematics", + "task_alias": "high_school_mathematics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_mathematics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_microeconomics": { + "task": "mmlu_high_school_microeconomics", + "task_alias": "high_school_microeconomics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_microeconomics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_physics": { + "task": "mmlu_high_school_physics", + "task_alias": "high_school_physics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_physics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school physics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_psychology": { + "task": "mmlu_high_school_psychology", + "task_alias": "high_school_psychology", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_psychology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school psychology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_statistics": { + "task": "mmlu_high_school_statistics", + "task_alias": "high_school_statistics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_statistics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school statistics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_us_history": { + "task": "mmlu_high_school_us_history", + "task_alias": "high_school_us_history", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_us_history", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school us history.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_world_history": { + "task": "mmlu_high_school_world_history", + "task_alias": "high_school_world_history", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_world_history", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school world history.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_human_aging": { + "task": "mmlu_human_aging", + "task_alias": "human_aging", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "human_aging", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about human aging.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_human_sexuality": { + "task": "mmlu_human_sexuality", + "task_alias": "human_sexuality", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "human_sexuality", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about human sexuality.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_international_law": { + "task": "mmlu_international_law", + "task_alias": "international_law", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "international_law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about international law.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_jurisprudence": { + "task": "mmlu_jurisprudence", + "task_alias": "jurisprudence", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "jurisprudence", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_logical_fallacies": { + "task": "mmlu_logical_fallacies", + "task_alias": "logical_fallacies", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "logical_fallacies", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_machine_learning": { + "task": "mmlu_machine_learning", + "task_alias": "machine_learning", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "machine_learning", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about machine learning.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_management": { + "task": "mmlu_management", + "task_alias": "management", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "management", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about management.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_marketing": { + "task": "mmlu_marketing", + "task_alias": "marketing", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "marketing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about marketing.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_medical_genetics": { + "task": "mmlu_medical_genetics", + "task_alias": "medical_genetics", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "medical_genetics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. 
{{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about medical genetics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_miscellaneous": { + "task": "mmlu_miscellaneous", + "task_alias": "miscellaneous", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "miscellaneous", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_moral_disputes": { + "task": "mmlu_moral_disputes", + "task_alias": "moral_disputes", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "moral_disputes", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about moral disputes.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_moral_scenarios": { + "task": "mmlu_moral_scenarios", + "task_alias": "moral_scenarios", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "moral_scenarios", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_nutrition": { + "task": "mmlu_nutrition", + "task_alias": "nutrition", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "nutrition", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about nutrition.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_philosophy": { + "task": "mmlu_philosophy", + "task_alias": "philosophy", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "philosophy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. 
{{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about philosophy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_prehistory": { + "task": "mmlu_prehistory", + "task_alias": "prehistory", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "prehistory", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about prehistory.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_professional_accounting": { + "task": "mmlu_professional_accounting", + "task_alias": "professional_accounting", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_accounting", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional accounting.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_professional_law": { + "task": "mmlu_professional_law", + "task_alias": "professional_law", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional law.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_professional_medicine": { + "task": "mmlu_professional_medicine", + "task_alias": "professional_medicine", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_medicine", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional medicine.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_professional_psychology": { + "task": "mmlu_professional_psychology", + "task_alias": "professional_psychology", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_psychology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional psychology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_public_relations": { + "task": "mmlu_public_relations", + "task_alias": "public_relations", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "public_relations", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about public relations.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_security_studies": { + "task": "mmlu_security_studies", + "task_alias": "security_studies", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "security_studies", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about security studies.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_sociology": { + "task": "mmlu_sociology", + "task_alias": "sociology", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "sociology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about sociology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_us_foreign_policy": { + "task": "mmlu_us_foreign_policy", + "task_alias": "us_foreign_policy", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "us_foreign_policy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_virology": { + "task": "mmlu_virology", + "task_alias": "virology", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "virology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about virology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_world_religions": { + "task": "mmlu_world_religions", + "task_alias": "world_religions", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "world_religions", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about world religions.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "winogrande": { + "task": "winogrande", + "dataset_path": "allenai/winogrande", + "dataset_name": "winogrande_xl", + "training_split": "train", + "validation_split": "validation", + "doc_to_text": "def doc_to_text(doc):\n answer_to_num = {\"1\": 0, \"2\": 1}\n return answer_to_num[doc[\"answer\"]]\n", + "doc_to_target": "def doc_to_target(doc):\n idx = doc[\"sentence\"].index(\"_\") + 1\n return doc[\"sentence\"][idx:].strip()\n", + "unsafe_code": false, + "doc_to_choice": "def doc_to_choice(doc):\n idx = doc[\"sentence\"].index(\"_\")\n options = [doc[\"option1\"], doc[\"option2\"]]\n return [doc[\"sentence\"][:idx] + opt for opt in options]\n", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "", + "doc_to_choice": "", + "doc_to_target": "", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": true, + "doc_to_decontamination_query": "sentence", + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + } + }, + "versions": { + "arc_challenge": 1.0, + "arc_easy": 1.0, + "hellaswag": 1.0, + "kmmlu": 2.0, + "kmmlu_accounting": 2.0, + "kmmlu_agricultural_sciences": 2.0, + "kmmlu_applied_science": 2.0, + "kmmlu_aviation_engineering_and_maintenance": 2.0, + 
"kmmlu_biology": 2.0, + "kmmlu_chemical_engineering": 2.0, + "kmmlu_chemistry": 2.0, + "kmmlu_civil_engineering": 2.0, + "kmmlu_computer_science": 2.0, + "kmmlu_construction": 2.0, + "kmmlu_criminal_law": 2.0, + "kmmlu_ecology": 2.0, + "kmmlu_economics": 2.0, + "kmmlu_education": 2.0, + "kmmlu_electrical_engineering": 2.0, + "kmmlu_electronics_engineering": 2.0, + "kmmlu_energy_management": 2.0, + "kmmlu_environmental_science": 2.0, + "kmmlu_fashion": 2.0, + "kmmlu_food_processing": 2.0, + "kmmlu_gas_technology_and_engineering": 2.0, + "kmmlu_geomatics": 2.0, + "kmmlu_health": 2.0, + "kmmlu_humss": 2.0, + "kmmlu_industrial_engineer": 2.0, + "kmmlu_information_technology": 2.0, + "kmmlu_interior_architecture_and_design": 2.0, + "kmmlu_korean_history": 2.0, + "kmmlu_law": 2.0, + "kmmlu_machine_design_and_manufacturing": 2.0, + "kmmlu_management": 2.0, + "kmmlu_maritime_engineering": 2.0, + "kmmlu_marketing": 2.0, + "kmmlu_materials_engineering": 2.0, + "kmmlu_math": 2.0, + "kmmlu_mechanical_engineering": 2.0, + "kmmlu_nondestructive_testing": 2.0, + "kmmlu_other": 2.0, + "kmmlu_patent": 2.0, + "kmmlu_political_science_and_sociology": 2.0, + "kmmlu_psychology": 2.0, + "kmmlu_public_safety": 2.0, + "kmmlu_railway_and_automotive_engineering": 2.0, + "kmmlu_real_estate": 2.0, + "kmmlu_refrigerating_machinery": 2.0, + "kmmlu_social_welfare": 2.0, + "kmmlu_stem": 2.0, + "kmmlu_taxation": 2.0, + "kmmlu_telecommunications_and_wireless_technology": 2.0, + "kobest_boolq": 1.0, + "kobest_copa": 1.0, + "kobest_hellaswag": 1.0, + "mmlu": 2, + "mmlu_abstract_algebra": 1.0, + "mmlu_anatomy": 1.0, + "mmlu_astronomy": 1.0, + "mmlu_business_ethics": 1.0, + "mmlu_clinical_knowledge": 1.0, + "mmlu_college_biology": 1.0, + "mmlu_college_chemistry": 1.0, + "mmlu_college_computer_science": 1.0, + "mmlu_college_mathematics": 1.0, + "mmlu_college_medicine": 1.0, + "mmlu_college_physics": 1.0, + "mmlu_computer_security": 1.0, + "mmlu_conceptual_physics": 1.0, + "mmlu_econometrics": 1.0, + "mmlu_electrical_engineering": 1.0, + "mmlu_elementary_mathematics": 1.0, + "mmlu_formal_logic": 1.0, + "mmlu_global_facts": 1.0, + "mmlu_high_school_biology": 1.0, + "mmlu_high_school_chemistry": 1.0, + "mmlu_high_school_computer_science": 1.0, + "mmlu_high_school_european_history": 1.0, + "mmlu_high_school_geography": 1.0, + "mmlu_high_school_government_and_politics": 1.0, + "mmlu_high_school_macroeconomics": 1.0, + "mmlu_high_school_mathematics": 1.0, + "mmlu_high_school_microeconomics": 1.0, + "mmlu_high_school_physics": 1.0, + "mmlu_high_school_psychology": 1.0, + "mmlu_high_school_statistics": 1.0, + "mmlu_high_school_us_history": 1.0, + "mmlu_high_school_world_history": 1.0, + "mmlu_human_aging": 1.0, + "mmlu_human_sexuality": 1.0, + "mmlu_humanities": 2, + "mmlu_international_law": 1.0, + "mmlu_jurisprudence": 1.0, + "mmlu_logical_fallacies": 1.0, + "mmlu_machine_learning": 1.0, + "mmlu_management": 1.0, + "mmlu_marketing": 1.0, + "mmlu_medical_genetics": 1.0, + "mmlu_miscellaneous": 1.0, + "mmlu_moral_disputes": 1.0, + "mmlu_moral_scenarios": 1.0, + "mmlu_nutrition": 1.0, + "mmlu_other": 2, + "mmlu_philosophy": 1.0, + "mmlu_prehistory": 1.0, + "mmlu_professional_accounting": 1.0, + "mmlu_professional_law": 1.0, + "mmlu_professional_medicine": 1.0, + "mmlu_professional_psychology": 1.0, + "mmlu_public_relations": 1.0, + "mmlu_security_studies": 1.0, + "mmlu_social_sciences": 2, + "mmlu_sociology": 1.0, + "mmlu_stem": 2, + "mmlu_us_foreign_policy": 1.0, + "mmlu_virology": 1.0, + "mmlu_world_religions": 1.0, + "winogrande": 
1.0 + }, + "n-shot": { + "arc_challenge": 0, + "arc_easy": 0, + "hellaswag": 0, + "kmmlu_accounting": 0, + "kmmlu_agricultural_sciences": 0, + "kmmlu_aviation_engineering_and_maintenance": 0, + "kmmlu_biology": 0, + "kmmlu_chemical_engineering": 0, + "kmmlu_chemistry": 0, + "kmmlu_civil_engineering": 0, + "kmmlu_computer_science": 0, + "kmmlu_construction": 0, + "kmmlu_criminal_law": 0, + "kmmlu_ecology": 0, + "kmmlu_economics": 0, + "kmmlu_education": 0, + "kmmlu_electrical_engineering": 0, + "kmmlu_electronics_engineering": 0, + "kmmlu_energy_management": 0, + "kmmlu_environmental_science": 0, + "kmmlu_fashion": 0, + "kmmlu_food_processing": 0, + "kmmlu_gas_technology_and_engineering": 0, + "kmmlu_geomatics": 0, + "kmmlu_health": 0, + "kmmlu_industrial_engineer": 0, + "kmmlu_information_technology": 0, + "kmmlu_interior_architecture_and_design": 0, + "kmmlu_korean_history": 0, + "kmmlu_law": 0, + "kmmlu_machine_design_and_manufacturing": 0, + "kmmlu_management": 0, + "kmmlu_maritime_engineering": 0, + "kmmlu_marketing": 0, + "kmmlu_materials_engineering": 0, + "kmmlu_math": 0, + "kmmlu_mechanical_engineering": 0, + "kmmlu_nondestructive_testing": 0, + "kmmlu_patent": 0, + "kmmlu_political_science_and_sociology": 0, + "kmmlu_psychology": 0, + "kmmlu_public_safety": 0, + "kmmlu_railway_and_automotive_engineering": 0, + "kmmlu_real_estate": 0, + "kmmlu_refrigerating_machinery": 0, + "kmmlu_social_welfare": 0, + "kmmlu_taxation": 0, + "kmmlu_telecommunications_and_wireless_technology": 0, + "kobest_boolq": 0, + "kobest_copa": 0, + "kobest_hellaswag": 0, + "mmlu_abstract_algebra": 0, + "mmlu_anatomy": 0, + "mmlu_astronomy": 0, + "mmlu_business_ethics": 0, + "mmlu_clinical_knowledge": 0, + "mmlu_college_biology": 0, + "mmlu_college_chemistry": 0, + "mmlu_college_computer_science": 0, + "mmlu_college_mathematics": 0, + "mmlu_college_medicine": 0, + "mmlu_college_physics": 0, + "mmlu_computer_security": 0, + "mmlu_conceptual_physics": 0, + "mmlu_econometrics": 0, + "mmlu_electrical_engineering": 0, + "mmlu_elementary_mathematics": 0, + "mmlu_formal_logic": 0, + "mmlu_global_facts": 0, + "mmlu_high_school_biology": 0, + "mmlu_high_school_chemistry": 0, + "mmlu_high_school_computer_science": 0, + "mmlu_high_school_european_history": 0, + "mmlu_high_school_geography": 0, + "mmlu_high_school_government_and_politics": 0, + "mmlu_high_school_macroeconomics": 0, + "mmlu_high_school_mathematics": 0, + "mmlu_high_school_microeconomics": 0, + "mmlu_high_school_physics": 0, + "mmlu_high_school_psychology": 0, + "mmlu_high_school_statistics": 0, + "mmlu_high_school_us_history": 0, + "mmlu_high_school_world_history": 0, + "mmlu_human_aging": 0, + "mmlu_human_sexuality": 0, + "mmlu_international_law": 0, + "mmlu_jurisprudence": 0, + "mmlu_logical_fallacies": 0, + "mmlu_machine_learning": 0, + "mmlu_management": 0, + "mmlu_marketing": 0, + "mmlu_medical_genetics": 0, + "mmlu_miscellaneous": 0, + "mmlu_moral_disputes": 0, + "mmlu_moral_scenarios": 0, + "mmlu_nutrition": 0, + "mmlu_philosophy": 0, + "mmlu_prehistory": 0, + "mmlu_professional_accounting": 0, + "mmlu_professional_law": 0, + "mmlu_professional_medicine": 0, + "mmlu_professional_psychology": 0, + "mmlu_public_relations": 0, + "mmlu_security_studies": 0, + "mmlu_sociology": 0, + "mmlu_us_foreign_policy": 0, + "mmlu_virology": 0, + "mmlu_world_religions": 0, + "winogrande": 0 + }, + "higher_is_better": { + "arc_challenge": { + "acc": true, + "acc_norm": true + }, + "arc_easy": { + "acc": true, + "acc_norm": true + }, + "hellaswag": { + "acc": true, + 
"acc_norm": true + }, + "kmmlu": { + "acc": true + }, + "kmmlu_accounting": { + "acc": true + }, + "kmmlu_agricultural_sciences": { + "acc": true + }, + "kmmlu_applied_science": { + "acc": true + }, + "kmmlu_aviation_engineering_and_maintenance": { + "acc": true + }, + "kmmlu_biology": { + "acc": true + }, + "kmmlu_chemical_engineering": { + "acc": true + }, + "kmmlu_chemistry": { + "acc": true + }, + "kmmlu_civil_engineering": { + "acc": true + }, + "kmmlu_computer_science": { + "acc": true + }, + "kmmlu_construction": { + "acc": true + }, + "kmmlu_criminal_law": { + "acc": true + }, + "kmmlu_ecology": { + "acc": true + }, + "kmmlu_economics": { + "acc": true + }, + "kmmlu_education": { + "acc": true + }, + "kmmlu_electrical_engineering": { + "acc": true + }, + "kmmlu_electronics_engineering": { + "acc": true + }, + "kmmlu_energy_management": { + "acc": true + }, + "kmmlu_environmental_science": { + "acc": true + }, + "kmmlu_fashion": { + "acc": true + }, + "kmmlu_food_processing": { + "acc": true + }, + "kmmlu_gas_technology_and_engineering": { + "acc": true + }, + "kmmlu_geomatics": { + "acc": true + }, + "kmmlu_health": { + "acc": true + }, + "kmmlu_humss": { + "acc": true + }, + "kmmlu_industrial_engineer": { + "acc": true + }, + "kmmlu_information_technology": { + "acc": true + }, + "kmmlu_interior_architecture_and_design": { + "acc": true + }, + "kmmlu_korean_history": { + "acc": true + }, + "kmmlu_law": { + "acc": true + }, + "kmmlu_machine_design_and_manufacturing": { + "acc": true + }, + "kmmlu_management": { + "acc": true + }, + "kmmlu_maritime_engineering": { + "acc": true + }, + "kmmlu_marketing": { + "acc": true + }, + "kmmlu_materials_engineering": { + "acc": true + }, + "kmmlu_math": { + "acc": true + }, + "kmmlu_mechanical_engineering": { + "acc": true + }, + "kmmlu_nondestructive_testing": { + "acc": true + }, + "kmmlu_other": { + "acc": true + }, + "kmmlu_patent": { + "acc": true + }, + "kmmlu_political_science_and_sociology": { + "acc": true + }, + "kmmlu_psychology": { + "acc": true + }, + "kmmlu_public_safety": { + "acc": true + }, + "kmmlu_railway_and_automotive_engineering": { + "acc": true + }, + "kmmlu_real_estate": { + "acc": true + }, + "kmmlu_refrigerating_machinery": { + "acc": true + }, + "kmmlu_social_welfare": { + "acc": true + }, + "kmmlu_stem": { + "acc": true + }, + "kmmlu_taxation": { + "acc": true + }, + "kmmlu_telecommunications_and_wireless_technology": { + "acc": true + }, + "kobest_boolq": { + "acc": true, + "f1": true + }, + "kobest_copa": { + "acc": true, + "f1": true + }, + "kobest_hellaswag": { + "acc": true, + "acc_norm": true, + "f1": true + }, + "mmlu": { + "acc": true + }, + "mmlu_abstract_algebra": { + "acc": true + }, + "mmlu_anatomy": { + "acc": true + }, + "mmlu_astronomy": { + "acc": true + }, + "mmlu_business_ethics": { + "acc": true + }, + "mmlu_clinical_knowledge": { + "acc": true + }, + "mmlu_college_biology": { + "acc": true + }, + "mmlu_college_chemistry": { + "acc": true + }, + "mmlu_college_computer_science": { + "acc": true + }, + "mmlu_college_mathematics": { + "acc": true + }, + "mmlu_college_medicine": { + "acc": true + }, + "mmlu_college_physics": { + "acc": true + }, + "mmlu_computer_security": { + "acc": true + }, + "mmlu_conceptual_physics": { + "acc": true + }, + "mmlu_econometrics": { + "acc": true + }, + "mmlu_electrical_engineering": { + "acc": true + }, + "mmlu_elementary_mathematics": { + "acc": true + }, + "mmlu_formal_logic": { + "acc": true + }, + "mmlu_global_facts": { + "acc": true + }, + 
"mmlu_high_school_biology": { + "acc": true + }, + "mmlu_high_school_chemistry": { + "acc": true + }, + "mmlu_high_school_computer_science": { + "acc": true + }, + "mmlu_high_school_european_history": { + "acc": true + }, + "mmlu_high_school_geography": { + "acc": true + }, + "mmlu_high_school_government_and_politics": { + "acc": true + }, + "mmlu_high_school_macroeconomics": { + "acc": true + }, + "mmlu_high_school_mathematics": { + "acc": true + }, + "mmlu_high_school_microeconomics": { + "acc": true + }, + "mmlu_high_school_physics": { + "acc": true + }, + "mmlu_high_school_psychology": { + "acc": true + }, + "mmlu_high_school_statistics": { + "acc": true + }, + "mmlu_high_school_us_history": { + "acc": true + }, + "mmlu_high_school_world_history": { + "acc": true + }, + "mmlu_human_aging": { + "acc": true + }, + "mmlu_human_sexuality": { + "acc": true + }, + "mmlu_humanities": { + "acc": true + }, + "mmlu_international_law": { + "acc": true + }, + "mmlu_jurisprudence": { + "acc": true + }, + "mmlu_logical_fallacies": { + "acc": true + }, + "mmlu_machine_learning": { + "acc": true + }, + "mmlu_management": { + "acc": true + }, + "mmlu_marketing": { + "acc": true + }, + "mmlu_medical_genetics": { + "acc": true + }, + "mmlu_miscellaneous": { + "acc": true + }, + "mmlu_moral_disputes": { + "acc": true + }, + "mmlu_moral_scenarios": { + "acc": true + }, + "mmlu_nutrition": { + "acc": true + }, + "mmlu_other": { + "acc": true + }, + "mmlu_philosophy": { + "acc": true + }, + "mmlu_prehistory": { + "acc": true + }, + "mmlu_professional_accounting": { + "acc": true + }, + "mmlu_professional_law": { + "acc": true + }, + "mmlu_professional_medicine": { + "acc": true + }, + "mmlu_professional_psychology": { + "acc": true + }, + "mmlu_public_relations": { + "acc": true + }, + "mmlu_security_studies": { + "acc": true + }, + "mmlu_social_sciences": { + "acc": true + }, + "mmlu_sociology": { + "acc": true + }, + "mmlu_stem": { + "acc": true + }, + "mmlu_us_foreign_policy": { + "acc": true + }, + "mmlu_virology": { + "acc": true + }, + "mmlu_world_religions": { + "acc": true + }, + "winogrande": { + "acc": true + } + }, + "n-samples": { + "kobest_hellaswag": { + "original": 500, + "effective": 400 + }, + "kobest_copa": { + "original": 1000, + "effective": 400 + }, + "kobest_boolq": { + "original": 1404, + "effective": 400 + }, + "kmmlu_biology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_chemical_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_chemistry": { + "original": 600, + "effective": 400 + }, + "kmmlu_civil_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_computer_science": { + "original": 1000, + "effective": 400 + }, + "kmmlu_ecology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_electrical_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_information_technology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_materials_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_math": { + "original": 300, + "effective": 300 + }, + "kmmlu_mechanical_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_agricultural_sciences": { + "original": 1000, + "effective": 400 + }, + "kmmlu_construction": { + "original": 1000, + "effective": 400 + }, + "kmmlu_fashion": { + "original": 1000, + "effective": 400 + }, + "kmmlu_food_processing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_health": { + "original": 100, + "effective": 100 + }, + "kmmlu_interior_architecture_and_design": { 
+ "original": 1000, + "effective": 400 + }, + "kmmlu_marketing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_patent": { + "original": 100, + "effective": 100 + }, + "kmmlu_public_safety": { + "original": 1000, + "effective": 400 + }, + "kmmlu_real_estate": { + "original": 200, + "effective": 200 + }, + "kmmlu_refrigerating_machinery": { + "original": 1000, + "effective": 400 + }, + "kmmlu_aviation_engineering_and_maintenance": { + "original": 1000, + "effective": 400 + }, + "kmmlu_electronics_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_energy_management": { + "original": 1000, + "effective": 400 + }, + "kmmlu_environmental_science": { + "original": 1000, + "effective": 400 + }, + "kmmlu_gas_technology_and_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_geomatics": { + "original": 1000, + "effective": 400 + }, + "kmmlu_industrial_engineer": { + "original": 1000, + "effective": 400 + }, + "kmmlu_machine_design_and_manufacturing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_maritime_engineering": { + "original": 600, + "effective": 400 + }, + "kmmlu_nondestructive_testing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_railway_and_automotive_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_telecommunications_and_wireless_technology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_accounting": { + "original": 100, + "effective": 100 + }, + "kmmlu_criminal_law": { + "original": 200, + "effective": 200 + }, + "kmmlu_economics": { + "original": 130, + "effective": 130 + }, + "kmmlu_education": { + "original": 100, + "effective": 100 + }, + "kmmlu_korean_history": { + "original": 100, + "effective": 100 + }, + "kmmlu_law": { + "original": 1000, + "effective": 400 + }, + "kmmlu_management": { + "original": 1000, + "effective": 400 + }, + "kmmlu_political_science_and_sociology": { + "original": 300, + "effective": 300 + }, + "kmmlu_psychology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_social_welfare": { + "original": 1000, + "effective": 400 + }, + "kmmlu_taxation": { + "original": 200, + "effective": 200 + }, + "winogrande": { + "original": 1267, + "effective": 400 + }, + "arc_challenge": { + "original": 1172, + "effective": 400 + }, + "arc_easy": { + "original": 2376, + "effective": 400 + }, + "hellaswag": { + "original": 10042, + "effective": 400 + }, + "mmlu_abstract_algebra": { + "original": 100, + "effective": 100 + }, + "mmlu_anatomy": { + "original": 135, + "effective": 135 + }, + "mmlu_astronomy": { + "original": 152, + "effective": 152 + }, + "mmlu_college_biology": { + "original": 144, + "effective": 144 + }, + "mmlu_college_chemistry": { + "original": 100, + "effective": 100 + }, + "mmlu_college_computer_science": { + "original": 100, + "effective": 100 + }, + "mmlu_college_mathematics": { + "original": 100, + "effective": 100 + }, + "mmlu_college_physics": { + "original": 102, + "effective": 102 + }, + "mmlu_computer_security": { + "original": 100, + "effective": 100 + }, + "mmlu_conceptual_physics": { + "original": 235, + "effective": 235 + }, + "mmlu_electrical_engineering": { + "original": 145, + "effective": 145 + }, + "mmlu_elementary_mathematics": { + "original": 378, + "effective": 378 + }, + "mmlu_high_school_biology": { + "original": 310, + "effective": 310 + }, + "mmlu_high_school_chemistry": { + "original": 203, + "effective": 203 + }, + "mmlu_high_school_computer_science": { + "original": 100, + "effective": 100 + }, + "mmlu_high_school_mathematics": { + 
"original": 270, + "effective": 270 + }, + "mmlu_high_school_physics": { + "original": 151, + "effective": 151 + }, + "mmlu_high_school_statistics": { + "original": 216, + "effective": 216 + }, + "mmlu_machine_learning": { + "original": 112, + "effective": 112 + }, + "mmlu_business_ethics": { + "original": 100, + "effective": 100 + }, + "mmlu_clinical_knowledge": { + "original": 265, + "effective": 265 + }, + "mmlu_college_medicine": { + "original": 173, + "effective": 173 + }, + "mmlu_global_facts": { + "original": 100, + "effective": 100 + }, + "mmlu_human_aging": { + "original": 223, + "effective": 223 + }, + "mmlu_management": { + "original": 103, + "effective": 103 + }, + "mmlu_marketing": { + "original": 234, + "effective": 234 + }, + "mmlu_medical_genetics": { + "original": 100, + "effective": 100 + }, + "mmlu_miscellaneous": { + "original": 783, + "effective": 400 + }, + "mmlu_nutrition": { + "original": 306, + "effective": 306 + }, + "mmlu_professional_accounting": { + "original": 282, + "effective": 282 + }, + "mmlu_professional_medicine": { + "original": 272, + "effective": 272 + }, + "mmlu_virology": { + "original": 166, + "effective": 166 + }, + "mmlu_econometrics": { + "original": 114, + "effective": 114 + }, + "mmlu_high_school_geography": { + "original": 198, + "effective": 198 + }, + "mmlu_high_school_government_and_politics": { + "original": 193, + "effective": 193 + }, + "mmlu_high_school_macroeconomics": { + "original": 390, + "effective": 390 + }, + "mmlu_high_school_microeconomics": { + "original": 238, + "effective": 238 + }, + "mmlu_high_school_psychology": { + "original": 545, + "effective": 400 + }, + "mmlu_human_sexuality": { + "original": 131, + "effective": 131 + }, + "mmlu_professional_psychology": { + "original": 612, + "effective": 400 + }, + "mmlu_public_relations": { + "original": 110, + "effective": 110 + }, + "mmlu_security_studies": { + "original": 245, + "effective": 245 + }, + "mmlu_sociology": { + "original": 201, + "effective": 201 + }, + "mmlu_us_foreign_policy": { + "original": 100, + "effective": 100 + }, + "mmlu_formal_logic": { + "original": 126, + "effective": 126 + }, + "mmlu_high_school_european_history": { + "original": 165, + "effective": 165 + }, + "mmlu_high_school_us_history": { + "original": 204, + "effective": 204 + }, + "mmlu_high_school_world_history": { + "original": 237, + "effective": 237 + }, + "mmlu_international_law": { + "original": 121, + "effective": 121 + }, + "mmlu_jurisprudence": { + "original": 108, + "effective": 108 + }, + "mmlu_logical_fallacies": { + "original": 163, + "effective": 163 + }, + "mmlu_moral_disputes": { + "original": 346, + "effective": 346 + }, + "mmlu_moral_scenarios": { + "original": 895, + "effective": 400 + }, + "mmlu_philosophy": { + "original": 311, + "effective": 311 + }, + "mmlu_prehistory": { + "original": 324, + "effective": 324 + }, + "mmlu_professional_law": { + "original": 1534, + "effective": 400 + }, + "mmlu_world_religions": { + "original": 171, + "effective": 171 + } + }, + "config": { + "model": "hf", + "model_args": { + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + }, + "model_num_parameters": 4022468096, + "model_dtype": "torch.bfloat16", + "model_revision": "main", + "model_sha": "0573b584bc6b32adc84bb9c91bf9b71bea71fc40", + "batch_size": "12", + "batch_sizes": [], + "device": "cuda:0", + "use_cache": null, + "limit": 400.0, + "bootstrap_iters": 100000, + "gen_kwargs": {}, + "random_seed": 0, + "numpy_seed": 1234, + "torch_seed": 1234, + "fewshot_seed": 1234 
+ }, + "git_hash": "0ce43af", + "date": 1775962695.520946, + "pretty_env_info": "PyTorch version: 2.9.0+cu128\nIs debug build: False\nCUDA used to build PyTorch: 12.8\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.5 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: version 4.1.0\nLibc version: glibc-2.35\n\nPython version: 3.11.14 | packaged by conda-forge | (main, Oct 13 2025, 14:09:32) [GCC 14.3.0] (64-bit runtime)\nPython platform: Linux-6.8.0-90-generic-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: 12.8.93\nCUDA_MODULE_LOADING set to: \nGPU models and configuration: GPU 0: NVIDIA RTX PRO 6000 Blackwell Workstation Edition\nNvidia driver version: 590.48.01\ncuDNN version: Probably one of the following:\n/usr/lib/x86_64-linux-gnu/libcudnn.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_adv.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_cnn.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_engines_precompiled.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_engines_runtime_compiled.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_graph.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_heuristic.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_ops.so.9.8.0\nIs XPU available: False\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 43 bits physical, 48 bits virtual\nByte Order: Little Endian\nCPU(s): 192\nOn-line CPU(s) list: 0-191\nVendor ID: AuthenticAMD\nModel name: AMD EPYC 7642 48-Core Processor\nCPU family: 23\nModel: 49\nThread(s) per core: 2\nCore(s) per socket: 48\nSocket(s): 2\nStepping: 0\nFrequency boost: enabled\nCPU max MHz: 2300.0000\nCPU min MHz: 1500.0000\nBogoMIPS: 4600.15\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd amd_ppin arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid overflow_recov succor smca sev sev_es ibpb_exit_to_user\nVirtualization: AMD-V\nL1d cache: 3 MiB (96 instances)\nL1i cache: 3 MiB (96 instances)\nL2 cache: 48 MiB (96 instances)\nL3 cache: 512 MiB (32 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-47,96-143\nNUMA node1 CPU(s): 48-95,144-191\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Reg file data sampling: Not affected\nVulnerability Retbleed: Mitigation; untrained return thunk; SMT enabled with STIBP protection\nVulnerability Spec rstack overflow: Mitigation; Safe RET\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\nVulnerability Spectre v1: 
Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines; IBPB conditional; STIBP always-on; RSB filling; PBRSB-eIBRS Not affected; BHI Not affected\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nVulnerability Vmscape: Mitigation; IBPB before exit to userspace\n\nVersions of relevant libraries:\n[pip3] executorch==1.0.1\n[pip3] numpy==2.2.6\n[pip3] nvidia-cublas-cu12==12.8.4.1\n[pip3] nvidia-cuda-cupti-cu12==12.8.90\n[pip3] nvidia-cuda-nvrtc-cu12==12.8.93\n[pip3] nvidia-cuda-runtime-cu12==12.8.90\n[pip3] nvidia-cudnn-cu12==9.10.2.21\n[pip3] nvidia-cudnn-frontend==1.17.0\n[pip3] nvidia-cufft-cu12==11.3.3.83\n[pip3] nvidia-curand-cu12==10.3.9.90\n[pip3] nvidia-cusolver-cu12==11.7.3.90\n[pip3] nvidia-cusparse-cu12==12.5.8.93\n[pip3] nvidia-cusparselt-cu12==0.7.1\n[pip3] nvidia-nccl-cu12==2.27.5\n[pip3] nvidia-nvjitlink-cu12==12.8.93\n[pip3] nvidia-nvtx-cu12==12.8.90\n[pip3] optree==0.17.0\n[pip3] pytorch_tokenizers==1.0.1\n[pip3] torch==2.9.0+cu128\n[pip3] torch_c_dlpack_ext==0.1.4\n[pip3] torch-stoi==0.2.3\n[pip3] torchao==0.14.0\n[pip3] torchaudio==2.9.0+cu128\n[pip3] torchcodec==0.9.1\n[pip3] torchelastic==0.2.2\n[pip3] torchvision==0.24.0+cu128\n[pip3] triton==3.5.0\n[pip3] triton_kernels==1.0.0\n[conda] No relevant packages",
+ "transformers_version": "5.5.3",
+ "lm_eval_version": "0.4.11",
+ "upper_git_hash": null,
+ "tokenizer_pad_token": [
+ "<|vision_pad|>",
+ "151654"
+ ],
+ "tokenizer_eos_token": [
+ "<|endoftext|>",
+ "151643"
+ ],
+ "tokenizer_bos_token": [
+ null,
+ "None"
+ ],
+ "eot_token_id": 151643,
+ "max_length": 32768,
+ "task_hashes": {},
+ "model_source": "hf",
+ "model_name": "unsloth/Qwen3-4B-Base",
+ "model_name_sanitized": "unsloth__Qwen3-4B-Base",
+ "system_instruction": null,
+ "system_instruction_sha": null,
+ "fewshot_as_multiturn": null,
+ "chat_template": null,
+ "chat_template_sha": null,
+ "total_evaluation_time_seconds": "573.7631184216589"
+ }
+ }
+}
\ No newline at end of file
diff --git a/eval/lm_eval/checkpoints/base/stdout.txt b/eval/lm_eval/checkpoints/base/stdout.txt
new file mode 100644
index 0000000..2190b6e
--- /dev/null
+++ b/eval/lm_eval/checkpoints/base/stdout.txt
@@ -0,0 +1,2765 @@
+2026-04-12:02:58:12 WARNING [config.evaluate_config:281] --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.
+2026-04-12:02:58:15 INFO [_cli.run:376] Selected Tasks: ['mmlu', 'hellaswag', 'arc_easy', 'arc_challenge', 'winogrande', 'kmmlu', 'kobest_boolq', 'kobest_copa', 'kobest_hellaswag']
+🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
+Unsloth: Your Flash Attention 2 installation seems to be broken. Using Xformers instead. No performance changes will be seen.
+🦥 Unsloth Zoo will now patch everything to make training faster!
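The results JSON above records the full lm-eval-harness invocation for the base model: backend, task list, seeds, batch size, and the `--limit 400` cap that the log explicitly warns about. A minimal reproduction sketch, assuming the lm-eval 0.4.x Python API (`lm_eval.simple_evaluate`) and a single CUDA device; the parameter values are copied from the "config" block, while the call itself is an assumption, not something this repo ships:

```python
# Sketch only: re-run the evaluation recorded in the results JSON above.
# Values come from its "config" block; the simple_evaluate signature is
# assumed from lm-eval 0.4.x and may differ in other versions.
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=unsloth/Qwen3-4B-Base,trust_remote_code=True",
    tasks=["mmlu", "hellaswag", "arc_easy", "arc_challenge", "winogrande",
           "kmmlu", "kobest_boolq", "kobest_copa", "kobest_hellaswag"],
    num_fewshot=0,
    batch_size=12,
    device="cuda:0",
    limit=400,  # matches the logged warning: limited runs are for testing only
    random_seed=0,
    numpy_random_seed=1234,
    torch_random_seed=1234,
    fewshot_random_seed=1234,
)
print(results["results"])
```

The "n-samples" block above shows the practical effect of `limit=400`: any task with more than 400 test items is truncated (e.g. hellaswag 10042 → 400 effective), which is why the log insists these numbers should not be read as full-benchmark metrics.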
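The per-task "configs" entries that follow all share one zero-shot multiple-choice template for KMMLU. A small sketch of how the `doc_to_text` / `doc_to_target` pair in those configs turns a dataset row into a prompt and a gold choice; jinja2 is assumed, and the sample row is invented for illustration:

```python
# Sketch of the KMMLU templates used in the task configs below.
# The dataset row here is invented; field names match the configs.
from jinja2 import Template

DOC_TO_TEXT = "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:"
DOC_TO_CHOICE = ["A", "B", "C", "D"]

row = {"question": "물은 몇 도에서 끓는가?", "A": "50도", "B": "80도",
       "C": "100도", "D": "120도", "answer": 3}  # "answer" is 1-indexed

prompt = Template(DOC_TO_TEXT).render(**row)
# doc_to_target is "{{answer-1}}": the 1-indexed answer becomes a 0-based
# index into doc_to_choice, so the gold label for this row is "C".
gold = DOC_TO_CHOICE[row["answer"] - 1]
print(prompt, gold)
```

Under `output_type: multiple_choice`, the harness scores each choice letter as a continuation after "정답:" ("Answer:") and takes the highest-likelihood option, so accuracy is the only metric these tasks report.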
+ +Loading weights: 0%| | 0/398 [00:00 datasets.Dataset:\n def _process_doc(doc):\n ctx = doc[\"ctx_a\"] + \" \" + doc[\"ctx_b\"].capitalize()\n out_doc = {\n \"query\": preprocess(doc[\"activity_label\"] + \": \" + ctx),\n \"choices\": [preprocess(ending) for ending in doc[\"endings\"]],\n \"gold\": int(doc[\"label\"]),\n }\n return out_doc\n\n return dataset.map(_process_doc)\n", + "doc_to_text": "{{query}}", + "doc_to_target": "{{label}}", + "unsafe_code": false, + "doc_to_choice": "choices", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": "", + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{query}}", + "doc_to_choice": "choices", + "doc_to_target": "{{label}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_accounting": { + "task": "kmmlu_accounting", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Accounting", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_agricultural_sciences": { + "task": "kmmlu_agricultural_sciences", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Agricultural-Sciences", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_aviation_engineering_and_maintenance": { + "task": "kmmlu_aviation_engineering_and_maintenance", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Aviation-Engineering-and-Maintenance", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_biology": { + "task": "kmmlu_biology", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Biology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_chemical_engineering": { + "task": "kmmlu_chemical_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Chemical-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_chemistry": { + "task": "kmmlu_chemistry", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Chemistry", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_civil_engineering": { + "task": "kmmlu_civil_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Civil-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_computer_science": { + "task": "kmmlu_computer_science", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Computer-Science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_construction": { + "task": "kmmlu_construction", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Construction", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_criminal_law": { + "task": "kmmlu_criminal_law", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Criminal-Law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_ecology": { + "task": "kmmlu_ecology", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Ecology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_economics": { + "task": "kmmlu_economics", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Economics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_education": { + "task": "kmmlu_education", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Education", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. 
{{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_electrical_engineering": { + "task": "kmmlu_electrical_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Electrical-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_electronics_engineering": { + "task": "kmmlu_electronics_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Electronics-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_energy_management": { + "task": "kmmlu_energy_management", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Energy-Management", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_environmental_science": { + "task": "kmmlu_environmental_science", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Environmental-Science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_fashion": { + "task": "kmmlu_fashion", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Fashion", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_food_processing": { + "task": "kmmlu_food_processing", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Food-Processing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_gas_technology_and_engineering": { + "task": "kmmlu_gas_technology_and_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Gas-Technology-and-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_geomatics": { + "task": "kmmlu_geomatics", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Geomatics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_health": { + "task": "kmmlu_health", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Health", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_industrial_engineer": { + "task": "kmmlu_industrial_engineer", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Industrial-Engineer", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_information_technology": { + "task": "kmmlu_information_technology", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Information-Technology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_interior_architecture_and_design": { + "task": "kmmlu_interior_architecture_and_design", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Interior-Architecture-and-Design", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_korean_history": { + "task": "kmmlu_korean_history", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Korean-History", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_law": { + "task": "kmmlu_law", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_machine_design_and_manufacturing": { + "task": "kmmlu_machine_design_and_manufacturing", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Machine-Design-and-Manufacturing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_management": { + "task": "kmmlu_management", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Management", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_maritime_engineering": { + "task": "kmmlu_maritime_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Maritime-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_marketing": { + "task": "kmmlu_marketing", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Marketing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_materials_engineering": { + "task": "kmmlu_materials_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Materials-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_math": { + "task": "kmmlu_math", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Math", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_mechanical_engineering": { + "task": "kmmlu_mechanical_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Mechanical-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_nondestructive_testing": { + "task": "kmmlu_nondestructive_testing", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Nondestructive-Testing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_patent": { + "task": "kmmlu_patent", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Patent", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_political_science_and_sociology": { + "task": "kmmlu_political_science_and_sociology", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Political-Science-and-Sociology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_psychology": { + "task": "kmmlu_psychology", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Psychology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_public_safety": { + "task": "kmmlu_public_safety", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Public-Safety", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_railway_and_automotive_engineering": { + "task": "kmmlu_railway_and_automotive_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Railway-and-Automotive-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_real_estate": { + "task": "kmmlu_real_estate", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Real-Estate", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_refrigerating_machinery": { + "task": "kmmlu_refrigerating_machinery", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Refrigerating-Machinery", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_social_welfare": { + "task": "kmmlu_social_welfare", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Social-Welfare", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_taxation": { + "task": "kmmlu_taxation", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Taxation", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kmmlu_telecommunications_and_wireless_technology": { + "task": "kmmlu_telecommunications_and_wireless_technology", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Telecommunications-and-Wireless-Technology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kobest_boolq": { + "task": "kobest_boolq", + "dataset_path": "skt/kobest_v1", + "dataset_name": "boolq", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": "{{paragraph}} 질문: {{question}} 답변: ", + "doc_to_target": "{{label}}", + "unsafe_code": false, + "doc_to_choice": [ + "아니오", + "예" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{paragraph}} 질문: {{question}} 답변: ", + "doc_to_choice": [ + "아니오", + "예" + ], + "doc_to_target": "{{label}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "f1", + "aggregation": "def macro_f1_score(items):\n from sklearn.metrics import f1_score\n\n unzipped_list = list(zip(*items))\n golds = unzipped_list[0]\n preds = unzipped_list[1]\n fscore = f1_score(golds, preds, average=\"macro\")\n return fscore\n", + "average": "macro", + "hf_evaluate": true, + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kobest_copa": { + "task": "kobest_copa", + "dataset_path": "skt/kobest_v1", + "dataset_name": "copa", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": "def copa_doc_to_text(doc: dict) -> str:\n connector = {\"원인\": \" 왜냐하면\", \"결과\": \" 그래서\"}[doc[\"question\"].strip()]\n return f\"\"\"{doc[\"premise\"]} {connector}\"\"\"\n", + "doc_to_target": "def copa_doc_to_target(doc: dict) -> str:\n correct_choice = doc[\"alternative_1\"] if doc[\"label\"] == 0 else doc[\"alternative_2\"]\n return f\"\"\"{correct_choice}\"\"\"\n", + "unsafe_code": false, + "doc_to_choice": "def copa_doc_to_choice(doc: dict) -> list:\n return [f\"\"\"{doc[\"alternative_1\"]}\"\"\", f\"\"\"{doc[\"alternative_2\"]}\"\"\"]\n", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "", + "doc_to_choice": "", + "doc_to_target": "", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "f1", + "aggregation": "def macro_f1_score(items):\n from sklearn.metrics import f1_score\n\n unzipped_list = list(zip(*items))\n golds = unzipped_list[0]\n preds = unzipped_list[1]\n fscore = f1_score(golds, preds, average=\"macro\")\n return fscore\n", + "average": "macro", + "hf_evaluate": true, + "higher_is_better": true + } + ], + 
"output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "kobest_hellaswag": { + "task": "kobest_hellaswag", + "dataset_path": "skt/kobest_v1", + "dataset_name": "hellaswag", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "process_docs": "def hellaswag_process_doc(doc: Dataset) -> Dataset:\n def preprocessor(dataset):\n return {\n \"query\": f\"\"\"문장: {dataset[\"context\"]}\"\"\",\n \"choices\": [\n dataset[\"ending_1\"],\n dataset[\"ending_2\"],\n dataset[\"ending_3\"],\n dataset[\"ending_4\"],\n ],\n \"gold\": int(dataset[\"label\"]),\n }\n\n return doc.map(preprocessor)\n", + "doc_to_text": "{{query}}", + "doc_to_target": "{{label}}", + "unsafe_code": false, + "doc_to_choice": "choices", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": "", + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{query}}", + "doc_to_choice": "choices", + "doc_to_target": "{{label}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "f1", + "aggregation": "def macro_f1_score(items):\n from sklearn.metrics import f1_score\n\n unzipped_list = list(zip(*items))\n golds = unzipped_list[0]\n preds = unzipped_list[1]\n fscore = f1_score(golds, preds, average=\"macro\")\n return fscore\n", + "average": "macro", + "hf_evaluate": true, + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_abstract_algebra": { + "task": "mmlu_abstract_algebra", + "task_alias": "abstract_algebra", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "abstract_algebra", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_anatomy": { + "task": "mmlu_anatomy", + "task_alias": "anatomy", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "anatomy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about anatomy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_astronomy": { + "task": "mmlu_astronomy", + "task_alias": "astronomy", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "astronomy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about astronomy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_business_ethics": { + "task": "mmlu_business_ethics", + "task_alias": "business_ethics", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "business_ethics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. 
{{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about business ethics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_clinical_knowledge": { + "task": "mmlu_clinical_knowledge", + "task_alias": "clinical_knowledge", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "clinical_knowledge", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_college_biology": { + "task": "mmlu_college_biology", + "task_alias": "college_biology", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_biology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college biology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_college_chemistry": { + "task": "mmlu_college_chemistry", + "task_alias": "college_chemistry", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_chemistry", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college chemistry.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_college_computer_science": { + "task": "mmlu_college_computer_science", + "task_alias": "college_computer_science", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_computer_science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college computer science.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_college_mathematics": { + "task": "mmlu_college_mathematics", + "task_alias": "college_mathematics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_mathematics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college mathematics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_college_medicine": { + "task": "mmlu_college_medicine", + "task_alias": "college_medicine", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_medicine", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college medicine.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_college_physics": { + "task": "mmlu_college_physics", + "task_alias": "college_physics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_physics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college physics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_computer_security": { + "task": "mmlu_computer_security", + "task_alias": "computer_security", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "computer_security", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about computer security.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_conceptual_physics": { + "task": "mmlu_conceptual_physics", + "task_alias": "conceptual_physics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "conceptual_physics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_econometrics": { + "task": "mmlu_econometrics", + "task_alias": "econometrics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "econometrics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about econometrics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_electrical_engineering": { + "task": "mmlu_electrical_engineering", + "task_alias": "electrical_engineering", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "electrical_engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_elementary_mathematics": { + "task": "mmlu_elementary_mathematics", + "task_alias": "elementary_mathematics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "elementary_mathematics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_formal_logic": { + "task": "mmlu_formal_logic", + "task_alias": "formal_logic", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "formal_logic", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about formal logic.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_global_facts": { + "task": "mmlu_global_facts", + "task_alias": "global_facts", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "global_facts", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about global facts.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_biology": { + "task": "mmlu_high_school_biology", + "task_alias": "high_school_biology", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_biology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school biology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_chemistry": { + "task": "mmlu_high_school_chemistry", + "task_alias": "high_school_chemistry", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_chemistry", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_computer_science": { + "task": "mmlu_high_school_computer_science", + "task_alias": "high_school_computer_science", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_computer_science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school computer science.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_european_history": { + "task": "mmlu_high_school_european_history", + "task_alias": "high_school_european_history", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_european_history", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school european history.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_geography": { + "task": "mmlu_high_school_geography", + "task_alias": "high_school_geography", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_geography", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school geography.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_government_and_politics": { + "task": "mmlu_high_school_government_and_politics", + "task_alias": "high_school_government_and_politics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_government_and_politics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_macroeconomics": { + "task": "mmlu_high_school_macroeconomics", + "task_alias": "high_school_macroeconomics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_macroeconomics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_mathematics": { + "task": "mmlu_high_school_mathematics", + "task_alias": "high_school_mathematics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_mathematics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_microeconomics": { + "task": "mmlu_high_school_microeconomics", + "task_alias": "high_school_microeconomics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_microeconomics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_physics": { + "task": "mmlu_high_school_physics", + "task_alias": "high_school_physics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_physics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school physics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_psychology": { + "task": "mmlu_high_school_psychology", + "task_alias": "high_school_psychology", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_psychology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school psychology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_statistics": { + "task": "mmlu_high_school_statistics", + "task_alias": "high_school_statistics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_statistics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school statistics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_us_history": { + "task": "mmlu_high_school_us_history", + "task_alias": "high_school_us_history", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_us_history", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school us history.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_high_school_world_history": { + "task": "mmlu_high_school_world_history", + "task_alias": "high_school_world_history", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_world_history", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school world history.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_human_aging": { + "task": "mmlu_human_aging", + "task_alias": "human_aging", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "human_aging", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about human aging.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_human_sexuality": { + "task": "mmlu_human_sexuality", + "task_alias": "human_sexuality", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "human_sexuality", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about human sexuality.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_international_law": { + "task": "mmlu_international_law", + "task_alias": "international_law", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "international_law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about international law.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_jurisprudence": { + "task": "mmlu_jurisprudence", + "task_alias": "jurisprudence", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "jurisprudence", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_logical_fallacies": { + "task": "mmlu_logical_fallacies", + "task_alias": "logical_fallacies", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "logical_fallacies", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_machine_learning": { + "task": "mmlu_machine_learning", + "task_alias": "machine_learning", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "machine_learning", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about machine learning.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_management": { + "task": "mmlu_management", + "task_alias": "management", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "management", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about management.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_marketing": { + "task": "mmlu_marketing", + "task_alias": "marketing", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "marketing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about marketing.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_medical_genetics": { + "task": "mmlu_medical_genetics", + "task_alias": "medical_genetics", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "medical_genetics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. 
{{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about medical genetics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_miscellaneous": { + "task": "mmlu_miscellaneous", + "task_alias": "miscellaneous", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "miscellaneous", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_moral_disputes": { + "task": "mmlu_moral_disputes", + "task_alias": "moral_disputes", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "moral_disputes", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about moral disputes.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_moral_scenarios": { + "task": "mmlu_moral_scenarios", + "task_alias": "moral_scenarios", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "moral_scenarios", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_nutrition": { + "task": "mmlu_nutrition", + "task_alias": "nutrition", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "nutrition", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about nutrition.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_philosophy": { + "task": "mmlu_philosophy", + "task_alias": "philosophy", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "philosophy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. 
{{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about philosophy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_prehistory": { + "task": "mmlu_prehistory", + "task_alias": "prehistory", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "prehistory", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about prehistory.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_professional_accounting": { + "task": "mmlu_professional_accounting", + "task_alias": "professional_accounting", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_accounting", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional accounting.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_professional_law": { + "task": "mmlu_professional_law", + "task_alias": "professional_law", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional law.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_professional_medicine": { + "task": "mmlu_professional_medicine", + "task_alias": "professional_medicine", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_medicine", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional medicine.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_professional_psychology": { + "task": "mmlu_professional_psychology", + "task_alias": "professional_psychology", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_psychology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional psychology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_public_relations": { + "task": "mmlu_public_relations", + "task_alias": "public_relations", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "public_relations", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about public relations.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_security_studies": { + "task": "mmlu_security_studies", + "task_alias": "security_studies", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "security_studies", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about security studies.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_sociology": { + "task": "mmlu_sociology", + "task_alias": "sociology", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "sociology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about sociology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_us_foreign_policy": { + "task": "mmlu_us_foreign_policy", + "task_alias": "us_foreign_policy", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "us_foreign_policy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_virology": { + "task": "mmlu_virology", + "task_alias": "virology", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "virology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about virology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "mmlu_world_religions": { + "task": "mmlu_world_religions", + "task_alias": "world_religions", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "world_religions", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about world religions.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + }, + "winogrande": { + "task": "winogrande", + "dataset_path": "allenai/winogrande", + "dataset_name": "winogrande_xl", + "training_split": "train", + "validation_split": "validation", + "doc_to_text": "def doc_to_text(doc):\n answer_to_num = {\"1\": 0, \"2\": 1}\n return answer_to_num[doc[\"answer\"]]\n", + "doc_to_target": "def doc_to_target(doc):\n idx = doc[\"sentence\"].index(\"_\") + 1\n return doc[\"sentence\"][idx:].strip()\n", + "unsafe_code": false, + "doc_to_choice": "def doc_to_choice(doc):\n idx = doc[\"sentence\"].index(\"_\")\n options = [doc[\"option1\"], doc[\"option2\"]]\n return [doc[\"sentence\"][:idx] + opt for opt in options]\n", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "", + "doc_to_choice": "", + "doc_to_target": "", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": true, + "doc_to_decontamination_query": "sentence", + "metadata": { + "version": 1.0, + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + } + } + }, + "versions": { + "arc_challenge": 1.0, + "arc_easy": 1.0, + "hellaswag": 1.0, + "kmmlu": 2.0, + "kmmlu_accounting": 2.0, + "kmmlu_agricultural_sciences": 2.0, + "kmmlu_applied_science": 2.0, + "kmmlu_aviation_engineering_and_maintenance": 2.0, + 
"kmmlu_biology": 2.0, + "kmmlu_chemical_engineering": 2.0, + "kmmlu_chemistry": 2.0, + "kmmlu_civil_engineering": 2.0, + "kmmlu_computer_science": 2.0, + "kmmlu_construction": 2.0, + "kmmlu_criminal_law": 2.0, + "kmmlu_ecology": 2.0, + "kmmlu_economics": 2.0, + "kmmlu_education": 2.0, + "kmmlu_electrical_engineering": 2.0, + "kmmlu_electronics_engineering": 2.0, + "kmmlu_energy_management": 2.0, + "kmmlu_environmental_science": 2.0, + "kmmlu_fashion": 2.0, + "kmmlu_food_processing": 2.0, + "kmmlu_gas_technology_and_engineering": 2.0, + "kmmlu_geomatics": 2.0, + "kmmlu_health": 2.0, + "kmmlu_humss": 2.0, + "kmmlu_industrial_engineer": 2.0, + "kmmlu_information_technology": 2.0, + "kmmlu_interior_architecture_and_design": 2.0, + "kmmlu_korean_history": 2.0, + "kmmlu_law": 2.0, + "kmmlu_machine_design_and_manufacturing": 2.0, + "kmmlu_management": 2.0, + "kmmlu_maritime_engineering": 2.0, + "kmmlu_marketing": 2.0, + "kmmlu_materials_engineering": 2.0, + "kmmlu_math": 2.0, + "kmmlu_mechanical_engineering": 2.0, + "kmmlu_nondestructive_testing": 2.0, + "kmmlu_other": 2.0, + "kmmlu_patent": 2.0, + "kmmlu_political_science_and_sociology": 2.0, + "kmmlu_psychology": 2.0, + "kmmlu_public_safety": 2.0, + "kmmlu_railway_and_automotive_engineering": 2.0, + "kmmlu_real_estate": 2.0, + "kmmlu_refrigerating_machinery": 2.0, + "kmmlu_social_welfare": 2.0, + "kmmlu_stem": 2.0, + "kmmlu_taxation": 2.0, + "kmmlu_telecommunications_and_wireless_technology": 2.0, + "kobest_boolq": 1.0, + "kobest_copa": 1.0, + "kobest_hellaswag": 1.0, + "mmlu": 2, + "mmlu_abstract_algebra": 1.0, + "mmlu_anatomy": 1.0, + "mmlu_astronomy": 1.0, + "mmlu_business_ethics": 1.0, + "mmlu_clinical_knowledge": 1.0, + "mmlu_college_biology": 1.0, + "mmlu_college_chemistry": 1.0, + "mmlu_college_computer_science": 1.0, + "mmlu_college_mathematics": 1.0, + "mmlu_college_medicine": 1.0, + "mmlu_college_physics": 1.0, + "mmlu_computer_security": 1.0, + "mmlu_conceptual_physics": 1.0, + "mmlu_econometrics": 1.0, + "mmlu_electrical_engineering": 1.0, + "mmlu_elementary_mathematics": 1.0, + "mmlu_formal_logic": 1.0, + "mmlu_global_facts": 1.0, + "mmlu_high_school_biology": 1.0, + "mmlu_high_school_chemistry": 1.0, + "mmlu_high_school_computer_science": 1.0, + "mmlu_high_school_european_history": 1.0, + "mmlu_high_school_geography": 1.0, + "mmlu_high_school_government_and_politics": 1.0, + "mmlu_high_school_macroeconomics": 1.0, + "mmlu_high_school_mathematics": 1.0, + "mmlu_high_school_microeconomics": 1.0, + "mmlu_high_school_physics": 1.0, + "mmlu_high_school_psychology": 1.0, + "mmlu_high_school_statistics": 1.0, + "mmlu_high_school_us_history": 1.0, + "mmlu_high_school_world_history": 1.0, + "mmlu_human_aging": 1.0, + "mmlu_human_sexuality": 1.0, + "mmlu_humanities": 2, + "mmlu_international_law": 1.0, + "mmlu_jurisprudence": 1.0, + "mmlu_logical_fallacies": 1.0, + "mmlu_machine_learning": 1.0, + "mmlu_management": 1.0, + "mmlu_marketing": 1.0, + "mmlu_medical_genetics": 1.0, + "mmlu_miscellaneous": 1.0, + "mmlu_moral_disputes": 1.0, + "mmlu_moral_scenarios": 1.0, + "mmlu_nutrition": 1.0, + "mmlu_other": 2, + "mmlu_philosophy": 1.0, + "mmlu_prehistory": 1.0, + "mmlu_professional_accounting": 1.0, + "mmlu_professional_law": 1.0, + "mmlu_professional_medicine": 1.0, + "mmlu_professional_psychology": 1.0, + "mmlu_public_relations": 1.0, + "mmlu_security_studies": 1.0, + "mmlu_social_sciences": 2, + "mmlu_sociology": 1.0, + "mmlu_stem": 2, + "mmlu_us_foreign_policy": 1.0, + "mmlu_virology": 1.0, + "mmlu_world_religions": 1.0, + "winogrande": 
1.0 + }, + "n-shot": { + "arc_challenge": 0, + "arc_easy": 0, + "hellaswag": 0, + "kmmlu_accounting": 0, + "kmmlu_agricultural_sciences": 0, + "kmmlu_aviation_engineering_and_maintenance": 0, + "kmmlu_biology": 0, + "kmmlu_chemical_engineering": 0, + "kmmlu_chemistry": 0, + "kmmlu_civil_engineering": 0, + "kmmlu_computer_science": 0, + "kmmlu_construction": 0, + "kmmlu_criminal_law": 0, + "kmmlu_ecology": 0, + "kmmlu_economics": 0, + "kmmlu_education": 0, + "kmmlu_electrical_engineering": 0, + "kmmlu_electronics_engineering": 0, + "kmmlu_energy_management": 0, + "kmmlu_environmental_science": 0, + "kmmlu_fashion": 0, + "kmmlu_food_processing": 0, + "kmmlu_gas_technology_and_engineering": 0, + "kmmlu_geomatics": 0, + "kmmlu_health": 0, + "kmmlu_industrial_engineer": 0, + "kmmlu_information_technology": 0, + "kmmlu_interior_architecture_and_design": 0, + "kmmlu_korean_history": 0, + "kmmlu_law": 0, + "kmmlu_machine_design_and_manufacturing": 0, + "kmmlu_management": 0, + "kmmlu_maritime_engineering": 0, + "kmmlu_marketing": 0, + "kmmlu_materials_engineering": 0, + "kmmlu_math": 0, + "kmmlu_mechanical_engineering": 0, + "kmmlu_nondestructive_testing": 0, + "kmmlu_patent": 0, + "kmmlu_political_science_and_sociology": 0, + "kmmlu_psychology": 0, + "kmmlu_public_safety": 0, + "kmmlu_railway_and_automotive_engineering": 0, + "kmmlu_real_estate": 0, + "kmmlu_refrigerating_machinery": 0, + "kmmlu_social_welfare": 0, + "kmmlu_taxation": 0, + "kmmlu_telecommunications_and_wireless_technology": 0, + "kobest_boolq": 0, + "kobest_copa": 0, + "kobest_hellaswag": 0, + "mmlu_abstract_algebra": 0, + "mmlu_anatomy": 0, + "mmlu_astronomy": 0, + "mmlu_business_ethics": 0, + "mmlu_clinical_knowledge": 0, + "mmlu_college_biology": 0, + "mmlu_college_chemistry": 0, + "mmlu_college_computer_science": 0, + "mmlu_college_mathematics": 0, + "mmlu_college_medicine": 0, + "mmlu_college_physics": 0, + "mmlu_computer_security": 0, + "mmlu_conceptual_physics": 0, + "mmlu_econometrics": 0, + "mmlu_electrical_engineering": 0, + "mmlu_elementary_mathematics": 0, + "mmlu_formal_logic": 0, + "mmlu_global_facts": 0, + "mmlu_high_school_biology": 0, + "mmlu_high_school_chemistry": 0, + "mmlu_high_school_computer_science": 0, + "mmlu_high_school_european_history": 0, + "mmlu_high_school_geography": 0, + "mmlu_high_school_government_and_politics": 0, + "mmlu_high_school_macroeconomics": 0, + "mmlu_high_school_mathematics": 0, + "mmlu_high_school_microeconomics": 0, + "mmlu_high_school_physics": 0, + "mmlu_high_school_psychology": 0, + "mmlu_high_school_statistics": 0, + "mmlu_high_school_us_history": 0, + "mmlu_high_school_world_history": 0, + "mmlu_human_aging": 0, + "mmlu_human_sexuality": 0, + "mmlu_international_law": 0, + "mmlu_jurisprudence": 0, + "mmlu_logical_fallacies": 0, + "mmlu_machine_learning": 0, + "mmlu_management": 0, + "mmlu_marketing": 0, + "mmlu_medical_genetics": 0, + "mmlu_miscellaneous": 0, + "mmlu_moral_disputes": 0, + "mmlu_moral_scenarios": 0, + "mmlu_nutrition": 0, + "mmlu_philosophy": 0, + "mmlu_prehistory": 0, + "mmlu_professional_accounting": 0, + "mmlu_professional_law": 0, + "mmlu_professional_medicine": 0, + "mmlu_professional_psychology": 0, + "mmlu_public_relations": 0, + "mmlu_security_studies": 0, + "mmlu_sociology": 0, + "mmlu_us_foreign_policy": 0, + "mmlu_virology": 0, + "mmlu_world_religions": 0, + "winogrande": 0 + }, + "higher_is_better": { + "arc_challenge": { + "acc": true, + "acc_norm": true + }, + "arc_easy": { + "acc": true, + "acc_norm": true + }, + "hellaswag": { + "acc": true, + 
"acc_norm": true + }, + "kmmlu": { + "acc": true + }, + "kmmlu_accounting": { + "acc": true + }, + "kmmlu_agricultural_sciences": { + "acc": true + }, + "kmmlu_applied_science": { + "acc": true + }, + "kmmlu_aviation_engineering_and_maintenance": { + "acc": true + }, + "kmmlu_biology": { + "acc": true + }, + "kmmlu_chemical_engineering": { + "acc": true + }, + "kmmlu_chemistry": { + "acc": true + }, + "kmmlu_civil_engineering": { + "acc": true + }, + "kmmlu_computer_science": { + "acc": true + }, + "kmmlu_construction": { + "acc": true + }, + "kmmlu_criminal_law": { + "acc": true + }, + "kmmlu_ecology": { + "acc": true + }, + "kmmlu_economics": { + "acc": true + }, + "kmmlu_education": { + "acc": true + }, + "kmmlu_electrical_engineering": { + "acc": true + }, + "kmmlu_electronics_engineering": { + "acc": true + }, + "kmmlu_energy_management": { + "acc": true + }, + "kmmlu_environmental_science": { + "acc": true + }, + "kmmlu_fashion": { + "acc": true + }, + "kmmlu_food_processing": { + "acc": true + }, + "kmmlu_gas_technology_and_engineering": { + "acc": true + }, + "kmmlu_geomatics": { + "acc": true + }, + "kmmlu_health": { + "acc": true + }, + "kmmlu_humss": { + "acc": true + }, + "kmmlu_industrial_engineer": { + "acc": true + }, + "kmmlu_information_technology": { + "acc": true + }, + "kmmlu_interior_architecture_and_design": { + "acc": true + }, + "kmmlu_korean_history": { + "acc": true + }, + "kmmlu_law": { + "acc": true + }, + "kmmlu_machine_design_and_manufacturing": { + "acc": true + }, + "kmmlu_management": { + "acc": true + }, + "kmmlu_maritime_engineering": { + "acc": true + }, + "kmmlu_marketing": { + "acc": true + }, + "kmmlu_materials_engineering": { + "acc": true + }, + "kmmlu_math": { + "acc": true + }, + "kmmlu_mechanical_engineering": { + "acc": true + }, + "kmmlu_nondestructive_testing": { + "acc": true + }, + "kmmlu_other": { + "acc": true + }, + "kmmlu_patent": { + "acc": true + }, + "kmmlu_political_science_and_sociology": { + "acc": true + }, + "kmmlu_psychology": { + "acc": true + }, + "kmmlu_public_safety": { + "acc": true + }, + "kmmlu_railway_and_automotive_engineering": { + "acc": true + }, + "kmmlu_real_estate": { + "acc": true + }, + "kmmlu_refrigerating_machinery": { + "acc": true + }, + "kmmlu_social_welfare": { + "acc": true + }, + "kmmlu_stem": { + "acc": true + }, + "kmmlu_taxation": { + "acc": true + }, + "kmmlu_telecommunications_and_wireless_technology": { + "acc": true + }, + "kobest_boolq": { + "acc": true, + "f1": true + }, + "kobest_copa": { + "acc": true, + "f1": true + }, + "kobest_hellaswag": { + "acc": true, + "acc_norm": true, + "f1": true + }, + "mmlu": { + "acc": true + }, + "mmlu_abstract_algebra": { + "acc": true + }, + "mmlu_anatomy": { + "acc": true + }, + "mmlu_astronomy": { + "acc": true + }, + "mmlu_business_ethics": { + "acc": true + }, + "mmlu_clinical_knowledge": { + "acc": true + }, + "mmlu_college_biology": { + "acc": true + }, + "mmlu_college_chemistry": { + "acc": true + }, + "mmlu_college_computer_science": { + "acc": true + }, + "mmlu_college_mathematics": { + "acc": true + }, + "mmlu_college_medicine": { + "acc": true + }, + "mmlu_college_physics": { + "acc": true + }, + "mmlu_computer_security": { + "acc": true + }, + "mmlu_conceptual_physics": { + "acc": true + }, + "mmlu_econometrics": { + "acc": true + }, + "mmlu_electrical_engineering": { + "acc": true + }, + "mmlu_elementary_mathematics": { + "acc": true + }, + "mmlu_formal_logic": { + "acc": true + }, + "mmlu_global_facts": { + "acc": true + }, + 
"mmlu_high_school_biology": { + "acc": true + }, + "mmlu_high_school_chemistry": { + "acc": true + }, + "mmlu_high_school_computer_science": { + "acc": true + }, + "mmlu_high_school_european_history": { + "acc": true + }, + "mmlu_high_school_geography": { + "acc": true + }, + "mmlu_high_school_government_and_politics": { + "acc": true + }, + "mmlu_high_school_macroeconomics": { + "acc": true + }, + "mmlu_high_school_mathematics": { + "acc": true + }, + "mmlu_high_school_microeconomics": { + "acc": true + }, + "mmlu_high_school_physics": { + "acc": true + }, + "mmlu_high_school_psychology": { + "acc": true + }, + "mmlu_high_school_statistics": { + "acc": true + }, + "mmlu_high_school_us_history": { + "acc": true + }, + "mmlu_high_school_world_history": { + "acc": true + }, + "mmlu_human_aging": { + "acc": true + }, + "mmlu_human_sexuality": { + "acc": true + }, + "mmlu_humanities": { + "acc": true + }, + "mmlu_international_law": { + "acc": true + }, + "mmlu_jurisprudence": { + "acc": true + }, + "mmlu_logical_fallacies": { + "acc": true + }, + "mmlu_machine_learning": { + "acc": true + }, + "mmlu_management": { + "acc": true + }, + "mmlu_marketing": { + "acc": true + }, + "mmlu_medical_genetics": { + "acc": true + }, + "mmlu_miscellaneous": { + "acc": true + }, + "mmlu_moral_disputes": { + "acc": true + }, + "mmlu_moral_scenarios": { + "acc": true + }, + "mmlu_nutrition": { + "acc": true + }, + "mmlu_other": { + "acc": true + }, + "mmlu_philosophy": { + "acc": true + }, + "mmlu_prehistory": { + "acc": true + }, + "mmlu_professional_accounting": { + "acc": true + }, + "mmlu_professional_law": { + "acc": true + }, + "mmlu_professional_medicine": { + "acc": true + }, + "mmlu_professional_psychology": { + "acc": true + }, + "mmlu_public_relations": { + "acc": true + }, + "mmlu_security_studies": { + "acc": true + }, + "mmlu_social_sciences": { + "acc": true + }, + "mmlu_sociology": { + "acc": true + }, + "mmlu_stem": { + "acc": true + }, + "mmlu_us_foreign_policy": { + "acc": true + }, + "mmlu_virology": { + "acc": true + }, + "mmlu_world_religions": { + "acc": true + }, + "winogrande": { + "acc": true + } + }, + "n-samples": { + "kobest_hellaswag": { + "original": 500, + "effective": 400 + }, + "kobest_copa": { + "original": 1000, + "effective": 400 + }, + "kobest_boolq": { + "original": 1404, + "effective": 400 + }, + "kmmlu_biology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_chemical_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_chemistry": { + "original": 600, + "effective": 400 + }, + "kmmlu_civil_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_computer_science": { + "original": 1000, + "effective": 400 + }, + "kmmlu_ecology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_electrical_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_information_technology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_materials_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_math": { + "original": 300, + "effective": 300 + }, + "kmmlu_mechanical_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_agricultural_sciences": { + "original": 1000, + "effective": 400 + }, + "kmmlu_construction": { + "original": 1000, + "effective": 400 + }, + "kmmlu_fashion": { + "original": 1000, + "effective": 400 + }, + "kmmlu_food_processing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_health": { + "original": 100, + "effective": 100 + }, + "kmmlu_interior_architecture_and_design": { 
+ "original": 1000, + "effective": 400 + }, + "kmmlu_marketing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_patent": { + "original": 100, + "effective": 100 + }, + "kmmlu_public_safety": { + "original": 1000, + "effective": 400 + }, + "kmmlu_real_estate": { + "original": 200, + "effective": 200 + }, + "kmmlu_refrigerating_machinery": { + "original": 1000, + "effective": 400 + }, + "kmmlu_aviation_engineering_and_maintenance": { + "original": 1000, + "effective": 400 + }, + "kmmlu_electronics_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_energy_management": { + "original": 1000, + "effective": 400 + }, + "kmmlu_environmental_science": { + "original": 1000, + "effective": 400 + }, + "kmmlu_gas_technology_and_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_geomatics": { + "original": 1000, + "effective": 400 + }, + "kmmlu_industrial_engineer": { + "original": 1000, + "effective": 400 + }, + "kmmlu_machine_design_and_manufacturing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_maritime_engineering": { + "original": 600, + "effective": 400 + }, + "kmmlu_nondestructive_testing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_railway_and_automotive_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_telecommunications_and_wireless_technology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_accounting": { + "original": 100, + "effective": 100 + }, + "kmmlu_criminal_law": { + "original": 200, + "effective": 200 + }, + "kmmlu_economics": { + "original": 130, + "effective": 130 + }, + "kmmlu_education": { + "original": 100, + "effective": 100 + }, + "kmmlu_korean_history": { + "original": 100, + "effective": 100 + }, + "kmmlu_law": { + "original": 1000, + "effective": 400 + }, + "kmmlu_management": { + "original": 1000, + "effective": 400 + }, + "kmmlu_political_science_and_sociology": { + "original": 300, + "effective": 300 + }, + "kmmlu_psychology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_social_welfare": { + "original": 1000, + "effective": 400 + }, + "kmmlu_taxation": { + "original": 200, + "effective": 200 + }, + "winogrande": { + "original": 1267, + "effective": 400 + }, + "arc_challenge": { + "original": 1172, + "effective": 400 + }, + "arc_easy": { + "original": 2376, + "effective": 400 + }, + "hellaswag": { + "original": 10042, + "effective": 400 + }, + "mmlu_abstract_algebra": { + "original": 100, + "effective": 100 + }, + "mmlu_anatomy": { + "original": 135, + "effective": 135 + }, + "mmlu_astronomy": { + "original": 152, + "effective": 152 + }, + "mmlu_college_biology": { + "original": 144, + "effective": 144 + }, + "mmlu_college_chemistry": { + "original": 100, + "effective": 100 + }, + "mmlu_college_computer_science": { + "original": 100, + "effective": 100 + }, + "mmlu_college_mathematics": { + "original": 100, + "effective": 100 + }, + "mmlu_college_physics": { + "original": 102, + "effective": 102 + }, + "mmlu_computer_security": { + "original": 100, + "effective": 100 + }, + "mmlu_conceptual_physics": { + "original": 235, + "effective": 235 + }, + "mmlu_electrical_engineering": { + "original": 145, + "effective": 145 + }, + "mmlu_elementary_mathematics": { + "original": 378, + "effective": 378 + }, + "mmlu_high_school_biology": { + "original": 310, + "effective": 310 + }, + "mmlu_high_school_chemistry": { + "original": 203, + "effective": 203 + }, + "mmlu_high_school_computer_science": { + "original": 100, + "effective": 100 + }, + "mmlu_high_school_mathematics": { + 
"original": 270, + "effective": 270 + }, + "mmlu_high_school_physics": { + "original": 151, + "effective": 151 + }, + "mmlu_high_school_statistics": { + "original": 216, + "effective": 216 + }, + "mmlu_machine_learning": { + "original": 112, + "effective": 112 + }, + "mmlu_business_ethics": { + "original": 100, + "effective": 100 + }, + "mmlu_clinical_knowledge": { + "original": 265, + "effective": 265 + }, + "mmlu_college_medicine": { + "original": 173, + "effective": 173 + }, + "mmlu_global_facts": { + "original": 100, + "effective": 100 + }, + "mmlu_human_aging": { + "original": 223, + "effective": 223 + }, + "mmlu_management": { + "original": 103, + "effective": 103 + }, + "mmlu_marketing": { + "original": 234, + "effective": 234 + }, + "mmlu_medical_genetics": { + "original": 100, + "effective": 100 + }, + "mmlu_miscellaneous": { + "original": 783, + "effective": 400 + }, + "mmlu_nutrition": { + "original": 306, + "effective": 306 + }, + "mmlu_professional_accounting": { + "original": 282, + "effective": 282 + }, + "mmlu_professional_medicine": { + "original": 272, + "effective": 272 + }, + "mmlu_virology": { + "original": 166, + "effective": 166 + }, + "mmlu_econometrics": { + "original": 114, + "effective": 114 + }, + "mmlu_high_school_geography": { + "original": 198, + "effective": 198 + }, + "mmlu_high_school_government_and_politics": { + "original": 193, + "effective": 193 + }, + "mmlu_high_school_macroeconomics": { + "original": 390, + "effective": 390 + }, + "mmlu_high_school_microeconomics": { + "original": 238, + "effective": 238 + }, + "mmlu_high_school_psychology": { + "original": 545, + "effective": 400 + }, + "mmlu_human_sexuality": { + "original": 131, + "effective": 131 + }, + "mmlu_professional_psychology": { + "original": 612, + "effective": 400 + }, + "mmlu_public_relations": { + "original": 110, + "effective": 110 + }, + "mmlu_security_studies": { + "original": 245, + "effective": 245 + }, + "mmlu_sociology": { + "original": 201, + "effective": 201 + }, + "mmlu_us_foreign_policy": { + "original": 100, + "effective": 100 + }, + "mmlu_formal_logic": { + "original": 126, + "effective": 126 + }, + "mmlu_high_school_european_history": { + "original": 165, + "effective": 165 + }, + "mmlu_high_school_us_history": { + "original": 204, + "effective": 204 + }, + "mmlu_high_school_world_history": { + "original": 237, + "effective": 237 + }, + "mmlu_international_law": { + "original": 121, + "effective": 121 + }, + "mmlu_jurisprudence": { + "original": 108, + "effective": 108 + }, + "mmlu_logical_fallacies": { + "original": 163, + "effective": 163 + }, + "mmlu_moral_disputes": { + "original": 346, + "effective": 346 + }, + "mmlu_moral_scenarios": { + "original": 895, + "effective": 400 + }, + "mmlu_philosophy": { + "original": 311, + "effective": 311 + }, + "mmlu_prehistory": { + "original": 324, + "effective": 324 + }, + "mmlu_professional_law": { + "original": 1534, + "effective": 400 + }, + "mmlu_world_religions": { + "original": 171, + "effective": 171 + } + }, + "config": { + "model": "hf", + "model_args": { + "pretrained": "unsloth/Qwen3-4B-Base", + "trust_remote_code": true + }, + "model_num_parameters": 4022468096, + "model_dtype": "torch.bfloat16", + "model_revision": "main", + "model_sha": "0573b584bc6b32adc84bb9c91bf9b71bea71fc40", + "batch_size": "12", + "batch_sizes": [], + "device": "cuda:0", + "use_cache": null, + "limit": 400.0, + "bootstrap_iters": 100000, + "gen_kwargs": {}, + "random_seed": 0, + "numpy_seed": 1234, + "torch_seed": 1234, + "fewshot_seed": 1234 
+ }, + "git_hash": "0ce43af", + "date": 1775962695.520946, + "pretty_env_info": "PyTorch version: 2.9.0+cu128\nIs debug build: False\nCUDA used to build PyTorch: 12.8\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.5 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: version 4.1.0\nLibc version: glibc-2.35\n\nPython version: 3.11.14 | packaged by conda-forge | (main, Oct 13 2025, 14:09:32) [GCC 14.3.0] (64-bit runtime)\nPython platform: Linux-6.8.0-90-generic-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: 12.8.93\nCUDA_MODULE_LOADING set to: \nGPU models and configuration: GPU 0: NVIDIA RTX PRO 6000 Blackwell Workstation Edition\nNvidia driver version: 590.48.01\ncuDNN version: Probably one of the following:\n/usr/lib/x86_64-linux-gnu/libcudnn.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_adv.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_cnn.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_engines_precompiled.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_engines_runtime_compiled.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_graph.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_heuristic.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_ops.so.9.8.0\nIs XPU available: False\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 43 bits physical, 48 bits virtual\nByte Order: Little Endian\nCPU(s): 192\nOn-line CPU(s) list: 0-191\nVendor ID: AuthenticAMD\nModel name: AMD EPYC 7642 48-Core Processor\nCPU family: 23\nModel: 49\nThread(s) per core: 2\nCore(s) per socket: 48\nSocket(s): 2\nStepping: 0\nFrequency boost: enabled\nCPU max MHz: 2300.0000\nCPU min MHz: 1500.0000\nBogoMIPS: 4600.15\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd amd_ppin arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid overflow_recov succor smca sev sev_es ibpb_exit_to_user\nVirtualization: AMD-V\nL1d cache: 3 MiB (96 instances)\nL1i cache: 3 MiB (96 instances)\nL2 cache: 48 MiB (96 instances)\nL3 cache: 512 MiB (32 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-47,96-143\nNUMA node1 CPU(s): 48-95,144-191\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Reg file data sampling: Not affected\nVulnerability Retbleed: Mitigation; untrained return thunk; SMT enabled with STIBP protection\nVulnerability Spec rstack overflow: Mitigation; Safe RET\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\nVulnerability Spectre v1: 
Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines; IBPB conditional; STIBP always-on; RSB filling; PBRSB-eIBRS Not affected; BHI Not affected\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nVulnerability Vmscape: Mitigation; IBPB before exit to userspace\n\nVersions of relevant libraries:\n[pip3] executorch==1.0.1\n[pip3] numpy==2.2.6\n[pip3] nvidia-cublas-cu12==12.8.4.1\n[pip3] nvidia-cuda-cupti-cu12==12.8.90\n[pip3] nvidia-cuda-nvrtc-cu12==12.8.93\n[pip3] nvidia-cuda-runtime-cu12==12.8.90\n[pip3] nvidia-cudnn-cu12==9.10.2.21\n[pip3] nvidia-cudnn-frontend==1.17.0\n[pip3] nvidia-cufft-cu12==11.3.3.83\n[pip3] nvidia-curand-cu12==10.3.9.90\n[pip3] nvidia-cusolver-cu12==11.7.3.90\n[pip3] nvidia-cusparse-cu12==12.5.8.93\n[pip3] nvidia-cusparselt-cu12==0.7.1\n[pip3] nvidia-nccl-cu12==2.27.5\n[pip3] nvidia-nvjitlink-cu12==12.8.93\n[pip3] nvidia-nvtx-cu12==12.8.90\n[pip3] optree==0.17.0\n[pip3] pytorch_tokenizers==1.0.1\n[pip3] torch==2.9.0+cu128\n[pip3] torch_c_dlpack_ext==0.1.4\n[pip3] torch-stoi==0.2.3\n[pip3] torchao==0.14.0\n[pip3] torchaudio==2.9.0+cu128\n[pip3] torchcodec==0.9.1\n[pip3] torchelastic==0.2.2\n[pip3] torchvision==0.24.0+cu128\n[pip3] triton==3.5.0\n[pip3] triton_kernels==1.0.0\n[conda] No relevant packages", + "transformers_version": "5.5.3", + "lm_eval_version": "0.4.11", + "upper_git_hash": null, + "tokenizer_pad_token": [ + "<|vision_pad|>", + "151654" + ], + "tokenizer_eos_token": [ + "<|endoftext|>", + "151643" + ], + "tokenizer_bos_token": [ + null, + "None" + ], + "eot_token_id": 151643, + "max_length": 32768, + "task_hashes": {}, + "model_source": "hf", + "model_name": "unsloth/Qwen3-4B-Base", + "model_name_sanitized": "unsloth__Qwen3-4B-Base", + "system_instruction": null, + "system_instruction_sha": null, + "fewshot_as_multiturn": null, + "chat_template": null, + "chat_template_sha": null, + "total_evaluation_time_seconds": "573.7631184216589" +} \ No newline at end of file diff --git a/eval/lm_eval/checkpoints/cpt/__home__unsloth__scp_stage1_cpt__artifacts__cpt_full_96gb_qwen3_4b__checkpoints/results_2026-04-12T02-57-53.684526.json b/eval/lm_eval/checkpoints/cpt/__home__unsloth__scp_stage1_cpt__artifacts__cpt_full_96gb_qwen3_4b__checkpoints/results_2026-04-12T02-57-53.684526.json new file mode 100644 index 0000000..2b94719 --- /dev/null +++ b/eval/lm_eval/checkpoints/cpt/__home__unsloth__scp_stage1_cpt__artifacts__cpt_full_96gb_qwen3_4b__checkpoints/results_2026-04-12T02-57-53.684526.json @@ -0,0 +1,7825 @@ +{ + "results": { + "arc_challenge": { + "alias": "arc_challenge", + "acc,none": 0.4825, + "acc_stderr,none": 0.025015972341295333, + "acc_norm,none": 0.5325, + "acc_norm_stderr,none": 0.024978374105060028 + }, + "arc_easy": { + "alias": "arc_easy", + "acc,none": 0.78, + "acc_stderr,none": 0.020738254217024313, + "acc_norm,none": 0.795, + "acc_norm_stderr,none": 0.020210359883399975 + }, + "hellaswag": { + "alias": "hellaswag", + "acc,none": 0.4975, + "acc_stderr,none": 0.025030995822773405, + "acc_norm,none": 0.63, + "acc_norm_stderr,none": 0.024170447375168467 + }, + "kmmlu": { + "acc,none": 0.4692806221646144, + "acc_stderr,none": 0.0039182515413587, + "alias": "kmmlu" + }, + "kmmlu_applied_science": { + "acc,none": 0.45375, + "acc_stderr,none": 0.007111885914543827, + "alias": " - kmmlu_applied_science" + }, + "kmmlu_aviation_engineering_and_maintenance": { + "alias": " - kmmlu_aviation_engineering_and_maintenance", + "acc,none": 0.46, + "acc_stderr,none": 
0.024951079956135092 + }, + "kmmlu_electronics_engineering": { + "alias": " - kmmlu_electronics_engineering", + "acc,none": 0.6275, + "acc_stderr,none": 0.0242038000082031 + }, + "kmmlu_energy_management": { + "alias": " - kmmlu_energy_management", + "acc,none": 0.395, + "acc_stderr,none": 0.0244731452227279 + }, + "kmmlu_environmental_science": { + "alias": " - kmmlu_environmental_science", + "acc,none": 0.37, + "acc_stderr,none": 0.024170447375168453 + }, + "kmmlu_gas_technology_and_engineering": { + "alias": " - kmmlu_gas_technology_and_engineering", + "acc,none": 0.405, + "acc_stderr,none": 0.024575340657273674 + }, + "kmmlu_geomatics": { + "alias": " - kmmlu_geomatics", + "acc,none": 0.425, + "acc_stderr,none": 0.024748104405776187 + }, + "kmmlu_industrial_engineer": { + "alias": " - kmmlu_industrial_engineer", + "acc,none": 0.4275, + "acc_stderr,none": 0.024766769210836766 + }, + "kmmlu_machine_design_and_manufacturing": { + "alias": " - kmmlu_machine_design_and_manufacturing", + "acc,none": 0.4975, + "acc_stderr,none": 0.025030995822773395 + }, + "kmmlu_maritime_engineering": { + "alias": " - kmmlu_maritime_engineering", + "acc,none": 0.4075, + "acc_stderr,none": 0.02459923129797198 + }, + "kmmlu_nondestructive_testing": { + "alias": " - kmmlu_nondestructive_testing", + "acc,none": 0.475, + "acc_stderr,none": 0.024999999999999994 + }, + "kmmlu_railway_and_automotive_engineering": { + "alias": " - kmmlu_railway_and_automotive_engineering", + "acc,none": 0.3825, + "acc_stderr,none": 0.024330316186072946 + }, + "kmmlu_telecommunications_and_wireless_technology": { + "alias": " - kmmlu_telecommunications_and_wireless_technology", + "acc,none": 0.5725, + "acc_stderr,none": 0.02476676921083677 + }, + "kmmlu_humss": { + "acc,none": 0.4776556776556777, + "acc_stderr,none": 0.00943997794327789, + "alias": " - kmmlu_humss" + }, + "kmmlu_accounting": { + "alias": " - kmmlu_accounting", + "acc,none": 0.5, + "acc_stderr,none": 0.050251890762960605 + }, + "kmmlu_criminal_law": { + "alias": " - kmmlu_criminal_law", + "acc,none": 0.39, + "acc_stderr,none": 0.03457567623250012 + }, + "kmmlu_economics": { + "alias": " - kmmlu_economics", + "acc,none": 0.5461538461538461, + "acc_stderr,none": 0.04383459241436368 + }, + "kmmlu_education": { + "alias": " - kmmlu_education", + "acc,none": 0.64, + "acc_stderr,none": 0.048241815132442176 + }, + "kmmlu_korean_history": { + "alias": " - kmmlu_korean_history", + "acc,none": 0.3, + "acc_stderr,none": 0.046056618647183814 + }, + "kmmlu_law": { + "alias": " - kmmlu_law", + "acc,none": 0.375, + "acc_stderr,none": 0.02423646044779629 + }, + "kmmlu_management": { + "alias": " - kmmlu_management", + "acc,none": 0.5225, + "acc_stderr,none": 0.02500595167250431 + }, + "kmmlu_political_science_and_sociology": { + "alias": " - kmmlu_political_science_and_sociology", + "acc,none": 0.55, + "acc_stderr,none": 0.02877080459987894 + }, + "kmmlu_psychology": { + "alias": " - kmmlu_psychology", + "acc,none": 0.45, + "acc_stderr,none": 0.024905837706844923 + }, + "kmmlu_social_welfare": { + "alias": " - kmmlu_social_welfare", + "acc,none": 0.57, + "acc_stderr,none": 0.02478478796128207 + }, + "kmmlu_taxation": { + "alias": " - kmmlu_taxation", + "acc,none": 0.395, + "acc_stderr,none": 0.03465370682892271 + }, + "kmmlu_other": { + "acc,none": 0.4697222222222222, + "acc_stderr,none": 0.008043980393376315, + "alias": " - kmmlu_other" + }, + "kmmlu_agricultural_sciences": { + "alias": " - kmmlu_agricultural_sciences", + "acc,none": 0.3625, + "acc_stderr,none": 0.024066207238097735 
+ }, + "kmmlu_construction": { + "alias": " - kmmlu_construction", + "acc,none": 0.4, + "acc_stderr,none": 0.024525573579398552 + }, + "kmmlu_fashion": { + "alias": " - kmmlu_fashion", + "acc,none": 0.45, + "acc_stderr,none": 0.024905837706844923 + }, + "kmmlu_food_processing": { + "alias": " - kmmlu_food_processing", + "acc,none": 0.3675, + "acc_stderr,none": 0.024136399679191744 + }, + "kmmlu_health": { + "alias": " - kmmlu_health", + "acc,none": 0.58, + "acc_stderr,none": 0.049604496374885836 + }, + "kmmlu_interior_architecture_and_design": { + "alias": " - kmmlu_interior_architecture_and_design", + "acc,none": 0.6175, + "acc_stderr,none": 0.024330316186072936 + }, + "kmmlu_marketing": { + "alias": " - kmmlu_marketing", + "acc,none": 0.765, + "acc_stderr,none": 0.021226490755055 + }, + "kmmlu_patent": { + "alias": " - kmmlu_patent", + "acc,none": 0.42, + "acc_stderr,none": 0.049604496374885836 + }, + "kmmlu_public_safety": { + "alias": " - kmmlu_public_safety", + "acc,none": 0.38, + "acc_stderr,none": 0.024299715851758236 + }, + "kmmlu_real_estate": { + "alias": " - kmmlu_real_estate", + "acc,none": 0.45, + "acc_stderr,none": 0.03526639466921485 + }, + "kmmlu_refrigerating_machinery": { + "alias": " - kmmlu_refrigerating_machinery", + "acc,none": 0.41, + "acc_stderr,none": 0.02462246259333947 + }, + "kmmlu_stem": { + "acc,none": 0.48093023255813955, + "acc_stderr,none": 0.007306868046626305, + "alias": " - kmmlu_stem" + }, + "kmmlu_biology": { + "alias": " - kmmlu_biology", + "acc,none": 0.3125, + "acc_stderr,none": 0.023204644228784484 + }, + "kmmlu_chemical_engineering": { + "alias": " - kmmlu_chemical_engineering", + "acc,none": 0.4875, + "acc_stderr,none": 0.025023485209500245 + }, + "kmmlu_chemistry": { + "alias": " - kmmlu_chemistry", + "acc,none": 0.5175, + "acc_stderr,none": 0.025015972341295323 + }, + "kmmlu_civil_engineering": { + "alias": " - kmmlu_civil_engineering", + "acc,none": 0.3925, + "acc_stderr,none": 0.024445927747963322 + }, + "kmmlu_computer_science": { + "alias": " - kmmlu_computer_science", + "acc,none": 0.74, + "acc_stderr,none": 0.021959178349484305 + }, + "kmmlu_ecology": { + "alias": " - kmmlu_ecology", + "acc,none": 0.505, + "acc_stderr,none": 0.02503005711936146 + }, + "kmmlu_electrical_engineering": { + "alias": " - kmmlu_electrical_engineering", + "acc,none": 0.3425, + "acc_stderr,none": 0.02375700661717548 + }, + "kmmlu_information_technology": { + "alias": " - kmmlu_information_technology", + "acc,none": 0.7525, + "acc_stderr,none": 0.021605006729678956 + }, + "kmmlu_materials_engineering": { + "alias": " - kmmlu_materials_engineering", + "acc,none": 0.475, + "acc_stderr,none": 0.025 + }, + "kmmlu_math": { + "alias": " - kmmlu_math", + "acc,none": 0.3333333333333333, + "acc_stderr,none": 0.027262027336984393 + }, + "kmmlu_mechanical_engineering": { + "alias": " - kmmlu_mechanical_engineering", + "acc,none": 0.395, + "acc_stderr,none": 0.0244731452227279 + }, + "kobest_boolq": { + "alias": "kobest_boolq", + "acc,none": 0.755, + "acc_stderr,none": 0.02153129097913247, + "f1,none": 0.7379609080456697, + "f1_stderr,none": "N/A" + }, + "kobest_copa": { + "alias": "kobest_copa", + "acc,none": 0.6525, + "acc_stderr,none": 0.023838625698390636, + "f1,none": 0.6523935455233165, + "f1_stderr,none": "N/A" + }, + "kobest_hellaswag": { + "alias": "kobest_hellaswag", + "acc,none": 0.4325, + "acc_stderr,none": 0.024802162065186355, + "f1,none": 0.4264529493583016, + "f1_stderr,none": "N/A", + "acc_norm,none": 0.565, + "acc_norm_stderr,none": 0.024818892876375884 + }, 
+ "mmlu": { + "acc,none": 0.7352865587252634, + "acc_stderr,none": 0.003887849176172822, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.6862808842652796, + "acc_stderr,none": 0.0077616777391173045, + "alias": " - humanities" + }, + "mmlu_formal_logic": { + "alias": " - formal_logic", + "acc,none": 0.5873015873015873, + "acc_stderr,none": 0.04403438954768177 + }, + "mmlu_high_school_european_history": { + "alias": " - high_school_european_history", + "acc,none": 0.7818181818181819, + "acc_stderr,none": 0.03225078108306289 + }, + "mmlu_high_school_us_history": { + "alias": " - high_school_us_history", + "acc,none": 0.8186274509803921, + "acc_stderr,none": 0.02704462171947408 + }, + "mmlu_high_school_world_history": { + "alias": " - high_school_world_history", + "acc,none": 0.8481012658227848, + "acc_stderr,none": 0.023363878096632453 + }, + "mmlu_international_law": { + "alias": " - international_law", + "acc,none": 0.8264462809917356, + "acc_stderr,none": 0.0345727283691767 + }, + "mmlu_jurisprudence": { + "alias": " - jurisprudence", + "acc,none": 0.8148148148148148, + "acc_stderr,none": 0.03755265865037183 + }, + "mmlu_logical_fallacies": { + "alias": " - logical_fallacies", + "acc,none": 0.8466257668711656, + "acc_stderr,none": 0.02831160144143859 + }, + "mmlu_moral_disputes": { + "alias": " - moral_disputes", + "acc,none": 0.7543352601156069, + "acc_stderr,none": 0.023176298203992005 + }, + "mmlu_moral_scenarios": { + "alias": " - moral_scenarios", + "acc,none": 0.3225, + "acc_stderr,none": 0.023400926978618716 + }, + "mmlu_philosophy": { + "alias": " - philosophy", + "acc,none": 0.7331189710610932, + "acc_stderr,none": 0.025122637608816636 + }, + "mmlu_prehistory": { + "alias": " - prehistory", + "acc,none": 0.7870370370370371, + "acc_stderr,none": 0.02277971908873339 + }, + "mmlu_professional_law": { + "alias": " - professional_law", + "acc,none": 0.5075, + "acc_stderr,none": 0.02502849253543831 + }, + "mmlu_world_religions": { + "alias": " - world_religions", + "acc,none": 0.8070175438596491, + "acc_stderr,none": 0.030267457554898458 + }, + "mmlu_other": { + "acc,none": 0.7415565345080763, + "acc_stderr,none": 0.008104267812218218, + "alias": " - other" + }, + "mmlu_business_ethics": { + "alias": " - business_ethics", + "acc,none": 0.76, + "acc_stderr,none": 0.04292346959909282 + }, + "mmlu_clinical_knowledge": { + "alias": " - clinical_knowledge", + "acc,none": 0.769811320754717, + "acc_stderr,none": 0.025907897122408173 + }, + "mmlu_college_medicine": { + "alias": " - college_medicine", + "acc,none": 0.7456647398843931, + "acc_stderr,none": 0.0332055644308557 + }, + "mmlu_global_facts": { + "alias": " - global_facts", + "acc,none": 0.44, + "acc_stderr,none": 0.0498887651569859 + }, + "mmlu_human_aging": { + "alias": " - human_aging", + "acc,none": 0.7399103139013453, + "acc_stderr,none": 0.029442495585857473 + }, + "mmlu_management": { + "alias": " - management", + "acc,none": 0.8640776699029126, + "acc_stderr,none": 0.0339329572976101 + }, + "mmlu_marketing": { + "alias": " - marketing", + "acc,none": 0.8931623931623932, + "acc_stderr,none": 0.020237149008990932 + }, + "mmlu_medical_genetics": { + "alias": " - medical_genetics", + "acc,none": 0.8, + "acc_stderr,none": 0.04020151261036846 + }, + "mmlu_miscellaneous": { + "alias": " - miscellaneous", + "acc,none": 0.8225, + "acc_stderr,none": 0.019128489820344343 + }, + "mmlu_nutrition": { + "alias": " - nutrition", + "acc,none": 0.7777777777777778, + "acc_stderr,none": 0.02380518652488816 + }, + 
"mmlu_professional_accounting": { + "alias": " - professional_accounting", + "acc,none": 0.574468085106383, + "acc_stderr,none": 0.029494827600144366 + }, + "mmlu_professional_medicine": { + "alias": " - professional_medicine", + "acc,none": 0.7757352941176471, + "acc_stderr,none": 0.02533684856333236 + }, + "mmlu_virology": { + "alias": " - virology", + "acc,none": 0.5060240963855421, + "acc_stderr,none": 0.038922121953330446 + }, + "mmlu_social_sciences": { + "acc,none": 0.8158088235294118, + "acc_stderr,none": 0.007306038192044323, + "alias": " - social sciences" + }, + "mmlu_econometrics": { + "alias": " - econometrics", + "acc,none": 0.6578947368421053, + "acc_stderr,none": 0.04462917535336937 + }, + "mmlu_high_school_geography": { + "alias": " - high_school_geography", + "acc,none": 0.8585858585858586, + "acc_stderr,none": 0.02482590979334335 + }, + "mmlu_high_school_government_and_politics": { + "alias": " - high_school_government_and_politics", + "acc,none": 0.8704663212435233, + "acc_stderr,none": 0.024233532297758716 + }, + "mmlu_high_school_macroeconomics": { + "alias": " - high_school_macroeconomics", + "acc,none": 0.8076923076923077, + "acc_stderr,none": 0.019982347208637296 + }, + "mmlu_high_school_microeconomics": { + "alias": " - high_school_microeconomics", + "acc,none": 0.8991596638655462, + "acc_stderr,none": 0.019559663430480802 + }, + "mmlu_high_school_psychology": { + "alias": " - high_school_psychology", + "acc,none": 0.905, + "acc_stderr,none": 0.014679107277903242 + }, + "mmlu_human_sexuality": { + "alias": " - human_sexuality", + "acc,none": 0.7786259541984732, + "acc_stderr,none": 0.03641297081313729 + }, + "mmlu_professional_psychology": { + "alias": " - professional_psychology", + "acc,none": 0.74, + "acc_stderr,none": 0.02195917834948431 + }, + "mmlu_public_relations": { + "alias": " - public_relations", + "acc,none": 0.6727272727272727, + "acc_stderr,none": 0.0449429086625209 + }, + "mmlu_security_studies": { + "alias": " - security_studies", + "acc,none": 0.7428571428571429, + "acc_stderr,none": 0.027979823538744546 + }, + "mmlu_sociology": { + "alias": " - sociology", + "acc,none": 0.8557213930348259, + "acc_stderr,none": 0.02484575321230605 + }, + "mmlu_us_foreign_policy": { + "alias": " - us_foreign_policy", + "acc,none": 0.89, + "acc_stderr,none": 0.03144660377352203 + }, + "mmlu_stem": { + "acc,none": 0.7082143989850935, + "acc_stderr,none": 0.007816574368205405, + "alias": " - stem" + }, + "mmlu_abstract_algebra": { + "alias": " - abstract_algebra", + "acc,none": 0.46, + "acc_stderr,none": 0.05009082659620333 + }, + "mmlu_anatomy": { + "alias": " - anatomy", + "acc,none": 0.7111111111111111, + "acc_stderr,none": 0.0391545063041425 + }, + "mmlu_astronomy": { + "alias": " - astronomy", + "acc,none": 0.8486842105263158, + "acc_stderr,none": 0.029162631596843975 + }, + "mmlu_college_biology": { + "alias": " - college_biology", + "acc,none": 0.8263888888888888, + "acc_stderr,none": 0.03167473383795717 + }, + "mmlu_college_chemistry": { + "alias": " - college_chemistry", + "acc,none": 0.52, + "acc_stderr,none": 0.050211673156867795 + }, + "mmlu_college_computer_science": { + "alias": " - college_computer_science", + "acc,none": 0.68, + "acc_stderr,none": 0.04688261722621504 + }, + "mmlu_college_mathematics": { + "alias": " - college_mathematics", + "acc,none": 0.53, + "acc_stderr,none": 0.05016135580465919 + }, + "mmlu_college_physics": { + "alias": " - college_physics", + "acc,none": 0.5784313725490197, + "acc_stderr,none": 0.049135952012745045 + }, + 
"mmlu_computer_security": { + "alias": " - computer_security", + "acc,none": 0.83, + "acc_stderr,none": 0.03775251680686371 + }, + "mmlu_conceptual_physics": { + "alias": " - conceptual_physics", + "acc,none": 0.8, + "acc_stderr,none": 0.026148818018424506 + }, + "mmlu_electrical_engineering": { + "alias": " - electrical_engineering", + "acc,none": 0.7586206896551724, + "acc_stderr,none": 0.03565998174135302 + }, + "mmlu_elementary_mathematics": { + "alias": " - elementary_mathematics", + "acc,none": 0.6746031746031746, + "acc_stderr,none": 0.024130158299762613 + }, + "mmlu_high_school_biology": { + "alias": " - high_school_biology", + "acc,none": 0.9, + "acc_stderr,none": 0.017066403719657258 + }, + "mmlu_high_school_chemistry": { + "alias": " - high_school_chemistry", + "acc,none": 0.729064039408867, + "acc_stderr,none": 0.03127090713297698 + }, + "mmlu_high_school_computer_science": { + "alias": " - high_school_computer_science", + "acc,none": 0.85, + "acc_stderr,none": 0.0358870281282637 + }, + "mmlu_high_school_mathematics": { + "alias": " - high_school_mathematics", + "acc,none": 0.5296296296296297, + "acc_stderr,none": 0.030431963547936584 + }, + "mmlu_high_school_physics": { + "alias": " - high_school_physics", + "acc,none": 0.6754966887417219, + "acc_stderr,none": 0.03822746937658752 + }, + "mmlu_high_school_statistics": { + "alias": " - high_school_statistics", + "acc,none": 0.7037037037037037, + "acc_stderr,none": 0.031141447823536044 + }, + "mmlu_machine_learning": { + "alias": " - machine_learning", + "acc,none": 0.5892857142857143, + "acc_stderr,none": 0.04669510663875191 + }, + "winogrande": { + "alias": "winogrande", + "acc,none": 0.7225, + "acc_stderr,none": 0.022416302137144652 + } + }, + "groups": { + "kmmlu": { + "acc,none": 0.4692806221646144, + "acc_stderr,none": 0.0039182515413587, + "alias": "kmmlu" + }, + "kmmlu_applied_science": { + "acc,none": 0.45375, + "acc_stderr,none": 0.007111885914543827, + "alias": " - kmmlu_applied_science" + }, + "kmmlu_humss": { + "acc,none": 0.4776556776556777, + "acc_stderr,none": 0.00943997794327789, + "alias": " - kmmlu_humss" + }, + "kmmlu_other": { + "acc,none": 0.4697222222222222, + "acc_stderr,none": 0.008043980393376315, + "alias": " - kmmlu_other" + }, + "kmmlu_stem": { + "acc,none": 0.48093023255813955, + "acc_stderr,none": 0.007306868046626305, + "alias": " - kmmlu_stem" + }, + "mmlu": { + "acc,none": 0.7352865587252634, + "acc_stderr,none": 0.003887849176172822, + "alias": "mmlu" + }, + "mmlu_humanities": { + "acc,none": 0.6862808842652796, + "acc_stderr,none": 0.0077616777391173045, + "alias": " - humanities" + }, + "mmlu_other": { + "acc,none": 0.7415565345080763, + "acc_stderr,none": 0.008104267812218218, + "alias": " - other" + }, + "mmlu_social_sciences": { + "acc,none": 0.8158088235294118, + "acc_stderr,none": 0.007306038192044323, + "alias": " - social sciences" + }, + "mmlu_stem": { + "acc,none": 0.7082143989850935, + "acc_stderr,none": 0.007816574368205405, + "alias": " - stem" + } + }, + "group_subtasks": { + "mmlu_humanities": [ + "mmlu_formal_logic", + "mmlu_high_school_european_history", + "mmlu_high_school_us_history", + "mmlu_high_school_world_history", + "mmlu_international_law", + "mmlu_jurisprudence", + "mmlu_logical_fallacies", + "mmlu_moral_disputes", + "mmlu_moral_scenarios", + "mmlu_philosophy", + "mmlu_prehistory", + "mmlu_professional_law", + "mmlu_world_religions" + ], + "mmlu_social_sciences": [ + "mmlu_econometrics", + "mmlu_high_school_geography", + "mmlu_high_school_government_and_politics", + 
"mmlu_high_school_macroeconomics", + "mmlu_high_school_microeconomics", + "mmlu_high_school_psychology", + "mmlu_human_sexuality", + "mmlu_professional_psychology", + "mmlu_public_relations", + "mmlu_security_studies", + "mmlu_sociology", + "mmlu_us_foreign_policy" + ], + "mmlu_other": [ + "mmlu_business_ethics", + "mmlu_clinical_knowledge", + "mmlu_college_medicine", + "mmlu_global_facts", + "mmlu_human_aging", + "mmlu_management", + "mmlu_marketing", + "mmlu_medical_genetics", + "mmlu_miscellaneous", + "mmlu_nutrition", + "mmlu_professional_accounting", + "mmlu_professional_medicine", + "mmlu_virology" + ], + "mmlu_stem": [ + "mmlu_abstract_algebra", + "mmlu_anatomy", + "mmlu_astronomy", + "mmlu_college_biology", + "mmlu_college_chemistry", + "mmlu_college_computer_science", + "mmlu_college_mathematics", + "mmlu_college_physics", + "mmlu_computer_security", + "mmlu_conceptual_physics", + "mmlu_electrical_engineering", + "mmlu_elementary_mathematics", + "mmlu_high_school_biology", + "mmlu_high_school_chemistry", + "mmlu_high_school_computer_science", + "mmlu_high_school_mathematics", + "mmlu_high_school_physics", + "mmlu_high_school_statistics", + "mmlu_machine_learning" + ], + "mmlu": [ + "mmlu_stem", + "mmlu_other", + "mmlu_social_sciences", + "mmlu_humanities" + ], + "hellaswag": [], + "arc_easy": [], + "arc_challenge": [], + "winogrande": [], + "kmmlu_humss": [ + "kmmlu_accounting", + "kmmlu_criminal_law", + "kmmlu_economics", + "kmmlu_education", + "kmmlu_korean_history", + "kmmlu_law", + "kmmlu_management", + "kmmlu_political_science_and_sociology", + "kmmlu_psychology", + "kmmlu_social_welfare", + "kmmlu_taxation" + ], + "kmmlu_applied_science": [ + "kmmlu_aviation_engineering_and_maintenance", + "kmmlu_electronics_engineering", + "kmmlu_energy_management", + "kmmlu_environmental_science", + "kmmlu_gas_technology_and_engineering", + "kmmlu_geomatics", + "kmmlu_industrial_engineer", + "kmmlu_machine_design_and_manufacturing", + "kmmlu_maritime_engineering", + "kmmlu_nondestructive_testing", + "kmmlu_railway_and_automotive_engineering", + "kmmlu_telecommunications_and_wireless_technology" + ], + "kmmlu_other": [ + "kmmlu_agricultural_sciences", + "kmmlu_construction", + "kmmlu_fashion", + "kmmlu_food_processing", + "kmmlu_health", + "kmmlu_interior_architecture_and_design", + "kmmlu_marketing", + "kmmlu_patent", + "kmmlu_public_safety", + "kmmlu_real_estate", + "kmmlu_refrigerating_machinery" + ], + "kmmlu_stem": [ + "kmmlu_biology", + "kmmlu_chemical_engineering", + "kmmlu_chemistry", + "kmmlu_civil_engineering", + "kmmlu_computer_science", + "kmmlu_ecology", + "kmmlu_electrical_engineering", + "kmmlu_information_technology", + "kmmlu_materials_engineering", + "kmmlu_math", + "kmmlu_mechanical_engineering" + ], + "kmmlu": [ + "kmmlu_stem", + "kmmlu_other", + "kmmlu_applied_science", + "kmmlu_humss" + ], + "kobest_boolq": [], + "kobest_copa": [], + "kobest_hellaswag": [] + }, + "configs": { + "arc_challenge": { + "task": "arc_challenge", + "tag": [ + "ai2_arc" + ], + "dataset_path": "allenai/ai2_arc", + "dataset_name": "ARC-Challenge", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": "Question: {{question}}\nAnswer:", + "doc_to_target": "{{choices.label.index(answerKey)}}", + "unsafe_code": false, + "doc_to_choice": "{{choices.text}}", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + 
"samples": null, + "doc_to_text": "Question: {{question}}\nAnswer:", + "doc_to_choice": "{{choices.text}}", + "doc_to_target": "{{choices.label.index(answerKey)}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": true, + "doc_to_decontamination_query": "Question: {{question}}\nAnswer:", + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "arc_easy": { + "task": "arc_easy", + "tag": [ + "ai2_arc" + ], + "dataset_path": "allenai/ai2_arc", + "dataset_name": "ARC-Easy", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": "Question: {{question}}\nAnswer:", + "doc_to_target": "{{choices.label.index(answerKey)}}", + "unsafe_code": false, + "doc_to_choice": "{{choices.text}}", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "Question: {{question}}\nAnswer:", + "doc_to_choice": "{{choices.text}}", + "doc_to_target": "{{choices.label.index(answerKey)}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": true, + "doc_to_decontamination_query": "Question: {{question}}\nAnswer:", + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "hellaswag": { + "task": "hellaswag", + "tag": [ + "multiple_choice" + ], + "dataset_path": "Rowan/hellaswag", + "training_split": "train", + "validation_split": "validation", + "process_docs": "def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:\n def _process_doc(doc):\n ctx = doc[\"ctx_a\"] + \" \" + doc[\"ctx_b\"].capitalize()\n out_doc = {\n \"query\": preprocess(doc[\"activity_label\"] + \": \" + ctx),\n \"choices\": [preprocess(ending) for ending in doc[\"endings\"]],\n \"gold\": int(doc[\"label\"]),\n }\n return out_doc\n\n return dataset.map(_process_doc)\n", + "doc_to_text": "{{query}}", + "doc_to_target": "{{label}}", + "unsafe_code": false, + "doc_to_choice": "choices", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": "", + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{query}}", + "doc_to_choice": "choices", + "doc_to_target": "{{label}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 
1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_accounting": { + "task": "kmmlu_accounting", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Accounting", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_agricultural_sciences": { + "task": "kmmlu_agricultural_sciences", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Agricultural-Sciences", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_aviation_engineering_and_maintenance": { + "task": "kmmlu_aviation_engineering_and_maintenance", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Aviation-Engineering-and-Maintenance", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_biology": { + "task": "kmmlu_biology", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Biology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_chemical_engineering": { + "task": "kmmlu_chemical_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Chemical-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_chemistry": { + "task": "kmmlu_chemistry", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Chemistry", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_civil_engineering": { + "task": "kmmlu_civil_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Civil-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_computer_science": { + "task": "kmmlu_computer_science", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Computer-Science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_construction": { + "task": "kmmlu_construction", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Construction", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_criminal_law": { + "task": "kmmlu_criminal_law", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Criminal-Law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_ecology": { + "task": "kmmlu_ecology", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Ecology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_economics": { + "task": "kmmlu_economics", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Economics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_education": { + "task": "kmmlu_education", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Education", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_electrical_engineering": { + "task": "kmmlu_electrical_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Electrical-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_electronics_engineering": { + "task": "kmmlu_electronics_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Electronics-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_energy_management": { + "task": "kmmlu_energy_management", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Energy-Management", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_environmental_science": { + "task": "kmmlu_environmental_science", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Environmental-Science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_fashion": { + "task": "kmmlu_fashion", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Fashion", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_food_processing": { + "task": "kmmlu_food_processing", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Food-Processing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_gas_technology_and_engineering": { + "task": "kmmlu_gas_technology_and_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Gas-Technology-and-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_geomatics": { + "task": "kmmlu_geomatics", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Geomatics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_health": { + "task": "kmmlu_health", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Health", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_industrial_engineer": { + "task": "kmmlu_industrial_engineer", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Industrial-Engineer", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_information_technology": { + "task": "kmmlu_information_technology", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Information-Technology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_interior_architecture_and_design": { + "task": "kmmlu_interior_architecture_and_design", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Interior-Architecture-and-Design", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_korean_history": { + "task": "kmmlu_korean_history", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Korean-History", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_law": { + "task": "kmmlu_law", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_machine_design_and_manufacturing": { + "task": "kmmlu_machine_design_and_manufacturing", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Machine-Design-and-Manufacturing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_management": { + "task": "kmmlu_management", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Management", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_maritime_engineering": { + "task": "kmmlu_maritime_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Maritime-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_marketing": { + "task": "kmmlu_marketing", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Marketing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_materials_engineering": { + "task": "kmmlu_materials_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Materials-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_math": { + "task": "kmmlu_math", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Math", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_mechanical_engineering": { + "task": "kmmlu_mechanical_engineering", + "tag": "kmmlu_stem_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Mechanical-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_nondestructive_testing": { + "task": "kmmlu_nondestructive_testing", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Nondestructive-Testing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_patent": { + "task": "kmmlu_patent", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Patent", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_political_science_and_sociology": { + "task": "kmmlu_political_science_and_sociology", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Political-Science-and-Sociology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_psychology": { + "task": "kmmlu_psychology", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Psychology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_public_safety": { + "task": "kmmlu_public_safety", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Public-Safety", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_railway_and_automotive_engineering": { + "task": "kmmlu_railway_and_automotive_engineering", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Railway-and-Automotive-Engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_real_estate": { + "task": "kmmlu_real_estate", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Real-Estate", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_refrigerating_machinery": { + "task": "kmmlu_refrigerating_machinery", + "tag": "kmmlu_other_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Refrigerating-Machinery", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_social_welfare": { + "task": "kmmlu_social_welfare", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Social-Welfare", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_taxation": { + "task": "kmmlu_taxation", + "tag": "kmmlu_humss_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Taxation", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kmmlu_telecommunications_and_wireless_technology": { + "task": "kmmlu_telecommunications_and_wireless_technology", + "tag": "kmmlu_applied_science_tasks", + "dataset_path": "HAERAE-HUB/KMMLU", + "dataset_name": "Telecommunications-and-Wireless-Technology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:", + "doc_to_target": "{{answer-1}}", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. 
{{D}}\n정답:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "{{answer-1}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 2.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kobest_boolq": { + "task": "kobest_boolq", + "dataset_path": "skt/kobest_v1", + "dataset_name": "boolq", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": "{{paragraph}} 질문: {{question}} 답변: ", + "doc_to_target": "{{label}}", + "unsafe_code": false, + "doc_to_choice": [ + "아니오", + "예" + ], + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{paragraph}} 질문: {{question}} 답변: ", + "doc_to_choice": [ + "아니오", + "예" + ], + "doc_to_target": "{{label}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "f1", + "aggregation": "def macro_f1_score(items):\n from sklearn.metrics import f1_score\n\n unzipped_list = list(zip(*items))\n golds = unzipped_list[0]\n preds = unzipped_list[1]\n fscore = f1_score(golds, preds, average=\"macro\")\n return fscore\n", + "average": "macro", + "hf_evaluate": true, + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kobest_copa": { + "task": "kobest_copa", + "dataset_path": "skt/kobest_v1", + "dataset_name": "copa", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "doc_to_text": "def copa_doc_to_text(doc: dict) -> str:\n connector = {\"원인\": \" 왜냐하면\", \"결과\": \" 그래서\"}[doc[\"question\"].strip()]\n return f\"\"\"{doc[\"premise\"]} {connector}\"\"\"\n", + "doc_to_target": "def copa_doc_to_target(doc: dict) -> str:\n correct_choice = doc[\"alternative_1\"] if doc[\"label\"] == 0 else doc[\"alternative_2\"]\n return f\"\"\"{correct_choice}\"\"\"\n", + "unsafe_code": false, + "doc_to_choice": "def copa_doc_to_choice(doc: dict) -> list:\n return [f\"\"\"{doc[\"alternative_1\"]}\"\"\", f\"\"\"{doc[\"alternative_2\"]}\"\"\"]\n", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "", + "doc_to_choice": "", + "doc_to_target": "", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "f1", + "aggregation": "def macro_f1_score(items):\n from sklearn.metrics import f1_score\n\n unzipped_list = list(zip(*items))\n golds = unzipped_list[0]\n preds = unzipped_list[1]\n fscore = f1_score(golds, preds, average=\"macro\")\n 
return fscore\n", + "average": "macro", + "hf_evaluate": true, + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "kobest_hellaswag": { + "task": "kobest_hellaswag", + "dataset_path": "skt/kobest_v1", + "dataset_name": "hellaswag", + "training_split": "train", + "validation_split": "validation", + "test_split": "test", + "process_docs": "def hellaswag_process_doc(doc: Dataset) -> Dataset:\n def preprocessor(dataset):\n return {\n \"query\": f\"\"\"문장: {dataset[\"context\"]}\"\"\",\n \"choices\": [\n dataset[\"ending_1\"],\n dataset[\"ending_2\"],\n dataset[\"ending_3\"],\n dataset[\"ending_4\"],\n ],\n \"gold\": int(dataset[\"label\"]),\n }\n\n return doc.map(preprocessor)\n", + "doc_to_text": "{{query}}", + "doc_to_target": "{{label}}", + "unsafe_code": false, + "doc_to_choice": "choices", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": "", + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{query}}", + "doc_to_choice": "choices", + "doc_to_target": "{{label}}", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "acc_norm", + "aggregation": "mean", + "higher_is_better": true + }, + { + "metric": "f1", + "aggregation": "def macro_f1_score(items):\n from sklearn.metrics import f1_score\n\n unzipped_list = list(zip(*items))\n golds = unzipped_list[0]\n preds = unzipped_list[1]\n fscore = f1_score(golds, preds, average=\"macro\")\n return fscore\n", + "average": "macro", + "hf_evaluate": true, + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_abstract_algebra": { + "task": "mmlu_abstract_algebra", + "task_alias": "abstract_algebra", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "abstract_algebra", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about abstract algebra.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_anatomy": { + "task": "mmlu_anatomy", + "task_alias": "anatomy", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "anatomy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about anatomy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_astronomy": { + "task": "mmlu_astronomy", + "task_alias": "astronomy", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "astronomy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about astronomy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_business_ethics": { + "task": "mmlu_business_ethics", + "task_alias": "business_ethics", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "business_ethics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about business ethics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_clinical_knowledge": { + "task": "mmlu_clinical_knowledge", + "task_alias": "clinical_knowledge", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "clinical_knowledge", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about clinical knowledge.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_biology": { + "task": "mmlu_college_biology", + "task_alias": "college_biology", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_biology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college biology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_chemistry": { + "task": "mmlu_college_chemistry", + "task_alias": "college_chemistry", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_chemistry", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college chemistry.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_computer_science": { + "task": "mmlu_college_computer_science", + "task_alias": "college_computer_science", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_computer_science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college computer science.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_mathematics": { + "task": "mmlu_college_mathematics", + "task_alias": "college_mathematics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_mathematics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college mathematics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_medicine": { + "task": "mmlu_college_medicine", + "task_alias": "college_medicine", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_medicine", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college medicine.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_college_physics": { + "task": "mmlu_college_physics", + "task_alias": "college_physics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "college_physics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about college physics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_computer_security": { + "task": "mmlu_computer_security", + "task_alias": "computer_security", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "computer_security", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about computer security.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_conceptual_physics": { + "task": "mmlu_conceptual_physics", + "task_alias": "conceptual_physics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "conceptual_physics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about conceptual physics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_econometrics": { + "task": "mmlu_econometrics", + "task_alias": "econometrics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "econometrics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about econometrics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_electrical_engineering": { + "task": "mmlu_electrical_engineering", + "task_alias": "electrical_engineering", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "electrical_engineering", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about electrical engineering.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_elementary_mathematics": { + "task": "mmlu_elementary_mathematics", + "task_alias": "elementary_mathematics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "elementary_mathematics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about elementary mathematics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_formal_logic": { + "task": "mmlu_formal_logic", + "task_alias": "formal_logic", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "formal_logic", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about formal logic.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_global_facts": { + "task": "mmlu_global_facts", + "task_alias": "global_facts", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "global_facts", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about global facts.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_biology": { + "task": "mmlu_high_school_biology", + "task_alias": "high_school_biology", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_biology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school biology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_chemistry": { + "task": "mmlu_high_school_chemistry", + "task_alias": "high_school_chemistry", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_chemistry", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school chemistry.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_computer_science": { + "task": "mmlu_high_school_computer_science", + "task_alias": "high_school_computer_science", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_computer_science", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school computer science.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_european_history": { + "task": "mmlu_high_school_european_history", + "task_alias": "high_school_european_history", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_european_history", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school european history.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_geography": { + "task": "mmlu_high_school_geography", + "task_alias": "high_school_geography", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_geography", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school geography.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_government_and_politics": { + "task": "mmlu_high_school_government_and_politics", + "task_alias": "high_school_government_and_politics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_government_and_politics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school government and politics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_macroeconomics": { + "task": "mmlu_high_school_macroeconomics", + "task_alias": "high_school_macroeconomics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_macroeconomics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school macroeconomics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_mathematics": { + "task": "mmlu_high_school_mathematics", + "task_alias": "high_school_mathematics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_mathematics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school mathematics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_microeconomics": { + "task": "mmlu_high_school_microeconomics", + "task_alias": "high_school_microeconomics", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_microeconomics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school microeconomics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_physics": { + "task": "mmlu_high_school_physics", + "task_alias": "high_school_physics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_physics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school physics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_psychology": { + "task": "mmlu_high_school_psychology", + "task_alias": "high_school_psychology", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_psychology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school psychology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_statistics": { + "task": "mmlu_high_school_statistics", + "task_alias": "high_school_statistics", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_statistics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school statistics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_us_history": { + "task": "mmlu_high_school_us_history", + "task_alias": "high_school_us_history", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_us_history", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school us history.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_high_school_world_history": { + "task": "mmlu_high_school_world_history", + "task_alias": "high_school_world_history", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "high_school_world_history", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about high school world history.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_human_aging": { + "task": "mmlu_human_aging", + "task_alias": "human_aging", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "human_aging", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about human aging.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_human_sexuality": { + "task": "mmlu_human_sexuality", + "task_alias": "human_sexuality", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "human_sexuality", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about human sexuality.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_international_law": { + "task": "mmlu_international_law", + "task_alias": "international_law", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "international_law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about international law.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_jurisprudence": { + "task": "mmlu_jurisprudence", + "task_alias": "jurisprudence", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "jurisprudence", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about jurisprudence.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_logical_fallacies": { + "task": "mmlu_logical_fallacies", + "task_alias": "logical_fallacies", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "logical_fallacies", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about logical fallacies.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_machine_learning": { + "task": "mmlu_machine_learning", + "task_alias": "machine_learning", + "tag": "mmlu_stem_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "machine_learning", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about machine learning.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_management": { + "task": "mmlu_management", + "task_alias": "management", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "management", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about management.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_marketing": { + "task": "mmlu_marketing", + "task_alias": "marketing", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "marketing", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about marketing.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_medical_genetics": { + "task": "mmlu_medical_genetics", + "task_alias": "medical_genetics", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "medical_genetics", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about medical genetics.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_miscellaneous": { + "task": "mmlu_miscellaneous", + "task_alias": "miscellaneous", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "miscellaneous", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about miscellaneous.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_moral_disputes": { + "task": "mmlu_moral_disputes", + "task_alias": "moral_disputes", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "moral_disputes", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about moral disputes.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_moral_scenarios": { + "task": "mmlu_moral_scenarios", + "task_alias": "moral_scenarios", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "moral_scenarios", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about moral scenarios.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_nutrition": { + "task": "mmlu_nutrition", + "task_alias": "nutrition", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "nutrition", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about nutrition.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_philosophy": { + "task": "mmlu_philosophy", + "task_alias": "philosophy", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "philosophy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about philosophy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_prehistory": { + "task": "mmlu_prehistory", + "task_alias": "prehistory", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "prehistory", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about prehistory.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_professional_accounting": { + "task": "mmlu_professional_accounting", + "task_alias": "professional_accounting", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_accounting", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional accounting.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_professional_law": { + "task": "mmlu_professional_law", + "task_alias": "professional_law", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_law", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional law.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_professional_medicine": { + "task": "mmlu_professional_medicine", + "task_alias": "professional_medicine", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_medicine", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional medicine.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_professional_psychology": { + "task": "mmlu_professional_psychology", + "task_alias": "professional_psychology", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "professional_psychology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about professional psychology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_public_relations": { + "task": "mmlu_public_relations", + "task_alias": "public_relations", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "public_relations", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about public relations.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_security_studies": { + "task": "mmlu_security_studies", + "task_alias": "security_studies", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "security_studies", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about security studies.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_sociology": { + "task": "mmlu_sociology", + "task_alias": "sociology", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "sociology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about sociology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_us_foreign_policy": { + "task": "mmlu_us_foreign_policy", + "task_alias": "us_foreign_policy", + "tag": "mmlu_social_sciences_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "us_foreign_policy", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about us foreign policy.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_virology": { + "task": "mmlu_virology", + "task_alias": "virology", + "tag": "mmlu_other_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "virology", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about virology.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "mmlu_world_religions": { + "task": "mmlu_world_religions", + "task_alias": "world_religions", + "tag": "mmlu_humanities_tasks", + "dataset_path": "cais/mmlu", + "dataset_name": "world_religions", + "test_split": "test", + "fewshot_split": "dev", + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. {{choices[3]}}\nAnswer:", + "doc_to_target": "answer", + "unsafe_code": false, + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "description": "The following are multiple choice questions (with answers) about world religions.\n\n", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "first_n", + "split": "dev", + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "{{question.strip()}}\nA. {{choices[0]}}\nB. {{choices[1]}}\nC. {{choices[2]}}\nD. 
{{choices[3]}}\nAnswer:", + "doc_to_choice": [ + "A", + "B", + "C", + "D" + ], + "doc_to_target": "answer", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": false, + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + }, + "winogrande": { + "task": "winogrande", + "dataset_path": "allenai/winogrande", + "dataset_name": "winogrande_xl", + "training_split": "train", + "validation_split": "validation", + "doc_to_text": "def doc_to_text(doc):\n answer_to_num = {\"1\": 0, \"2\": 1}\n return answer_to_num[doc[\"answer\"]]\n", + "doc_to_target": "def doc_to_target(doc):\n idx = doc[\"sentence\"].index(\"_\") + 1\n return doc[\"sentence\"][idx:].strip()\n", + "unsafe_code": false, + "doc_to_choice": "def doc_to_choice(doc):\n idx = doc[\"sentence\"].index(\"_\")\n options = [doc[\"option1\"], doc[\"option2\"]]\n return [doc[\"sentence\"][:idx] + opt for opt in options]\n", + "description": "", + "target_delimiter": " ", + "fewshot_delimiter": "\n\n", + "fewshot_config": { + "sampler": "default", + "split": null, + "process_docs": null, + "fewshot_indices": null, + "samples": null, + "doc_to_text": "", + "doc_to_choice": "", + "doc_to_target": "", + "gen_prefix": null, + "fewshot_delimiter": "\n\n", + "target_delimiter": " " + }, + "num_fewshot": 0, + "metric_list": [ + { + "metric": "acc", + "aggregation": "mean", + "higher_is_better": true + } + ], + "output_type": "multiple_choice", + "repeats": 1, + "should_decontaminate": true, + "doc_to_decontamination_query": "sentence", + "metadata": { + "version": 1.0, + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + } + } + }, + "versions": { + "arc_challenge": 1.0, + "arc_easy": 1.0, + "hellaswag": 1.0, + "kmmlu": 2.0, + "kmmlu_accounting": 2.0, + "kmmlu_agricultural_sciences": 2.0, + "kmmlu_applied_science": 2.0, + "kmmlu_aviation_engineering_and_maintenance": 2.0, + "kmmlu_biology": 2.0, + "kmmlu_chemical_engineering": 2.0, + "kmmlu_chemistry": 2.0, + "kmmlu_civil_engineering": 2.0, + "kmmlu_computer_science": 2.0, + "kmmlu_construction": 2.0, + "kmmlu_criminal_law": 2.0, + "kmmlu_ecology": 2.0, + "kmmlu_economics": 2.0, + "kmmlu_education": 2.0, + "kmmlu_electrical_engineering": 2.0, + "kmmlu_electronics_engineering": 2.0, + "kmmlu_energy_management": 2.0, + "kmmlu_environmental_science": 2.0, + "kmmlu_fashion": 2.0, + "kmmlu_food_processing": 2.0, + "kmmlu_gas_technology_and_engineering": 2.0, + "kmmlu_geomatics": 2.0, + "kmmlu_health": 2.0, + "kmmlu_humss": 2.0, + "kmmlu_industrial_engineer": 2.0, + "kmmlu_information_technology": 2.0, + "kmmlu_interior_architecture_and_design": 2.0, + "kmmlu_korean_history": 2.0, + "kmmlu_law": 2.0, + "kmmlu_machine_design_and_manufacturing": 2.0, + "kmmlu_management": 2.0, + "kmmlu_maritime_engineering": 2.0, + "kmmlu_marketing": 2.0, + "kmmlu_materials_engineering": 2.0, + "kmmlu_math": 2.0, + "kmmlu_mechanical_engineering": 2.0, + "kmmlu_nondestructive_testing": 2.0, + "kmmlu_other": 2.0, + "kmmlu_patent": 2.0, + "kmmlu_political_science_and_sociology": 2.0, + "kmmlu_psychology": 2.0, + "kmmlu_public_safety": 2.0, + "kmmlu_railway_and_automotive_engineering": 2.0, + "kmmlu_real_estate": 2.0, + 
"kmmlu_refrigerating_machinery": 2.0, + "kmmlu_social_welfare": 2.0, + "kmmlu_stem": 2.0, + "kmmlu_taxation": 2.0, + "kmmlu_telecommunications_and_wireless_technology": 2.0, + "kobest_boolq": 1.0, + "kobest_copa": 1.0, + "kobest_hellaswag": 1.0, + "mmlu": 2, + "mmlu_abstract_algebra": 1.0, + "mmlu_anatomy": 1.0, + "mmlu_astronomy": 1.0, + "mmlu_business_ethics": 1.0, + "mmlu_clinical_knowledge": 1.0, + "mmlu_college_biology": 1.0, + "mmlu_college_chemistry": 1.0, + "mmlu_college_computer_science": 1.0, + "mmlu_college_mathematics": 1.0, + "mmlu_college_medicine": 1.0, + "mmlu_college_physics": 1.0, + "mmlu_computer_security": 1.0, + "mmlu_conceptual_physics": 1.0, + "mmlu_econometrics": 1.0, + "mmlu_electrical_engineering": 1.0, + "mmlu_elementary_mathematics": 1.0, + "mmlu_formal_logic": 1.0, + "mmlu_global_facts": 1.0, + "mmlu_high_school_biology": 1.0, + "mmlu_high_school_chemistry": 1.0, + "mmlu_high_school_computer_science": 1.0, + "mmlu_high_school_european_history": 1.0, + "mmlu_high_school_geography": 1.0, + "mmlu_high_school_government_and_politics": 1.0, + "mmlu_high_school_macroeconomics": 1.0, + "mmlu_high_school_mathematics": 1.0, + "mmlu_high_school_microeconomics": 1.0, + "mmlu_high_school_physics": 1.0, + "mmlu_high_school_psychology": 1.0, + "mmlu_high_school_statistics": 1.0, + "mmlu_high_school_us_history": 1.0, + "mmlu_high_school_world_history": 1.0, + "mmlu_human_aging": 1.0, + "mmlu_human_sexuality": 1.0, + "mmlu_humanities": 2, + "mmlu_international_law": 1.0, + "mmlu_jurisprudence": 1.0, + "mmlu_logical_fallacies": 1.0, + "mmlu_machine_learning": 1.0, + "mmlu_management": 1.0, + "mmlu_marketing": 1.0, + "mmlu_medical_genetics": 1.0, + "mmlu_miscellaneous": 1.0, + "mmlu_moral_disputes": 1.0, + "mmlu_moral_scenarios": 1.0, + "mmlu_nutrition": 1.0, + "mmlu_other": 2, + "mmlu_philosophy": 1.0, + "mmlu_prehistory": 1.0, + "mmlu_professional_accounting": 1.0, + "mmlu_professional_law": 1.0, + "mmlu_professional_medicine": 1.0, + "mmlu_professional_psychology": 1.0, + "mmlu_public_relations": 1.0, + "mmlu_security_studies": 1.0, + "mmlu_social_sciences": 2, + "mmlu_sociology": 1.0, + "mmlu_stem": 2, + "mmlu_us_foreign_policy": 1.0, + "mmlu_virology": 1.0, + "mmlu_world_religions": 1.0, + "winogrande": 1.0 + }, + "n-shot": { + "arc_challenge": 0, + "arc_easy": 0, + "hellaswag": 0, + "kmmlu_accounting": 0, + "kmmlu_agricultural_sciences": 0, + "kmmlu_aviation_engineering_and_maintenance": 0, + "kmmlu_biology": 0, + "kmmlu_chemical_engineering": 0, + "kmmlu_chemistry": 0, + "kmmlu_civil_engineering": 0, + "kmmlu_computer_science": 0, + "kmmlu_construction": 0, + "kmmlu_criminal_law": 0, + "kmmlu_ecology": 0, + "kmmlu_economics": 0, + "kmmlu_education": 0, + "kmmlu_electrical_engineering": 0, + "kmmlu_electronics_engineering": 0, + "kmmlu_energy_management": 0, + "kmmlu_environmental_science": 0, + "kmmlu_fashion": 0, + "kmmlu_food_processing": 0, + "kmmlu_gas_technology_and_engineering": 0, + "kmmlu_geomatics": 0, + "kmmlu_health": 0, + "kmmlu_industrial_engineer": 0, + "kmmlu_information_technology": 0, + "kmmlu_interior_architecture_and_design": 0, + "kmmlu_korean_history": 0, + "kmmlu_law": 0, + "kmmlu_machine_design_and_manufacturing": 0, + "kmmlu_management": 0, + "kmmlu_maritime_engineering": 0, + "kmmlu_marketing": 0, + "kmmlu_materials_engineering": 0, + "kmmlu_math": 0, + "kmmlu_mechanical_engineering": 0, + "kmmlu_nondestructive_testing": 0, + "kmmlu_patent": 0, + "kmmlu_political_science_and_sociology": 0, + "kmmlu_psychology": 0, + "kmmlu_public_safety": 0, + 
"kmmlu_railway_and_automotive_engineering": 0, + "kmmlu_real_estate": 0, + "kmmlu_refrigerating_machinery": 0, + "kmmlu_social_welfare": 0, + "kmmlu_taxation": 0, + "kmmlu_telecommunications_and_wireless_technology": 0, + "kobest_boolq": 0, + "kobest_copa": 0, + "kobest_hellaswag": 0, + "mmlu_abstract_algebra": 0, + "mmlu_anatomy": 0, + "mmlu_astronomy": 0, + "mmlu_business_ethics": 0, + "mmlu_clinical_knowledge": 0, + "mmlu_college_biology": 0, + "mmlu_college_chemistry": 0, + "mmlu_college_computer_science": 0, + "mmlu_college_mathematics": 0, + "mmlu_college_medicine": 0, + "mmlu_college_physics": 0, + "mmlu_computer_security": 0, + "mmlu_conceptual_physics": 0, + "mmlu_econometrics": 0, + "mmlu_electrical_engineering": 0, + "mmlu_elementary_mathematics": 0, + "mmlu_formal_logic": 0, + "mmlu_global_facts": 0, + "mmlu_high_school_biology": 0, + "mmlu_high_school_chemistry": 0, + "mmlu_high_school_computer_science": 0, + "mmlu_high_school_european_history": 0, + "mmlu_high_school_geography": 0, + "mmlu_high_school_government_and_politics": 0, + "mmlu_high_school_macroeconomics": 0, + "mmlu_high_school_mathematics": 0, + "mmlu_high_school_microeconomics": 0, + "mmlu_high_school_physics": 0, + "mmlu_high_school_psychology": 0, + "mmlu_high_school_statistics": 0, + "mmlu_high_school_us_history": 0, + "mmlu_high_school_world_history": 0, + "mmlu_human_aging": 0, + "mmlu_human_sexuality": 0, + "mmlu_international_law": 0, + "mmlu_jurisprudence": 0, + "mmlu_logical_fallacies": 0, + "mmlu_machine_learning": 0, + "mmlu_management": 0, + "mmlu_marketing": 0, + "mmlu_medical_genetics": 0, + "mmlu_miscellaneous": 0, + "mmlu_moral_disputes": 0, + "mmlu_moral_scenarios": 0, + "mmlu_nutrition": 0, + "mmlu_philosophy": 0, + "mmlu_prehistory": 0, + "mmlu_professional_accounting": 0, + "mmlu_professional_law": 0, + "mmlu_professional_medicine": 0, + "mmlu_professional_psychology": 0, + "mmlu_public_relations": 0, + "mmlu_security_studies": 0, + "mmlu_sociology": 0, + "mmlu_us_foreign_policy": 0, + "mmlu_virology": 0, + "mmlu_world_religions": 0, + "winogrande": 0 + }, + "higher_is_better": { + "arc_challenge": { + "acc": true, + "acc_norm": true + }, + "arc_easy": { + "acc": true, + "acc_norm": true + }, + "hellaswag": { + "acc": true, + "acc_norm": true + }, + "kmmlu": { + "acc": true + }, + "kmmlu_accounting": { + "acc": true + }, + "kmmlu_agricultural_sciences": { + "acc": true + }, + "kmmlu_applied_science": { + "acc": true + }, + "kmmlu_aviation_engineering_and_maintenance": { + "acc": true + }, + "kmmlu_biology": { + "acc": true + }, + "kmmlu_chemical_engineering": { + "acc": true + }, + "kmmlu_chemistry": { + "acc": true + }, + "kmmlu_civil_engineering": { + "acc": true + }, + "kmmlu_computer_science": { + "acc": true + }, + "kmmlu_construction": { + "acc": true + }, + "kmmlu_criminal_law": { + "acc": true + }, + "kmmlu_ecology": { + "acc": true + }, + "kmmlu_economics": { + "acc": true + }, + "kmmlu_education": { + "acc": true + }, + "kmmlu_electrical_engineering": { + "acc": true + }, + "kmmlu_electronics_engineering": { + "acc": true + }, + "kmmlu_energy_management": { + "acc": true + }, + "kmmlu_environmental_science": { + "acc": true + }, + "kmmlu_fashion": { + "acc": true + }, + "kmmlu_food_processing": { + "acc": true + }, + "kmmlu_gas_technology_and_engineering": { + "acc": true + }, + "kmmlu_geomatics": { + "acc": true + }, + "kmmlu_health": { + "acc": true + }, + "kmmlu_humss": { + "acc": true + }, + "kmmlu_industrial_engineer": { + "acc": true + }, + "kmmlu_information_technology": { + 
"acc": true + }, + "kmmlu_interior_architecture_and_design": { + "acc": true + }, + "kmmlu_korean_history": { + "acc": true + }, + "kmmlu_law": { + "acc": true + }, + "kmmlu_machine_design_and_manufacturing": { + "acc": true + }, + "kmmlu_management": { + "acc": true + }, + "kmmlu_maritime_engineering": { + "acc": true + }, + "kmmlu_marketing": { + "acc": true + }, + "kmmlu_materials_engineering": { + "acc": true + }, + "kmmlu_math": { + "acc": true + }, + "kmmlu_mechanical_engineering": { + "acc": true + }, + "kmmlu_nondestructive_testing": { + "acc": true + }, + "kmmlu_other": { + "acc": true + }, + "kmmlu_patent": { + "acc": true + }, + "kmmlu_political_science_and_sociology": { + "acc": true + }, + "kmmlu_psychology": { + "acc": true + }, + "kmmlu_public_safety": { + "acc": true + }, + "kmmlu_railway_and_automotive_engineering": { + "acc": true + }, + "kmmlu_real_estate": { + "acc": true + }, + "kmmlu_refrigerating_machinery": { + "acc": true + }, + "kmmlu_social_welfare": { + "acc": true + }, + "kmmlu_stem": { + "acc": true + }, + "kmmlu_taxation": { + "acc": true + }, + "kmmlu_telecommunications_and_wireless_technology": { + "acc": true + }, + "kobest_boolq": { + "acc": true, + "f1": true + }, + "kobest_copa": { + "acc": true, + "f1": true + }, + "kobest_hellaswag": { + "acc": true, + "acc_norm": true, + "f1": true + }, + "mmlu": { + "acc": true + }, + "mmlu_abstract_algebra": { + "acc": true + }, + "mmlu_anatomy": { + "acc": true + }, + "mmlu_astronomy": { + "acc": true + }, + "mmlu_business_ethics": { + "acc": true + }, + "mmlu_clinical_knowledge": { + "acc": true + }, + "mmlu_college_biology": { + "acc": true + }, + "mmlu_college_chemistry": { + "acc": true + }, + "mmlu_college_computer_science": { + "acc": true + }, + "mmlu_college_mathematics": { + "acc": true + }, + "mmlu_college_medicine": { + "acc": true + }, + "mmlu_college_physics": { + "acc": true + }, + "mmlu_computer_security": { + "acc": true + }, + "mmlu_conceptual_physics": { + "acc": true + }, + "mmlu_econometrics": { + "acc": true + }, + "mmlu_electrical_engineering": { + "acc": true + }, + "mmlu_elementary_mathematics": { + "acc": true + }, + "mmlu_formal_logic": { + "acc": true + }, + "mmlu_global_facts": { + "acc": true + }, + "mmlu_high_school_biology": { + "acc": true + }, + "mmlu_high_school_chemistry": { + "acc": true + }, + "mmlu_high_school_computer_science": { + "acc": true + }, + "mmlu_high_school_european_history": { + "acc": true + }, + "mmlu_high_school_geography": { + "acc": true + }, + "mmlu_high_school_government_and_politics": { + "acc": true + }, + "mmlu_high_school_macroeconomics": { + "acc": true + }, + "mmlu_high_school_mathematics": { + "acc": true + }, + "mmlu_high_school_microeconomics": { + "acc": true + }, + "mmlu_high_school_physics": { + "acc": true + }, + "mmlu_high_school_psychology": { + "acc": true + }, + "mmlu_high_school_statistics": { + "acc": true + }, + "mmlu_high_school_us_history": { + "acc": true + }, + "mmlu_high_school_world_history": { + "acc": true + }, + "mmlu_human_aging": { + "acc": true + }, + "mmlu_human_sexuality": { + "acc": true + }, + "mmlu_humanities": { + "acc": true + }, + "mmlu_international_law": { + "acc": true + }, + "mmlu_jurisprudence": { + "acc": true + }, + "mmlu_logical_fallacies": { + "acc": true + }, + "mmlu_machine_learning": { + "acc": true + }, + "mmlu_management": { + "acc": true + }, + "mmlu_marketing": { + "acc": true + }, + "mmlu_medical_genetics": { + "acc": true + }, + "mmlu_miscellaneous": { + "acc": true + }, + "mmlu_moral_disputes": { + 
"acc": true + }, + "mmlu_moral_scenarios": { + "acc": true + }, + "mmlu_nutrition": { + "acc": true + }, + "mmlu_other": { + "acc": true + }, + "mmlu_philosophy": { + "acc": true + }, + "mmlu_prehistory": { + "acc": true + }, + "mmlu_professional_accounting": { + "acc": true + }, + "mmlu_professional_law": { + "acc": true + }, + "mmlu_professional_medicine": { + "acc": true + }, + "mmlu_professional_psychology": { + "acc": true + }, + "mmlu_public_relations": { + "acc": true + }, + "mmlu_security_studies": { + "acc": true + }, + "mmlu_social_sciences": { + "acc": true + }, + "mmlu_sociology": { + "acc": true + }, + "mmlu_stem": { + "acc": true + }, + "mmlu_us_foreign_policy": { + "acc": true + }, + "mmlu_virology": { + "acc": true + }, + "mmlu_world_religions": { + "acc": true + }, + "winogrande": { + "acc": true + } + }, + "n-samples": { + "kobest_hellaswag": { + "original": 500, + "effective": 400 + }, + "kobest_copa": { + "original": 1000, + "effective": 400 + }, + "kobest_boolq": { + "original": 1404, + "effective": 400 + }, + "kmmlu_biology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_chemical_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_chemistry": { + "original": 600, + "effective": 400 + }, + "kmmlu_civil_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_computer_science": { + "original": 1000, + "effective": 400 + }, + "kmmlu_ecology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_electrical_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_information_technology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_materials_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_math": { + "original": 300, + "effective": 300 + }, + "kmmlu_mechanical_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_agricultural_sciences": { + "original": 1000, + "effective": 400 + }, + "kmmlu_construction": { + "original": 1000, + "effective": 400 + }, + "kmmlu_fashion": { + "original": 1000, + "effective": 400 + }, + "kmmlu_food_processing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_health": { + "original": 100, + "effective": 100 + }, + "kmmlu_interior_architecture_and_design": { + "original": 1000, + "effective": 400 + }, + "kmmlu_marketing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_patent": { + "original": 100, + "effective": 100 + }, + "kmmlu_public_safety": { + "original": 1000, + "effective": 400 + }, + "kmmlu_real_estate": { + "original": 200, + "effective": 200 + }, + "kmmlu_refrigerating_machinery": { + "original": 1000, + "effective": 400 + }, + "kmmlu_aviation_engineering_and_maintenance": { + "original": 1000, + "effective": 400 + }, + "kmmlu_electronics_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_energy_management": { + "original": 1000, + "effective": 400 + }, + "kmmlu_environmental_science": { + "original": 1000, + "effective": 400 + }, + "kmmlu_gas_technology_and_engineering": { + "original": 1000, + "effective": 400 + }, + "kmmlu_geomatics": { + "original": 1000, + "effective": 400 + }, + "kmmlu_industrial_engineer": { + "original": 1000, + "effective": 400 + }, + "kmmlu_machine_design_and_manufacturing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_maritime_engineering": { + "original": 600, + "effective": 400 + }, + "kmmlu_nondestructive_testing": { + "original": 1000, + "effective": 400 + }, + "kmmlu_railway_and_automotive_engineering": { + "original": 1000, + "effective": 400 + }, + 
"kmmlu_telecommunications_and_wireless_technology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_accounting": { + "original": 100, + "effective": 100 + }, + "kmmlu_criminal_law": { + "original": 200, + "effective": 200 + }, + "kmmlu_economics": { + "original": 130, + "effective": 130 + }, + "kmmlu_education": { + "original": 100, + "effective": 100 + }, + "kmmlu_korean_history": { + "original": 100, + "effective": 100 + }, + "kmmlu_law": { + "original": 1000, + "effective": 400 + }, + "kmmlu_management": { + "original": 1000, + "effective": 400 + }, + "kmmlu_political_science_and_sociology": { + "original": 300, + "effective": 300 + }, + "kmmlu_psychology": { + "original": 1000, + "effective": 400 + }, + "kmmlu_social_welfare": { + "original": 1000, + "effective": 400 + }, + "kmmlu_taxation": { + "original": 200, + "effective": 200 + }, + "winogrande": { + "original": 1267, + "effective": 400 + }, + "arc_challenge": { + "original": 1172, + "effective": 400 + }, + "arc_easy": { + "original": 2376, + "effective": 400 + }, + "hellaswag": { + "original": 10042, + "effective": 400 + }, + "mmlu_abstract_algebra": { + "original": 100, + "effective": 100 + }, + "mmlu_anatomy": { + "original": 135, + "effective": 135 + }, + "mmlu_astronomy": { + "original": 152, + "effective": 152 + }, + "mmlu_college_biology": { + "original": 144, + "effective": 144 + }, + "mmlu_college_chemistry": { + "original": 100, + "effective": 100 + }, + "mmlu_college_computer_science": { + "original": 100, + "effective": 100 + }, + "mmlu_college_mathematics": { + "original": 100, + "effective": 100 + }, + "mmlu_college_physics": { + "original": 102, + "effective": 102 + }, + "mmlu_computer_security": { + "original": 100, + "effective": 100 + }, + "mmlu_conceptual_physics": { + "original": 235, + "effective": 235 + }, + "mmlu_electrical_engineering": { + "original": 145, + "effective": 145 + }, + "mmlu_elementary_mathematics": { + "original": 378, + "effective": 378 + }, + "mmlu_high_school_biology": { + "original": 310, + "effective": 310 + }, + "mmlu_high_school_chemistry": { + "original": 203, + "effective": 203 + }, + "mmlu_high_school_computer_science": { + "original": 100, + "effective": 100 + }, + "mmlu_high_school_mathematics": { + "original": 270, + "effective": 270 + }, + "mmlu_high_school_physics": { + "original": 151, + "effective": 151 + }, + "mmlu_high_school_statistics": { + "original": 216, + "effective": 216 + }, + "mmlu_machine_learning": { + "original": 112, + "effective": 112 + }, + "mmlu_business_ethics": { + "original": 100, + "effective": 100 + }, + "mmlu_clinical_knowledge": { + "original": 265, + "effective": 265 + }, + "mmlu_college_medicine": { + "original": 173, + "effective": 173 + }, + "mmlu_global_facts": { + "original": 100, + "effective": 100 + }, + "mmlu_human_aging": { + "original": 223, + "effective": 223 + }, + "mmlu_management": { + "original": 103, + "effective": 103 + }, + "mmlu_marketing": { + "original": 234, + "effective": 234 + }, + "mmlu_medical_genetics": { + "original": 100, + "effective": 100 + }, + "mmlu_miscellaneous": { + "original": 783, + "effective": 400 + }, + "mmlu_nutrition": { + "original": 306, + "effective": 306 + }, + "mmlu_professional_accounting": { + "original": 282, + "effective": 282 + }, + "mmlu_professional_medicine": { + "original": 272, + "effective": 272 + }, + "mmlu_virology": { + "original": 166, + "effective": 166 + }, + "mmlu_econometrics": { + "original": 114, + "effective": 114 + }, + "mmlu_high_school_geography": { + "original": 198, + 
"effective": 198 + }, + "mmlu_high_school_government_and_politics": { + "original": 193, + "effective": 193 + }, + "mmlu_high_school_macroeconomics": { + "original": 390, + "effective": 390 + }, + "mmlu_high_school_microeconomics": { + "original": 238, + "effective": 238 + }, + "mmlu_high_school_psychology": { + "original": 545, + "effective": 400 + }, + "mmlu_human_sexuality": { + "original": 131, + "effective": 131 + }, + "mmlu_professional_psychology": { + "original": 612, + "effective": 400 + }, + "mmlu_public_relations": { + "original": 110, + "effective": 110 + }, + "mmlu_security_studies": { + "original": 245, + "effective": 245 + }, + "mmlu_sociology": { + "original": 201, + "effective": 201 + }, + "mmlu_us_foreign_policy": { + "original": 100, + "effective": 100 + }, + "mmlu_formal_logic": { + "original": 126, + "effective": 126 + }, + "mmlu_high_school_european_history": { + "original": 165, + "effective": 165 + }, + "mmlu_high_school_us_history": { + "original": 204, + "effective": 204 + }, + "mmlu_high_school_world_history": { + "original": 237, + "effective": 237 + }, + "mmlu_international_law": { + "original": 121, + "effective": 121 + }, + "mmlu_jurisprudence": { + "original": 108, + "effective": 108 + }, + "mmlu_logical_fallacies": { + "original": 163, + "effective": 163 + }, + "mmlu_moral_disputes": { + "original": 346, + "effective": 346 + }, + "mmlu_moral_scenarios": { + "original": 895, + "effective": 400 + }, + "mmlu_philosophy": { + "original": 311, + "effective": 311 + }, + "mmlu_prehistory": { + "original": 324, + "effective": 324 + }, + "mmlu_professional_law": { + "original": 1534, + "effective": 400 + }, + "mmlu_world_religions": { + "original": 171, + "effective": 171 + } + }, + "config": { + "model": "hf", + "model_args": { + "pretrained": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "trust_remote_code": true + }, + "model_num_parameters": 4022468096, + "model_dtype": "torch.bfloat16", + "model_revision": "main", + "model_sha": "", + "batch_size": "12", + "batch_sizes": [], + "device": "cuda:0", + "use_cache": null, + "limit": 400.0, + "bootstrap_iters": 100000, + "gen_kwargs": {}, + "random_seed": 0, + "numpy_seed": 1234, + "torch_seed": 1234, + "fewshot_seed": 1234 + }, + "git_hash": "0ce43af", + "date": 1775962096.959724, + "pretty_env_info": "PyTorch version: 2.9.0+cu128\nIs debug build: False\nCUDA used to build PyTorch: 12.8\nROCM used to build PyTorch: N/A\n\nOS: Ubuntu 22.04.5 LTS (x86_64)\nGCC version: (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0\nClang version: Could not collect\nCMake version: version 4.1.0\nLibc version: glibc-2.35\n\nPython version: 3.11.14 | packaged by conda-forge | (main, Oct 13 2025, 14:09:32) [GCC 14.3.0] (64-bit runtime)\nPython platform: Linux-6.8.0-90-generic-x86_64-with-glibc2.35\nIs CUDA available: True\nCUDA runtime version: 12.8.93\nCUDA_MODULE_LOADING set to: \nGPU models and configuration: GPU 0: NVIDIA RTX PRO 6000 Blackwell Workstation Edition\nNvidia driver version: 590.48.01\ncuDNN version: Probably one of the following:\n/usr/lib/x86_64-linux-gnu/libcudnn.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_adv.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_cnn.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_engines_precompiled.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_engines_runtime_compiled.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_graph.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_heuristic.so.9.8.0\n/usr/lib/x86_64-linux-gnu/libcudnn_ops.so.9.8.0\nIs XPU available: False\nHIP runtime 
version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 43 bits physical, 48 bits virtual\nByte Order: Little Endian\nCPU(s): 192\nOn-line CPU(s) list: 0-191\nVendor ID: AuthenticAMD\nModel name: AMD EPYC 7642 48-Core Processor\nCPU family: 23\nModel: 49\nThread(s) per core: 2\nCore(s) per socket: 48\nSocket(s): 2\nStepping: 0\nFrequency boost: enabled\nCPU max MHz: 2300.0000\nCPU min MHz: 1500.0000\nBogoMIPS: 4600.15\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd amd_ppin arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid overflow_recov succor smca sev sev_es ibpb_exit_to_user\nVirtualization: AMD-V\nL1d cache: 3 MiB (96 instances)\nL1i cache: 3 MiB (96 instances)\nL2 cache: 48 MiB (96 instances)\nL3 cache: 512 MiB (32 instances)\nNUMA node(s): 2\nNUMA node0 CPU(s): 0-47,96-143\nNUMA node1 CPU(s): 48-95,144-191\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Reg file data sampling: Not affected\nVulnerability Retbleed: Mitigation; untrained return thunk; SMT enabled with STIBP protection\nVulnerability Spec rstack overflow: Mitigation; Safe RET\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines; IBPB conditional; STIBP always-on; RSB filling; PBRSB-eIBRS Not affected; BHI Not affected\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\nVulnerability Vmscape: Mitigation; IBPB before exit to userspace\n\nVersions of relevant libraries:\n[pip3] executorch==1.0.1\n[pip3] numpy==2.2.6\n[pip3] nvidia-cublas-cu12==12.8.4.1\n[pip3] nvidia-cuda-cupti-cu12==12.8.90\n[pip3] nvidia-cuda-nvrtc-cu12==12.8.93\n[pip3] nvidia-cuda-runtime-cu12==12.8.90\n[pip3] nvidia-cudnn-cu12==9.10.2.21\n[pip3] nvidia-cudnn-frontend==1.17.0\n[pip3] nvidia-cufft-cu12==11.3.3.83\n[pip3] nvidia-curand-cu12==10.3.9.90\n[pip3] nvidia-cusolver-cu12==11.7.3.90\n[pip3] nvidia-cusparse-cu12==12.5.8.93\n[pip3] nvidia-cusparselt-cu12==0.7.1\n[pip3] nvidia-nccl-cu12==2.27.5\n[pip3] nvidia-nvjitlink-cu12==12.8.93\n[pip3] nvidia-nvtx-cu12==12.8.90\n[pip3] optree==0.17.0\n[pip3] pytorch_tokenizers==1.0.1\n[pip3] torch==2.9.0+cu128\n[pip3] torch_c_dlpack_ext==0.1.4\n[pip3] torch-stoi==0.2.3\n[pip3] torchao==0.14.0\n[pip3] torchaudio==2.9.0+cu128\n[pip3] torchcodec==0.9.1\n[pip3] torchelastic==0.2.2\n[pip3] torchvision==0.24.0+cu128\n[pip3] 
triton==3.5.0\n[pip3] triton_kernels==1.0.0\n[conda] No relevant packages", + "transformers_version": "5.5.3", + "lm_eval_version": "0.4.11", + "upper_git_hash": null, + "tokenizer_pad_token": [ + "<|PAD_TOKEN|>", + "151669" + ], + "tokenizer_eos_token": [ + "<|endoftext|>", + "151643" + ], + "tokenizer_bos_token": [ + null, + "None" + ], + "eot_token_id": 151643, + "max_length": 32768, + "task_hashes": {}, + "model_source": "hf", + "model_name": "/home/unsloth/scp_stage1_cpt/artifacts/cpt_full_96gb_qwen3_4b/checkpoints", + "model_name_sanitized": "__home__unsloth__scp_stage1_cpt__artifacts__cpt_full_96gb_qwen3_4b__checkpoints", + "system_instruction": null, + "system_instruction_sha": null, + "fewshot_as_multiturn": null, + "chat_template": null, + "chat_template_sha": null, + "total_evaluation_time_seconds": "580.1511918641627" +} \ No newline at end of file diff --git a/eval/lm_eval/checkpoints/cpt/stdout.txt b/eval/lm_eval/checkpoints/cpt/stdout.txt new file mode 100644 index 0000000..b6d158b --- /dev/null +++ b/eval/lm_eval/checkpoints/cpt/stdout.txt @@ -0,0 +1,2748 @@ +2026-04-12:02:48:13 WARNING [config.evaluate_config:281] --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT. +2026-04-12:02:48:16 INFO [_cli.run:376] Selected Tasks: ['mmlu', 'hellaswag', 'arc_easy', 'arc_challenge', 'winogrande', 'kmmlu', 'kobest_boolq', 'kobest_copa', 'kobest_hellaswag'] +🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning. +Unsloth: Your Flash Attention 2 installation seems to be broken. Using Xformers instead. No performance changes will be seen. +🦥 Unsloth Zoo will now patch everything to make training faster! + +Loading weights: 0%| | 0/398 [00:00", + "errors": "replace", + "is_local": false, + "model_max_length": 32768, + "pad_token": "<|PAD_TOKEN|>", + "padding_side": "left", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..1f1e6cf --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,15 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "errors": "replace", + "is_local": false, + "model_max_length": 32768, + "pad_token": "<|PAD_TOKEN|>", + "padding_side": "left", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..d037da7 --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 1.0, + "total_flos": 2.103177196962902e+18, + "train_loss": 1.7256613558986822, + "train_runtime": 29239.084, + "train_samples_per_second": 1.616, + "train_steps_per_second": 0.051 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..87bdcbc --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,1096 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 1477, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.006771626883358727, + "grad_norm": 1.5234375, + "learning_rate": 6.081081081081082e-07, + "loss": 1.8358331680297852, + "step": 10 + }, + { + "epoch": 0.013543253766717453, + "grad_norm": 1.5078125, + "learning_rate": 1.2837837837837838e-06, 
+ "loss": 1.840726089477539, + "step": 20 + }, + { + "epoch": 0.02031488065007618, + "grad_norm": 1.0859375, + "learning_rate": 1.9594594594594595e-06, + "loss": 1.8267410278320313, + "step": 30 + }, + { + "epoch": 0.027086507533434907, + "grad_norm": 1.1640625, + "learning_rate": 2.6351351351351353e-06, + "loss": 1.8383310317993165, + "step": 40 + }, + { + "epoch": 0.03385813441679363, + "grad_norm": 1.0859375, + "learning_rate": 3.310810810810811e-06, + "loss": 1.8384885787963867, + "step": 50 + }, + { + "epoch": 0.04062976130015236, + "grad_norm": 1.03125, + "learning_rate": 3.986486486486487e-06, + "loss": 1.8087802886962892, + "step": 60 + }, + { + "epoch": 0.04740138818351109, + "grad_norm": 1.015625, + "learning_rate": 4.6621621621621625e-06, + "loss": 1.8259227752685547, + "step": 70 + }, + { + "epoch": 0.05417301506686981, + "grad_norm": 1.046875, + "learning_rate": 5.337837837837838e-06, + "loss": 1.8241001129150392, + "step": 80 + }, + { + "epoch": 0.06094464195022854, + "grad_norm": 0.96484375, + "learning_rate": 6.013513513513514e-06, + "loss": 1.82220516204834, + "step": 90 + }, + { + "epoch": 0.06771626883358726, + "grad_norm": 0.953125, + "learning_rate": 6.689189189189191e-06, + "loss": 1.7921783447265625, + "step": 100 + }, + { + "epoch": 0.074487895716946, + "grad_norm": 0.9296875, + "learning_rate": 7.3648648648648655e-06, + "loss": 1.797548484802246, + "step": 110 + }, + { + "epoch": 0.08125952260030472, + "grad_norm": 0.89453125, + "learning_rate": 8.040540540540541e-06, + "loss": 1.7889528274536133, + "step": 120 + }, + { + "epoch": 0.08803114948366345, + "grad_norm": 0.90234375, + "learning_rate": 8.716216216216217e-06, + "loss": 1.7663179397583009, + "step": 130 + }, + { + "epoch": 0.09480277636702218, + "grad_norm": 0.89453125, + "learning_rate": 9.391891891891893e-06, + "loss": 1.7635225296020507, + "step": 140 + }, + { + "epoch": 0.1015744032503809, + "grad_norm": 0.91015625, + "learning_rate": 9.999986030219255e-06, + "loss": 1.7774492263793946, + "step": 150 + }, + { + "epoch": 0.10834603013373963, + "grad_norm": 0.91796875, + "learning_rate": 9.998309750982693e-06, + "loss": 1.7622718811035156, + "step": 160 + }, + { + "epoch": 0.11511765701709836, + "grad_norm": 0.890625, + "learning_rate": 9.993840588849743e-06, + "loss": 1.7750001907348634, + "step": 170 + }, + { + "epoch": 0.12188928390045708, + "grad_norm": 0.890625, + "learning_rate": 9.986581041033881e-06, + "loss": 1.767216110229492, + "step": 180 + }, + { + "epoch": 0.1286609107838158, + "grad_norm": 0.921875, + "learning_rate": 9.976535163919757e-06, + "loss": 1.7609657287597655, + "step": 190 + }, + { + "epoch": 0.13543253766717453, + "grad_norm": 0.87109375, + "learning_rate": 9.96370857079661e-06, + "loss": 1.7535722732543946, + "step": 200 + }, + { + "epoch": 0.14220416455053325, + "grad_norm": 0.86328125, + "learning_rate": 9.948108428721782e-06, + "loss": 1.7395360946655274, + "step": 210 + }, + { + "epoch": 0.148975791433892, + "grad_norm": 0.88671875, + "learning_rate": 9.92974345451598e-06, + "loss": 1.7465991973876953, + "step": 220 + }, + { + "epoch": 0.15574741831725072, + "grad_norm": 0.87890625, + "learning_rate": 9.908623909892651e-06, + "loss": 1.7506902694702149, + "step": 230 + }, + { + "epoch": 0.16251904520060945, + "grad_norm": 0.8984375, + "learning_rate": 9.884761595724068e-06, + "loss": 1.7368896484375, + "step": 240 + }, + { + "epoch": 0.16929067208396817, + "grad_norm": 0.8671875, + "learning_rate": 9.858169845447417e-06, + "loss": 1.7515613555908203, + "step": 250 + }, + { 
+ "epoch": 0.1760622989673269, + "grad_norm": 0.85546875, + "learning_rate": 9.828863517614533e-06, + "loss": 1.7509956359863281, + "step": 260 + }, + { + "epoch": 0.1828339258506856, + "grad_norm": 0.9140625, + "learning_rate": 9.796858987589462e-06, + "loss": 1.753628921508789, + "step": 270 + }, + { + "epoch": 0.18960555273404436, + "grad_norm": 0.85546875, + "learning_rate": 9.762174138398456e-06, + "loss": 1.7379936218261718, + "step": 280 + }, + { + "epoch": 0.19637717961740309, + "grad_norm": 0.88671875, + "learning_rate": 9.724828350737574e-06, + "loss": 1.7442964553833007, + "step": 290 + }, + { + "epoch": 0.2031488065007618, + "grad_norm": 0.87109375, + "learning_rate": 9.684842492143399e-06, + "loss": 1.7366142272949219, + "step": 300 + }, + { + "epoch": 0.20992043338412053, + "grad_norm": 0.84765625, + "learning_rate": 9.642238905333e-06, + "loss": 1.7396051406860351, + "step": 310 + }, + { + "epoch": 0.21669206026747925, + "grad_norm": 0.87109375, + "learning_rate": 9.597041395719573e-06, + "loss": 1.732611083984375, + "step": 320 + }, + { + "epoch": 0.22346368715083798, + "grad_norm": 0.8828125, + "learning_rate": 9.549275218110818e-06, + "loss": 1.7453182220458985, + "step": 330 + }, + { + "epoch": 0.23023531403419673, + "grad_norm": 0.875, + "learning_rate": 9.498967062597403e-06, + "loss": 1.7297761917114258, + "step": 340 + }, + { + "epoch": 0.23700694091755545, + "grad_norm": 0.875, + "learning_rate": 9.446145039639486e-06, + "loss": 1.728118324279785, + "step": 350 + }, + { + "epoch": 0.24377856780091417, + "grad_norm": 0.890625, + "learning_rate": 9.390838664359539e-06, + "loss": 1.7387624740600587, + "step": 360 + }, + { + "epoch": 0.2505501946842729, + "grad_norm": 0.85546875, + "learning_rate": 9.333078840050331e-06, + "loss": 1.7364713668823242, + "step": 370 + }, + { + "epoch": 0.2573218215676316, + "grad_norm": 0.8828125, + "learning_rate": 9.27289784090723e-06, + "loss": 1.7236080169677734, + "step": 380 + }, + { + "epoch": 0.26409344845099036, + "grad_norm": 0.890625, + "learning_rate": 9.210329293994495e-06, + "loss": 1.7224924087524414, + "step": 390 + }, + { + "epoch": 0.27086507533434906, + "grad_norm": 0.8671875, + "learning_rate": 9.145408160455642e-06, + "loss": 1.7099193572998046, + "step": 400 + }, + { + "epoch": 0.2776367022177078, + "grad_norm": 0.8515625, + "learning_rate": 9.078170715978353e-06, + "loss": 1.737176513671875, + "step": 410 + }, + { + "epoch": 0.2844083291010665, + "grad_norm": 0.9140625, + "learning_rate": 9.008654530524883e-06, + "loss": 1.73763427734375, + "step": 420 + }, + { + "epoch": 0.29117995598442525, + "grad_norm": 0.85546875, + "learning_rate": 8.936898447339257e-06, + "loss": 1.7290821075439453, + "step": 430 + }, + { + "epoch": 0.297951582867784, + "grad_norm": 0.8984375, + "learning_rate": 8.86294256124301e-06, + "loss": 1.7403568267822265, + "step": 440 + }, + { + "epoch": 0.3047232097511427, + "grad_norm": 0.859375, + "learning_rate": 8.786828196231584e-06, + "loss": 1.7217792510986327, + "step": 450 + }, + { + "epoch": 0.31149483663450145, + "grad_norm": 0.87109375, + "learning_rate": 8.708597882383908e-06, + "loss": 1.7103708267211915, + "step": 460 + }, + { + "epoch": 0.31826646351786014, + "grad_norm": 0.91796875, + "learning_rate": 8.62829533209805e-06, + "loss": 1.7208784103393555, + "step": 470 + }, + { + "epoch": 0.3250380904012189, + "grad_norm": 0.859375, + "learning_rate": 8.545965415666254e-06, + "loss": 1.7223230361938477, + "step": 480 + }, + { + "epoch": 0.33180971728457764, + "grad_norm": 0.8671875, + 
"learning_rate": 8.46165413620295e-06, + "loss": 1.719701385498047, + "step": 490 + }, + { + "epoch": 0.33858134416793634, + "grad_norm": 0.85546875, + "learning_rate": 8.375408603939827e-06, + "loss": 1.721092987060547, + "step": 500 + }, + { + "epoch": 0.33858134416793634, + "eval_loss": 1.7143864631652832, + "eval_runtime": 177.179, + "eval_samples_per_second": 5.401, + "eval_steps_per_second": 0.677, + "step": 500 + }, + { + "epoch": 0.3453529710512951, + "grad_norm": 0.859375, + "learning_rate": 8.287277009902237e-06, + "loss": 1.7325265884399415, + "step": 510 + }, + { + "epoch": 0.3521245979346538, + "grad_norm": 0.83984375, + "learning_rate": 8.197308598981731e-06, + "loss": 1.7298921585083007, + "step": 520 + }, + { + "epoch": 0.35889622481801253, + "grad_norm": 0.8828125, + "learning_rate": 8.105553642419708e-06, + "loss": 1.6982412338256836, + "step": 530 + }, + { + "epoch": 0.3656678517013712, + "grad_norm": 0.91015625, + "learning_rate": 8.012063409717578e-06, + "loss": 1.7173789978027343, + "step": 540 + }, + { + "epoch": 0.37243947858473, + "grad_norm": 0.875, + "learning_rate": 7.916890139989147e-06, + "loss": 1.724541473388672, + "step": 550 + }, + { + "epoch": 0.3792111054680887, + "grad_norm": 0.859375, + "learning_rate": 7.820087012771184e-06, + "loss": 1.701674461364746, + "step": 560 + }, + { + "epoch": 0.3859827323514474, + "grad_norm": 0.85546875, + "learning_rate": 7.721708118308556e-06, + "loss": 1.7177881240844726, + "step": 570 + }, + { + "epoch": 0.39275435923480617, + "grad_norm": 0.87890625, + "learning_rate": 7.621808427330447e-06, + "loss": 1.6985021591186524, + "step": 580 + }, + { + "epoch": 0.39952598611816487, + "grad_norm": 0.87109375, + "learning_rate": 7.5204437603346224e-06, + "loss": 1.709127426147461, + "step": 590 + }, + { + "epoch": 0.4062976130015236, + "grad_norm": 0.88671875, + "learning_rate": 7.417670756396863e-06, + "loss": 1.7201419830322267, + "step": 600 + }, + { + "epoch": 0.41306923988488237, + "grad_norm": 0.8984375, + "learning_rate": 7.313546841522998e-06, + "loss": 1.7153247833251952, + "step": 610 + }, + { + "epoch": 0.41984086676824106, + "grad_norm": 0.875, + "learning_rate": 7.2081301965612435e-06, + "loss": 1.707881546020508, + "step": 620 + }, + { + "epoch": 0.4266124936515998, + "grad_norm": 0.87109375, + "learning_rate": 7.10147972469275e-06, + "loss": 1.7271339416503906, + "step": 630 + }, + { + "epoch": 0.4333841205349585, + "grad_norm": 1.3515625, + "learning_rate": 6.993655018518541e-06, + "loss": 1.7222976684570312, + "step": 640 + }, + { + "epoch": 0.44015574741831726, + "grad_norm": 0.85546875, + "learning_rate": 6.884716326761218e-06, + "loss": 1.7006675720214843, + "step": 650 + }, + { + "epoch": 0.44692737430167595, + "grad_norm": 0.87109375, + "learning_rate": 6.774724520600069e-06, + "loss": 1.6978439331054687, + "step": 660 + }, + { + "epoch": 0.4536990011850347, + "grad_norm": 0.87890625, + "learning_rate": 6.663741059658337e-06, + "loss": 1.7124168395996093, + "step": 670 + }, + { + "epoch": 0.46047062806839345, + "grad_norm": 0.87890625, + "learning_rate": 6.551827957661722e-06, + "loss": 1.7023361206054688, + "step": 680 + }, + { + "epoch": 0.46724225495175215, + "grad_norm": 0.86328125, + "learning_rate": 6.439047747787242e-06, + "loss": 1.700748825073242, + "step": 690 + }, + { + "epoch": 0.4740138818351109, + "grad_norm": 0.85546875, + "learning_rate": 6.325463447721852e-06, + "loss": 1.6977190017700194, + "step": 700 + }, + { + "epoch": 0.4807855087184696, + "grad_norm": 0.8984375, + "learning_rate": 
6.211138524450347e-06, + "loss": 1.7250362396240235, + "step": 710 + }, + { + "epoch": 0.48755713560182834, + "grad_norm": 0.90234375, + "learning_rate": 6.096136858792193e-06, + "loss": 1.7249008178710938, + "step": 720 + }, + { + "epoch": 0.4943287624851871, + "grad_norm": 0.8671875, + "learning_rate": 5.980522709707132e-06, + "loss": 1.7153186798095703, + "step": 730 + }, + { + "epoch": 0.5011003893685458, + "grad_norm": 0.8828125, + "learning_rate": 5.864360678389497e-06, + "loss": 1.6841873168945312, + "step": 740 + }, + { + "epoch": 0.5078720162519045, + "grad_norm": 0.8515625, + "learning_rate": 5.747715672171295e-06, + "loss": 1.7151117324829102, + "step": 750 + }, + { + "epoch": 0.5146436431352632, + "grad_norm": 0.95703125, + "learning_rate": 5.630652868254229e-06, + "loss": 1.704267692565918, + "step": 760 + }, + { + "epoch": 0.521415270018622, + "grad_norm": 0.88671875, + "learning_rate": 5.51323767729093e-06, + "loss": 1.7240329742431642, + "step": 770 + }, + { + "epoch": 0.5281868969019807, + "grad_norm": 0.87890625, + "learning_rate": 5.395535706835744e-06, + "loss": 1.7058921813964845, + "step": 780 + }, + { + "epoch": 0.5349585237853395, + "grad_norm": 0.8828125, + "learning_rate": 5.27761272468549e-06, + "loss": 1.6999113082885742, + "step": 790 + }, + { + "epoch": 0.5417301506686981, + "grad_norm": 0.9140625, + "learning_rate": 5.159534622130695e-06, + "loss": 1.7173538208007812, + "step": 800 + }, + { + "epoch": 0.5485017775520569, + "grad_norm": 0.85546875, + "learning_rate": 5.04136737713781e-06, + "loss": 1.706464958190918, + "step": 810 + }, + { + "epoch": 0.5552734044354156, + "grad_norm": 0.84765625, + "learning_rate": 4.923177017483002e-06, + "loss": 1.7123580932617188, + "step": 820 + }, + { + "epoch": 0.5620450313187744, + "grad_norm": 0.84765625, + "learning_rate": 4.805029583858115e-06, + "loss": 1.7076505661010741, + "step": 830 + }, + { + "epoch": 0.568816658202133, + "grad_norm": 0.87109375, + "learning_rate": 4.686991092969408e-06, + "loss": 1.7007432937622071, + "step": 840 + }, + { + "epoch": 0.5755882850854918, + "grad_norm": 0.83984375, + "learning_rate": 4.569127500649701e-06, + "loss": 1.7156892776489259, + "step": 850 + }, + { + "epoch": 0.5823599119688505, + "grad_norm": 0.85546875, + "learning_rate": 4.4515046650045316e-06, + "loss": 1.6989547729492187, + "step": 860 + }, + { + "epoch": 0.5891315388522093, + "grad_norm": 0.859375, + "learning_rate": 4.334188309612923e-06, + "loss": 1.701683235168457, + "step": 870 + }, + { + "epoch": 0.595903165735568, + "grad_norm": 0.875, + "learning_rate": 4.217243986803315e-06, + "loss": 1.7004409790039063, + "step": 880 + }, + { + "epoch": 0.6026747926189266, + "grad_norm": 0.88671875, + "learning_rate": 4.100737041025188e-06, + "loss": 1.727794075012207, + "step": 890 + }, + { + "epoch": 0.6094464195022854, + "grad_norm": 0.89453125, + "learning_rate": 3.984732572336837e-06, + "loss": 1.6976716995239258, + "step": 900 + }, + { + "epoch": 0.6162180463856441, + "grad_norm": 0.89453125, + "learning_rate": 3.869295400029714e-06, + "loss": 1.6927717208862305, + "step": 910 + }, + { + "epoch": 0.6229896732690029, + "grad_norm": 0.84375, + "learning_rate": 3.754490026409637e-06, + "loss": 1.6997186660766601, + "step": 920 + }, + { + "epoch": 0.6297613001523616, + "grad_norm": 0.93359375, + "learning_rate": 3.6403806007551373e-06, + "loss": 1.7196897506713866, + "step": 930 + }, + { + "epoch": 0.6365329270357203, + "grad_norm": 0.83203125, + "learning_rate": 3.527030883473055e-06, + "loss": 1.7054462432861328, + 
"step": 940 + }, + { + "epoch": 0.643304553919079, + "grad_norm": 0.890625, + "learning_rate": 3.414504210471421e-06, + "loss": 1.7200759887695312, + "step": 950 + }, + { + "epoch": 0.6500761808024378, + "grad_norm": 0.890625, + "learning_rate": 3.302863457769544e-06, + "loss": 1.6951274871826172, + "step": 960 + }, + { + "epoch": 0.6568478076857965, + "grad_norm": 0.90625, + "learning_rate": 3.192171006365061e-06, + "loss": 1.7151849746704102, + "step": 970 + }, + { + "epoch": 0.6636194345691553, + "grad_norm": 0.8984375, + "learning_rate": 3.0824887073775877e-06, + "loss": 1.713322067260742, + "step": 980 + }, + { + "epoch": 0.6703910614525139, + "grad_norm": 0.83984375, + "learning_rate": 2.973877847488451e-06, + "loss": 1.7172536849975586, + "step": 990 + }, + { + "epoch": 0.6771626883358727, + "grad_norm": 0.859375, + "learning_rate": 2.8663991146958064e-06, + "loss": 1.7149576187133788, + "step": 1000 + }, + { + "epoch": 0.6771626883358727, + "eval_loss": 1.7007688283920288, + "eval_runtime": 165.432, + "eval_samples_per_second": 5.785, + "eval_steps_per_second": 0.725, + "step": 1000 + }, + { + "epoch": 0.6839343152192314, + "grad_norm": 0.90625, + "learning_rate": 2.7601125644042777e-06, + "loss": 1.714142417907715, + "step": 1010 + }, + { + "epoch": 0.6907059421025902, + "grad_norm": 0.859375, + "learning_rate": 2.6550775858680793e-06, + "loss": 1.7104360580444335, + "step": 1020 + }, + { + "epoch": 0.6974775689859489, + "grad_norm": 0.90234375, + "learning_rate": 2.551352869006338e-06, + "loss": 1.7032684326171874, + "step": 1030 + }, + { + "epoch": 0.7042491958693076, + "grad_norm": 0.86328125, + "learning_rate": 2.4489963716092096e-06, + "loss": 1.701323890686035, + "step": 1040 + }, + { + "epoch": 0.7110208227526663, + "grad_norm": 0.890625, + "learning_rate": 2.348065286953048e-06, + "loss": 1.7169862747192384, + "step": 1050 + }, + { + "epoch": 0.7177924496360251, + "grad_norm": 0.87890625, + "learning_rate": 2.2486160118427958e-06, + "loss": 1.701096534729004, + "step": 1060 + }, + { + "epoch": 0.7245640765193838, + "grad_norm": 0.88671875, + "learning_rate": 2.1507041150993813e-06, + "loss": 1.700172233581543, + "step": 1070 + }, + { + "epoch": 0.7313357034027425, + "grad_norm": 0.859375, + "learning_rate": 2.054384306509794e-06, + "loss": 1.7045093536376954, + "step": 1080 + }, + { + "epoch": 0.7381073302861012, + "grad_norm": 0.859375, + "learning_rate": 1.9597104062571337e-06, + "loss": 1.7091920852661133, + "step": 1090 + }, + { + "epoch": 0.74487895716946, + "grad_norm": 0.86328125, + "learning_rate": 1.8667353148477547e-06, + "loss": 1.7001871109008788, + "step": 1100 + }, + { + "epoch": 0.7516505840528187, + "grad_norm": 0.85546875, + "learning_rate": 1.7755109835522938e-06, + "loss": 1.7016315460205078, + "step": 1110 + }, + { + "epoch": 0.7584222109361775, + "grad_norm": 0.87890625, + "learning_rate": 1.6860883853770848e-06, + "loss": 1.7196449279785155, + "step": 1120 + }, + { + "epoch": 0.7651938378195361, + "grad_norm": 0.89453125, + "learning_rate": 1.5985174865822146e-06, + "loss": 1.701955223083496, + "step": 1130 + }, + { + "epoch": 0.7719654647028948, + "grad_norm": 0.85546875, + "learning_rate": 1.5128472187620886e-06, + "loss": 1.703407096862793, + "step": 1140 + }, + { + "epoch": 0.7787370915862536, + "grad_norm": 0.875, + "learning_rate": 1.4291254515041592e-06, + "loss": 1.7057323455810547, + "step": 1150 + }, + { + "epoch": 0.7855087184696123, + "grad_norm": 0.8828125, + "learning_rate": 1.3473989656410413e-06, + "loss": 1.6963571548461913, + "step": 
1160 + }, + { + "epoch": 0.7922803453529711, + "grad_norm": 0.8671875, + "learning_rate": 1.2677134271110082e-06, + "loss": 1.7136796951293944, + "step": 1170 + }, + { + "epoch": 0.7990519722363297, + "grad_norm": 0.89453125, + "learning_rate": 1.1901133614414352e-06, + "loss": 1.7095062255859375, + "step": 1180 + }, + { + "epoch": 0.8058235991196885, + "grad_norm": 0.875, + "learning_rate": 1.114642128869473e-06, + "loss": 1.7052017211914063, + "step": 1190 + }, + { + "epoch": 0.8125952260030472, + "grad_norm": 0.8984375, + "learning_rate": 1.0413419001138525e-06, + "loss": 1.7166055679321288, + "step": 1200 + }, + { + "epoch": 0.819366852886406, + "grad_norm": 0.87890625, + "learning_rate": 9.702536328113305e-07, + "loss": 1.7042055130004883, + "step": 1210 + }, + { + "epoch": 0.8261384797697647, + "grad_norm": 0.8671875, + "learning_rate": 9.014170486309875e-07, + "loss": 1.6885286331176759, + "step": 1220 + }, + { + "epoch": 0.8329101066531234, + "grad_norm": 0.84375, + "learning_rate": 8.348706110791238e-07, + "loss": 1.7065910339355468, + "step": 1230 + }, + { + "epoch": 0.8396817335364821, + "grad_norm": 0.87109375, + "learning_rate": 7.706515040071854e-07, + "loss": 1.6999498367309571, + "step": 1240 + }, + { + "epoch": 0.8464533604198409, + "grad_norm": 0.8828125, + "learning_rate": 7.08795610834706e-07, + "loss": 1.7021600723266601, + "step": 1250 + }, + { + "epoch": 0.8532249873031996, + "grad_norm": 0.87890625, + "learning_rate": 6.493374944988984e-07, + "loss": 1.722920799255371, + "step": 1260 + }, + { + "epoch": 0.8599966141865584, + "grad_norm": 0.8671875, + "learning_rate": 5.923103781420708e-07, + "loss": 1.7148597717285157, + "step": 1270 + }, + { + "epoch": 0.866768241069917, + "grad_norm": 0.890625, + "learning_rate": 5.377461265476868e-07, + "loss": 1.7151250839233398, + "step": 1280 + }, + { + "epoch": 0.8735398679532758, + "grad_norm": 0.8671875, + "learning_rate": 4.856752283354277e-07, + "loss": 1.7023918151855468, + "step": 1290 + }, + { + "epoch": 0.8803114948366345, + "grad_norm": 0.8671875, + "learning_rate": 4.3612677892519496e-07, + "loss": 1.7045417785644532, + "step": 1300 + }, + { + "epoch": 0.8870831217199933, + "grad_norm": 0.86328125, + "learning_rate": 3.891284642796045e-07, + "loss": 1.7008039474487304, + "step": 1310 + }, + { + "epoch": 0.8938547486033519, + "grad_norm": 0.8671875, + "learning_rate": 3.447065454340198e-07, + "loss": 1.7126380920410156, + "step": 1320 + }, + { + "epoch": 0.9006263754867107, + "grad_norm": 0.88671875, + "learning_rate": 3.028858438227966e-07, + "loss": 1.7127569198608399, + "step": 1330 + }, + { + "epoch": 0.9073980023700694, + "grad_norm": 0.86328125, + "learning_rate": 2.636897274099187e-07, + "loss": 1.7151193618774414, + "step": 1340 + }, + { + "epoch": 0.9141696292534282, + "grad_norm": 0.8515625, + "learning_rate": 2.2714009763178945e-07, + "loss": 1.704157829284668, + "step": 1350 + }, + { + "epoch": 0.9209412561367869, + "grad_norm": 0.87890625, + "learning_rate": 1.932573771594648e-07, + "loss": 1.7036989212036133, + "step": 1360 + }, + { + "epoch": 0.9277128830201455, + "grad_norm": 0.8671875, + "learning_rate": 1.6206049848716765e-07, + "loss": 1.7044996261596679, + "step": 1370 + }, + { + "epoch": 0.9344845099035043, + "grad_norm": 1.109375, + "learning_rate": 1.3356689335346728e-07, + "loss": 1.7029462814331056, + "step": 1380 + }, + { + "epoch": 0.941256136786863, + "grad_norm": 0.91015625, + "learning_rate": 1.0779248300102352e-07, + "loss": 1.7133670806884767, + "step": 1390 + }, + { + "epoch": 
0.9480277636702218, + "grad_norm": 0.859375, + "learning_rate": 8.475166928034684e-08, + "loss": 1.6992549896240234, + "step": 1400 + }, + { + "epoch": 0.9547993905535805, + "grad_norm": 0.85546875, + "learning_rate": 6.445732660254056e-08, + "loss": 1.7066579818725587, + "step": 1410 + }, + { + "epoch": 0.9615710174369392, + "grad_norm": 0.9140625, + "learning_rate": 4.692079474552691e-08, + "loss": 1.6963106155395509, + "step": 1420 + }, + { + "epoch": 0.9683426443202979, + "grad_norm": 0.8515625, + "learning_rate": 3.2151872517767194e-08, + "loss": 1.7118385314941407, + "step": 1430 + }, + { + "epoch": 0.9751142712036567, + "grad_norm": 0.84375, + "learning_rate": 2.0158812283030403e-08, + "loss": 1.6870197296142577, + "step": 1440 + }, + { + "epoch": 0.9818858980870154, + "grad_norm": 0.87109375, + "learning_rate": 1.094831534925289e-08, + "loss": 1.7051671981811523, + "step": 1450 + }, + { + "epoch": 0.9886575249703742, + "grad_norm": 0.86328125, + "learning_rate": 4.5255282240802554e-09, + "loss": 1.7082006454467773, + "step": 1460 + }, + { + "epoch": 0.9954291518537328, + "grad_norm": 0.8828125, + "learning_rate": 8.940397391787869e-10, + "loss": 1.707107162475586, + "step": 1470 + }, + { + "epoch": 1.0, + "eval_loss": 1.7002202272415161, + "eval_runtime": 169.1979, + "eval_samples_per_second": 5.656, + "eval_steps_per_second": 0.709, + "step": 1477 + }, + { + "epoch": 1.0, + "step": 1477, + "total_flos": 2.103177196962902e+18, + "train_loss": 1.7256613558986822, + "train_runtime": 29239.084, + "train_samples_per_second": 1.616, + "train_steps_per_second": 0.051 + } + ], + "logging_steps": 10, + "max_steps": 1477, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.103177196962902e+18, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..70b9526 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:021e20fabb8f12442e13effbcc63f0a47b25ed87f82c678b87ee5792f87ef9bc +size 5777
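
For reference, the trainer state diffed above can be read back programmatically. This is a minimal sketch, not part of the commit, assuming the repository has been checked out locally and that the file lives at `checkpoint-1477/trainer_state.json` (the path is inferred from the checkpoint directory in this commit, not confirmed by the diff itself):

```python
import json

# Load the trainer state written by the HF Trainer during this run.
with open("checkpoint-1477/trainer_state.json") as f:
    state = json.load(f)

# "log_history" mixes three record shapes: training steps (keyed by "loss",
# "grad_norm", "learning_rate"), evaluation steps (keyed by "eval_loss"),
# and a final run summary (keyed by "train_loss"). Split on the keys present.
train_steps = [e for e in state["log_history"] if "grad_norm" in e]
eval_steps = [e for e in state["log_history"] if "eval_loss" in e]

for e in eval_steps:
    print(f"step {e['step']:>4}  eval_loss={e['eval_loss']:.4f}")
# Per the log above, this should print eval_loss 1.7144, 1.7008, and 1.7002
# at steps 500, 1000, and 1477 respectively.
```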