commit cda0f8938a1c1acd6c3889ad4b2be067738652dc
Author: ModelHub XC <noreply@modelhub.org.cn>
Date:   Fri Apr 24 12:05:40 2026 +0800

    初始化项目，由ModelHub XC社区提供模型
    
    Model: W-61/llama-3-8b-base-sft-hh-helpful-8xh200
    Source: Original Platform

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..52373fe
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..738338a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,65 @@
+---
+library_name: transformers
+base_model: meta-llama/Meta-Llama-3-8B
+tags:
+- alignment-handbook
+- generated_from_trainer
+datasets:
+- Anthropic/hh-rlhf
+model-index:
+- name: llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758
+  results: []
+---
+
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+
+# llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758
+
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the Anthropic/hh-rlhf dataset.
+It achieves the following results on the evaluation set:
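+- Loss: 1.3882 (last in-training evaluation, step 100 / epoch 0.7692; the final evaluation at epoch 1.0 recorded in `eval_results.json` is 1.3573)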
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 16
+- eval_batch_size: 16
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 8
+- total_train_batch_size: 128
+- total_eval_batch_size: 128
+- optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 1
+
+### Training results
+
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 1.3924        | 0.7692 | 100  | 1.3882          |
+
+
+### Framework versions
+
+- Transformers 4.51.0
+- Pytorch 2.3.1+cu121
+- Datasets 2.21.0
+- Tokenizers 0.21.4
diff --git a/all_results.json b/all_results.json
new file mode 100644
index 0000000..c28ac47
--- /dev/null
+++ b/all_results.json
@@ -0,0 +1,14 @@
+{
+    "epoch": 1.0,
+    "eval_loss": 1.3572595119476318,
+    "eval_runtime": 2.107,
+    "eval_samples": 2339,
+    "eval_samples_per_second": 424.776,
+    "eval_steps_per_second": 3.322,
+    "total_flos": 4.795466914988032e+16,
+    "train_loss": 1.8291644793290358,
+    "train_runtime": 404.0129,
+    "train_samples": 43598,
+    "train_samples_per_second": 40.88,
+    "train_steps_per_second": 0.322
+}
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..5092b09
--- /dev/null
+++ b/config.json
@@ -0,0 +1,29 @@
+{
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 128000,
+  "eos_token_id": 128001,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 8192,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.51.0",
+  "use_cache": true,
+  "vocab_size": 128256
+}
diff --git a/eval_results.json b/eval_results.json
new file mode 100644
index 0000000..d4be8df
--- /dev/null
+++ b/eval_results.json
@@ -0,0 +1,8 @@
+{
+    "epoch": 1.0,
+    "eval_loss": 1.3572595119476318,
+    "eval_runtime": 2.107,
+    "eval_samples": 2339,
+    "eval_samples_per_second": 424.776,
+    "eval_steps_per_second": 3.322
+}
\ No newline at end of file
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000..76247c9
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,9 @@
+{
+  "bos_token_id": 128000,
+  "do_sample": true,
+  "eos_token_id": 128001,
+  "max_length": 4096,
+  "temperature": 0.6,
+  "top_p": 0.9,
+  "transformers_version": "4.51.0"
+}
diff --git a/model-00001-of-00007.safetensors b/model-00001-of-00007.safetensors
new file mode 100644
index 0000000..605a32b
--- /dev/null
+++ b/model-00001-of-00007.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf6aba6eeffa7f614975db614e595d1240ab844ba3fb51150b268b07b7cc987f
+size 4886466168
diff --git a/model-00002-of-00007.safetensors b/model-00002-of-00007.safetensors
new file mode 100644
index 0000000..7c22e70
--- /dev/null
+++ b/model-00002-of-00007.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9e88afce8061d677df83c92b41b7b7c7e31a131f4194b903c1cb151ef0264b4
+size 4832007448
diff --git a/model-00003-of-00007.safetensors b/model-00003-of-00007.safetensors
new file mode 100644
index 0000000..0341e40
--- /dev/null
+++ b/model-00003-of-00007.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef0e60735d684283b06b0120c4a98b63b6cf60bdffb8b5f268b30763969b9066
+size 4999813112
diff --git a/model-00004-of-00007.safetensors b/model-00004-of-00007.safetensors
new file mode 100644
index 0000000..f8be0c0
--- /dev/null
+++ b/model-00004-of-00007.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d571b772dcd21a276d136cd4ba950d2821c5688c8dab3a6eb468d2b19fcf4a53
+size 4999813128
diff --git a/model-00005-of-00007.safetensors b/model-00005-of-00007.safetensors
new file mode 100644
index 0000000..72a3ff1
--- /dev/null
+++ b/model-00005-of-00007.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04554f5bfc6a067119812a56252a33ddc477717c981c8cd756643883167275bd
+size 4832007496
diff --git a/model-00006-of-00007.safetensors b/model-00006-of-00007.safetensors
new file mode 100644
index 0000000..06c5208
--- /dev/null
+++ b/model-00006-of-00007.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dfd35d703a6cff35f4f5be99a7e5d06de63f8db6f934730f52df61fda922a4e5
+size 4999813120
diff --git a/model-00007-of-00007.safetensors b/model-00007-of-00007.safetensors
new file mode 100644
index 0000000..31294be
--- /dev/null
+++ b/model-00007-of-00007.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:048ff2f02529f1ced889e2ae8ba7e28741166ebb6dc12927c3963eec31ec5465
+size 2571158184
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000..0985084
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1,298 @@
+{
+  "metadata": {
+    "total_size": 32121044992
+  },
+  "weight_map": {
+    "lm_head.weight": "model-00007-of-00007.safetensors",
+    "model.embed_tokens.weight": "model-00001-of-00007.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors",
+    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors",
+    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors",
+    "model.layers.10.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors",
+    "model.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors",
+    "model.layers.12.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00003-of-00007.safetensors",
+    "model.layers.13.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.13.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00004-of-00007.safetensors",
+    "model.layers.14.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.14.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00004-of-00007.safetensors",
+    "model.layers.15.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.15.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors",
+    "model.layers.16.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.16.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors",
+    "model.layers.17.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00004-of-00007.safetensors",
+    "model.layers.18.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.18.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00004-of-00007.safetensors",
+    "model.layers.19.mlp.down_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.19.mlp.up_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00007.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors",
+    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00005-of-00007.safetensors",
+    "model.layers.20.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.20.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00007.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00005-of-00007.safetensors",
+    "model.layers.21.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.21.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.21.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00005-of-00007.safetensors",
+    "model.layers.22.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.22.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.22.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.22.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00005-of-00007.safetensors",
+    "model.layers.23.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.23.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.23.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00005-of-00007.safetensors",
+    "model.layers.24.mlp.down_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.24.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00007.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00006-of-00007.safetensors",
+    "model.layers.25.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.25.mlp.up_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00007.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00006-of-00007.safetensors",
+    "model.layers.26.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.26.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.26.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00006-of-00007.safetensors",
+    "model.layers.27.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.27.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.27.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.27.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.27.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.28.input_layernorm.weight": "model-00006-of-00007.safetensors",
+    "model.layers.28.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.28.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.28.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.28.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
+    "model.layers.28.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.28.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.28.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.28.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.29.input_layernorm.weight": "model-00006-of-00007.safetensors",
+    "model.layers.29.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.29.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.29.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
+    "model.layers.29.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.29.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.29.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.29.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00002-of-00007.safetensors",
+    "model.layers.3.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.3.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
+    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00007.safetensors",
+    "model.layers.30.input_layernorm.weight": "model-00006-of-00007.safetensors",
+    "model.layers.30.mlp.down_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.30.mlp.up_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00007.safetensors",
+    "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.31.input_layernorm.weight": "model-00007-of-00007.safetensors",
+    "model.layers.31.mlp.down_proj.weight": "model-00007-of-00007.safetensors",
+    "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.31.mlp.up_proj.weight": "model-00007-of-00007.safetensors",
+    "model.layers.31.post_attention_layernorm.weight": "model-00007-of-00007.safetensors",
+    "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00007.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors",
+    "model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
+    "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors",
+    "model.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
+    "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors",
+    "model.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
+    "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00002-of-00007.safetensors",
+    "model.layers.7.mlp.down_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.7.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00007.safetensors",
+    "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.8.input_layernorm.weight": "model-00003-of-00007.safetensors",
+    "model.layers.8.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.8.mlp.up_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
+    "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00007.safetensors",
+    "model.layers.9.input_layernorm.weight": "model-00003-of-00007.safetensors",
+    "model.layers.9.mlp.down_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.9.mlp.up_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00007.safetensors",
+    "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00007.safetensors",
+    "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00007.safetensors",
+    "model.norm.weight": "model-00007-of-00007.safetensors"
+  }
+}
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000..04829af
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,17 @@
+{
+  "bos_token": {
+    "content": "<|begin_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|end_of_text|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|end_of_text|>"
+}
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000..86a3394
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393
+size 17209961
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000..8c6916a
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,2064 @@
+{
+  "added_tokens_decoder": {
+    "128000": {
+      "content": "<|begin_of_text|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128001": {
+      "content": "<|end_of_text|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128002": {
+      "content": "<|reserved_special_token_0|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128003": {
+      "content": "<|reserved_special_token_1|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128004": {
+      "content": "<|reserved_special_token_2|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128005": {
+      "content": "<|reserved_special_token_3|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128006": {
+      "content": "<|start_header_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128007": {
+      "content": "<|end_header_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128008": {
+      "content": "<|reserved_special_token_4|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128009": {
+      "content": "<|eot_id|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128010": {
+      "content": "<|reserved_special_token_5|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128011": {
+      "content": "<|reserved_special_token_6|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128012": {
+      "content": "<|reserved_special_token_7|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128013": {
+      "content": "<|reserved_special_token_8|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128014": {
+      "content": "<|reserved_special_token_9|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128015": {
+      "content": "<|reserved_special_token_10|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128016": {
+      "content": "<|reserved_special_token_11|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128017": {
+      "content": "<|reserved_special_token_12|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128018": {
+      "content": "<|reserved_special_token_13|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128019": {
+      "content": "<|reserved_special_token_14|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128020": {
+      "content": "<|reserved_special_token_15|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128021": {
+      "content": "<|reserved_special_token_16|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128022": {
+      "content": "<|reserved_special_token_17|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128023": {
+      "content": "<|reserved_special_token_18|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128024": {
+      "content": "<|reserved_special_token_19|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128025": {
+      "content": "<|reserved_special_token_20|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128026": {
+      "content": "<|reserved_special_token_21|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128027": {
+      "content": "<|reserved_special_token_22|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128028": {
+      "content": "<|reserved_special_token_23|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128029": {
+      "content": "<|reserved_special_token_24|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128030": {
+      "content": "<|reserved_special_token_25|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128031": {
+      "content": "<|reserved_special_token_26|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128032": {
+      "content": "<|reserved_special_token_27|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128033": {
+      "content": "<|reserved_special_token_28|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128034": {
+      "content": "<|reserved_special_token_29|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128035": {
+      "content": "<|reserved_special_token_30|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128036": {
+      "content": "<|reserved_special_token_31|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128037": {
+      "content": "<|reserved_special_token_32|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128038": {
+      "content": "<|reserved_special_token_33|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128039": {
+      "content": "<|reserved_special_token_34|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128040": {
+      "content": "<|reserved_special_token_35|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128041": {
+      "content": "<|reserved_special_token_36|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128042": {
+      "content": "<|reserved_special_token_37|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128043": {
+      "content": "<|reserved_special_token_38|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128044": {
+      "content": "<|reserved_special_token_39|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128045": {
+      "content": "<|reserved_special_token_40|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128046": {
+      "content": "<|reserved_special_token_41|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128047": {
+      "content": "<|reserved_special_token_42|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128048": {
+      "content": "<|reserved_special_token_43|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128049": {
+      "content": "<|reserved_special_token_44|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128050": {
+      "content": "<|reserved_special_token_45|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128051": {
+      "content": "<|reserved_special_token_46|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128052": {
+      "content": "<|reserved_special_token_47|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128053": {
+      "content": "<|reserved_special_token_48|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128054": {
+      "content": "<|reserved_special_token_49|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128055": {
+      "content": "<|reserved_special_token_50|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128056": {
+      "content": "<|reserved_special_token_51|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128057": {
+      "content": "<|reserved_special_token_52|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128058": {
+      "content": "<|reserved_special_token_53|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128059": {
+      "content": "<|reserved_special_token_54|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128060": {
+      "content": "<|reserved_special_token_55|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128061": {
+      "content": "<|reserved_special_token_56|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128062": {
+      "content": "<|reserved_special_token_57|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128063": {
+      "content": "<|reserved_special_token_58|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128064": {
+      "content": "<|reserved_special_token_59|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128065": {
+      "content": "<|reserved_special_token_60|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128066": {
+      "content": "<|reserved_special_token_61|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128067": {
+      "content": "<|reserved_special_token_62|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128068": {
+      "content": "<|reserved_special_token_63|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128069": {
+      "content": "<|reserved_special_token_64|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128070": {
+      "content": "<|reserved_special_token_65|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128071": {
+      "content": "<|reserved_special_token_66|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128072": {
+      "content": "<|reserved_special_token_67|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128073": {
+      "content": "<|reserved_special_token_68|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128074": {
+      "content": "<|reserved_special_token_69|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128075": {
+      "content": "<|reserved_special_token_70|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128076": {
+      "content": "<|reserved_special_token_71|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128077": {
+      "content": "<|reserved_special_token_72|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128078": {
+      "content": "<|reserved_special_token_73|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128079": {
+      "content": "<|reserved_special_token_74|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128080": {
+      "content": "<|reserved_special_token_75|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128081": {
+      "content": "<|reserved_special_token_76|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128082": {
+      "content": "<|reserved_special_token_77|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128083": {
+      "content": "<|reserved_special_token_78|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128084": {
+      "content": "<|reserved_special_token_79|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128085": {
+      "content": "<|reserved_special_token_80|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128086": {
+      "content": "<|reserved_special_token_81|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128087": {
+      "content": "<|reserved_special_token_82|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128088": {
+      "content": "<|reserved_special_token_83|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128089": {
+      "content": "<|reserved_special_token_84|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128090": {
+      "content": "<|reserved_special_token_85|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128091": {
+      "content": "<|reserved_special_token_86|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128092": {
+      "content": "<|reserved_special_token_87|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128093": {
+      "content": "<|reserved_special_token_88|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128094": {
+      "content": "<|reserved_special_token_89|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128095": {
+      "content": "<|reserved_special_token_90|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128096": {
+      "content": "<|reserved_special_token_91|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128097": {
+      "content": "<|reserved_special_token_92|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128098": {
+      "content": "<|reserved_special_token_93|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128099": {
+      "content": "<|reserved_special_token_94|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128100": {
+      "content": "<|reserved_special_token_95|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128101": {
+      "content": "<|reserved_special_token_96|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128102": {
+      "content": "<|reserved_special_token_97|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128103": {
+      "content": "<|reserved_special_token_98|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128104": {
+      "content": "<|reserved_special_token_99|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128105": {
+      "content": "<|reserved_special_token_100|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128106": {
+      "content": "<|reserved_special_token_101|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128107": {
+      "content": "<|reserved_special_token_102|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128108": {
+      "content": "<|reserved_special_token_103|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128109": {
+      "content": "<|reserved_special_token_104|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128110": {
+      "content": "<|reserved_special_token_105|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128111": {
+      "content": "<|reserved_special_token_106|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128112": {
+      "content": "<|reserved_special_token_107|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128113": {
+      "content": "<|reserved_special_token_108|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128114": {
+      "content": "<|reserved_special_token_109|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128115": {
+      "content": "<|reserved_special_token_110|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128116": {
+      "content": "<|reserved_special_token_111|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128117": {
+      "content": "<|reserved_special_token_112|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128118": {
+      "content": "<|reserved_special_token_113|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128119": {
+      "content": "<|reserved_special_token_114|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128120": {
+      "content": "<|reserved_special_token_115|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128121": {
+      "content": "<|reserved_special_token_116|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128122": {
+      "content": "<|reserved_special_token_117|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128123": {
+      "content": "<|reserved_special_token_118|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128124": {
+      "content": "<|reserved_special_token_119|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128125": {
+      "content": "<|reserved_special_token_120|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128126": {
+      "content": "<|reserved_special_token_121|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128127": {
+      "content": "<|reserved_special_token_122|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128128": {
+      "content": "<|reserved_special_token_123|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128129": {
+      "content": "<|reserved_special_token_124|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128130": {
+      "content": "<|reserved_special_token_125|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128131": {
+      "content": "<|reserved_special_token_126|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128132": {
+      "content": "<|reserved_special_token_127|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128133": {
+      "content": "<|reserved_special_token_128|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128134": {
+      "content": "<|reserved_special_token_129|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128135": {
+      "content": "<|reserved_special_token_130|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128136": {
+      "content": "<|reserved_special_token_131|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128137": {
+      "content": "<|reserved_special_token_132|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128138": {
+      "content": "<|reserved_special_token_133|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128139": {
+      "content": "<|reserved_special_token_134|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128140": {
+      "content": "<|reserved_special_token_135|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128141": {
+      "content": "<|reserved_special_token_136|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128142": {
+      "content": "<|reserved_special_token_137|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128143": {
+      "content": "<|reserved_special_token_138|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128144": {
+      "content": "<|reserved_special_token_139|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128145": {
+      "content": "<|reserved_special_token_140|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128146": {
+      "content": "<|reserved_special_token_141|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128147": {
+      "content": "<|reserved_special_token_142|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128148": {
+      "content": "<|reserved_special_token_143|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128149": {
+      "content": "<|reserved_special_token_144|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128150": {
+      "content": "<|reserved_special_token_145|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128151": {
+      "content": "<|reserved_special_token_146|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128152": {
+      "content": "<|reserved_special_token_147|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128153": {
+      "content": "<|reserved_special_token_148|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128154": {
+      "content": "<|reserved_special_token_149|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128155": {
+      "content": "<|reserved_special_token_150|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128156": {
+      "content": "<|reserved_special_token_151|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128157": {
+      "content": "<|reserved_special_token_152|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128158": {
+      "content": "<|reserved_special_token_153|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128159": {
+      "content": "<|reserved_special_token_154|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128160": {
+      "content": "<|reserved_special_token_155|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128161": {
+      "content": "<|reserved_special_token_156|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128162": {
+      "content": "<|reserved_special_token_157|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128163": {
+      "content": "<|reserved_special_token_158|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128164": {
+      "content": "<|reserved_special_token_159|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128165": {
+      "content": "<|reserved_special_token_160|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128166": {
+      "content": "<|reserved_special_token_161|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128167": {
+      "content": "<|reserved_special_token_162|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128168": {
+      "content": "<|reserved_special_token_163|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128169": {
+      "content": "<|reserved_special_token_164|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128170": {
+      "content": "<|reserved_special_token_165|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128171": {
+      "content": "<|reserved_special_token_166|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128172": {
+      "content": "<|reserved_special_token_167|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128173": {
+      "content": "<|reserved_special_token_168|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128174": {
+      "content": "<|reserved_special_token_169|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128175": {
+      "content": "<|reserved_special_token_170|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128176": {
+      "content": "<|reserved_special_token_171|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128177": {
+      "content": "<|reserved_special_token_172|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128178": {
+      "content": "<|reserved_special_token_173|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128179": {
+      "content": "<|reserved_special_token_174|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128180": {
+      "content": "<|reserved_special_token_175|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128181": {
+      "content": "<|reserved_special_token_176|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128182": {
+      "content": "<|reserved_special_token_177|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128183": {
+      "content": "<|reserved_special_token_178|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128184": {
+      "content": "<|reserved_special_token_179|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128185": {
+      "content": "<|reserved_special_token_180|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128186": {
+      "content": "<|reserved_special_token_181|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128187": {
+      "content": "<|reserved_special_token_182|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128188": {
+      "content": "<|reserved_special_token_183|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128189": {
+      "content": "<|reserved_special_token_184|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128190": {
+      "content": "<|reserved_special_token_185|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128191": {
+      "content": "<|reserved_special_token_186|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128192": {
+      "content": "<|reserved_special_token_187|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128193": {
+      "content": "<|reserved_special_token_188|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128194": {
+      "content": "<|reserved_special_token_189|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128195": {
+      "content": "<|reserved_special_token_190|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128196": {
+      "content": "<|reserved_special_token_191|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128197": {
+      "content": "<|reserved_special_token_192|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128198": {
+      "content": "<|reserved_special_token_193|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128199": {
+      "content": "<|reserved_special_token_194|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128200": {
+      "content": "<|reserved_special_token_195|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128201": {
+      "content": "<|reserved_special_token_196|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128202": {
+      "content": "<|reserved_special_token_197|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128203": {
+      "content": "<|reserved_special_token_198|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128204": {
+      "content": "<|reserved_special_token_199|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128205": {
+      "content": "<|reserved_special_token_200|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128206": {
+      "content": "<|reserved_special_token_201|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128207": {
+      "content": "<|reserved_special_token_202|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128208": {
+      "content": "<|reserved_special_token_203|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128209": {
+      "content": "<|reserved_special_token_204|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128210": {
+      "content": "<|reserved_special_token_205|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128211": {
+      "content": "<|reserved_special_token_206|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128212": {
+      "content": "<|reserved_special_token_207|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128213": {
+      "content": "<|reserved_special_token_208|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128214": {
+      "content": "<|reserved_special_token_209|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128215": {
+      "content": "<|reserved_special_token_210|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128216": {
+      "content": "<|reserved_special_token_211|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128217": {
+      "content": "<|reserved_special_token_212|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128218": {
+      "content": "<|reserved_special_token_213|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128219": {
+      "content": "<|reserved_special_token_214|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128220": {
+      "content": "<|reserved_special_token_215|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128221": {
+      "content": "<|reserved_special_token_216|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128222": {
+      "content": "<|reserved_special_token_217|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128223": {
+      "content": "<|reserved_special_token_218|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128224": {
+      "content": "<|reserved_special_token_219|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128225": {
+      "content": "<|reserved_special_token_220|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128226": {
+      "content": "<|reserved_special_token_221|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128227": {
+      "content": "<|reserved_special_token_222|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128228": {
+      "content": "<|reserved_special_token_223|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128229": {
+      "content": "<|reserved_special_token_224|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128230": {
+      "content": "<|reserved_special_token_225|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128231": {
+      "content": "<|reserved_special_token_226|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128232": {
+      "content": "<|reserved_special_token_227|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128233": {
+      "content": "<|reserved_special_token_228|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128234": {
+      "content": "<|reserved_special_token_229|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128235": {
+      "content": "<|reserved_special_token_230|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128236": {
+      "content": "<|reserved_special_token_231|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128237": {
+      "content": "<|reserved_special_token_232|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128238": {
+      "content": "<|reserved_special_token_233|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128239": {
+      "content": "<|reserved_special_token_234|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128240": {
+      "content": "<|reserved_special_token_235|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128241": {
+      "content": "<|reserved_special_token_236|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128242": {
+      "content": "<|reserved_special_token_237|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128243": {
+      "content": "<|reserved_special_token_238|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128244": {
+      "content": "<|reserved_special_token_239|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128245": {
+      "content": "<|reserved_special_token_240|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128246": {
+      "content": "<|reserved_special_token_241|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128247": {
+      "content": "<|reserved_special_token_242|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128248": {
+      "content": "<|reserved_special_token_243|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128249": {
+      "content": "<|reserved_special_token_244|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128250": {
+      "content": "<|reserved_special_token_245|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128251": {
+      "content": "<|reserved_special_token_246|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128252": {
+      "content": "<|reserved_special_token_247|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128253": {
+      "content": "<|reserved_special_token_248|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128254": {
+      "content": "<|reserved_special_token_249|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "128255": {
+      "content": "<|reserved_special_token_250|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|begin_of_text|>",
+  "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|end_of_text|>",
+  "extra_special_tokens": {},
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
+  "model_max_length": 2048,
+  "pad_token": "<|end_of_text|>",
+  "tokenizer_class": "PreTrainedTokenizer"
+}
diff --git a/train.log b/train.log
new file mode 100644
index 0000000..1d2d0a6
--- /dev/null
+++ b/train.log
@@ -0,0 +1,1240 @@
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/transformers/utils/hub.py:105: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/transformers/utils/hub.py:105: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/transformers/utils/hub.py:105: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/transformers/utils/hub.py:105: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/transformers/utils/hub.py:105: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/transformers/utils/hub.py:105: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/transformers/utils/hub.py:105: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/transformers/utils/hub.py:105: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/transformers/utils/hub.py:105: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead.
+  warnings.warn(
+2026-04-10 13:38:21 - WARNING - __main__ - Process rank: 2, device: cuda:2, n_gpu: 1 distributed training: True, 16-bits training: False
+2026-04-10 13:38:21 - WARNING - __main__ - Process rank: 7, device: cuda:7, n_gpu: 1 distributed training: True, 16-bits training: False
+2026-04-10 13:38:21 - WARNING - __main__ - Process rank: 1, device: cuda:1, n_gpu: 1 distributed training: True, 16-bits training: False
+2026-04-10 13:38:21 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1 distributed training: True, 16-bits training: False
+2026-04-10 13:38:21 - INFO - __main__ - Model parameters ModelArguments(base_model_revision=None, model_name_or_path='/scratch/feng.yulu/dynamic-dpo-v4/base_models/Meta-Llama-3-8B', model_revision='main', model_code_revision=None, torch_dtype='bfloat16', tokenizer_name_or_path=None, trust_remote_code=False, attn_implementation='flash_attention_2', use_peft=False, lora_r=16, lora_alpha=32, lora_dropout=0.05, lora_target_modules=None, lora_modules_to_save=None, load_in_8bit=False, load_in_4bit=False, bnb_4bit_quant_type='nf4', use_bnb_nested_quant=False, bnb_4bit_quant_storage='uint8')
+2026-04-10 13:38:21 - INFO - __main__ - Data parameters DataArguments(chat_template="{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", dataset_mixer={'Anthropic/hh-rlhf': 1.0}, text_column='text', dataset_splits=['train', 'test'], dataset_configs=['helpful-base'], dataset_dir=None, preprocessing_num_workers=12, use_persistent_hf_cache=False, hf_cache_dir=None, truncation_side=None, auto_insert_empty_system_msg=True, preprocessing_log_samples=0, preprocessing_log_dir=None)
+2026-04-10 13:38:21 - INFO - __main__ - Training/evaluation parameters SFTConfig(
+_n_gpu=1,
+accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
+adafactor=False,
+adam_beta1=0.9,
+adam_beta2=0.999,
+adam_epsilon=1e-08,
+auto_find_batch_size=False,
+average_tokens_across_devices=False,
+batch_eval_metrics=False,
+bf16=True,
+bf16_full_eval=False,
+chars_per_token=<CHARS_PER_TOKEN>,
+data_seed=None,
+dataloader_drop_last=False,
+dataloader_num_workers=0,
+dataloader_persistent_workers=False,
+dataloader_pin_memory=True,
+dataloader_prefetch_factor=None,
+dataset_batch_size=1000,
+dataset_kwargs=None,
+dataset_num_proc=None,
+dataset_text_field=None,
+ddp_backend=None,
+ddp_broadcast_buffers=None,
+ddp_bucket_cap_mb=None,
+ddp_find_unused_parameters=None,
+ddp_timeout=1800,
+debug=[],
+deepspeed=None,
+disable_tqdm=False,
+do_eval=True,
+do_predict=False,
+do_train=False,
+eval_accumulation_steps=None,
+eval_delay=0,
+eval_do_concat_batches=True,
+eval_on_start=False,
+eval_packing=None,
+eval_steps=100,
+eval_strategy=IntervalStrategy.STEPS,
+eval_use_gather_object=False,
+fp16=False,
+fp16_backend=auto,
+fp16_full_eval=False,
+fp16_opt_level=O1,
+fsdp=[],
+fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
+fsdp_min_num_params=0,
+fsdp_transformer_layer_cls_to_wrap=None,
+full_determinism=False,
+gradient_accumulation_steps=1,
+gradient_checkpointing=True,
+gradient_checkpointing_kwargs={'use_reentrant': False},
+greater_is_better=None,
+group_by_length=False,
+half_precision_backend=auto,
+hub_always_push=False,
+hub_model_id=W-61/llama-3-8b-base-sft-hh-helpful-4xh200,
+hub_model_revision=main,
+hub_private_repo=None,
+hub_strategy=HubStrategy.END,
+hub_token=<HUB_TOKEN>,
+ignore_data_skip=False,
+include_for_metrics=[],
+include_inputs_for_metrics=False,
+include_num_input_tokens_seen=False,
+include_tokens_per_second=False,
+jit_mode_eval=False,
+label_names=None,
+label_smoothing_factor=0.0,
+learning_rate=2e-05,
+length_column_name=length,
+load_best_model_at_end=False,
+local_rank=0,
+log_level=info,
+log_level_replica=warning,
+log_on_each_node=True,
+logging_dir=outputs/llama-3-8b-base-sft-hh-helpful-4xh200/runs/Apr10_13-38-20_d4054,
+logging_first_step=True,
+logging_nan_inf_filter=True,
+logging_steps=5,
+logging_strategy=IntervalStrategy.STEPS,
+lr_scheduler_kwargs={},
+lr_scheduler_type=SchedulerType.COSINE,
+max_grad_norm=1.0,
+max_seq_length=512,
+max_steps=-1,
+metric_for_best_model=None,
+model_init_kwargs=None,
+mp_parameters=,
+neftune_noise_alpha=None,
+no_cuda=False,
+num_of_sequences=1024,
+num_train_epochs=1,
+optim=OptimizerNames.ADAMW_TORCH,
+optim_args=None,
+optim_target_modules=None,
+output_dir=/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758,
+overwrite_output_dir=True,
+packing=False,
+past_index=-1,
+per_device_eval_batch_size=16,
+per_device_train_batch_size=16,
+prediction_loss_only=False,
+push_to_hub=False,
+push_to_hub_model_id=None,
+push_to_hub_organization=None,
+push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
+ray_scope=last,
+remove_unused_columns=True,
+report_to=['wandb'],
+restore_callback_states_from_checkpoint=False,
+resume_from_checkpoint=None,
+run_name=llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758,
+save_on_each_node=False,
+save_only_model=False,
+save_safetensors=True,
+save_steps=200,
+save_strategy=SaveStrategy.STEPS,
+save_total_limit=2,
+seed=42,
+skip_memory_metrics=True,
+tf32=None,
+torch_compile=False,
+torch_compile_backend=None,
+torch_compile_mode=None,
+torch_empty_cache_steps=None,
+torchdynamo=None,
+tp_size=0,
+tpu_metrics_debug=False,
+tpu_num_cores=None,
+use_cpu=False,
+use_ipex=False,
+use_legacy_prediction_loop=False,
+use_liger=False,
+use_liger_kernel=False,
+use_mps_device=False,
+warmup_ratio=0.1,
+warmup_steps=0,
+weight_decay=0.0,
+)
+2026-04-10 13:38:21 - WARNING - __main__ - Process rank: 4, device: cuda:4, n_gpu: 1 distributed training: True, 16-bits training: False
+2026-04-10 13:38:21 - WARNING - __main__ - Process rank: 5, device: cuda:5, n_gpu: 1 distributed training: True, 16-bits training: False
+2026-04-10 13:38:21 - WARNING - __main__ - Process rank: 3, device: cuda:3, n_gpu: 1 distributed training: True, 16-bits training: False
+2026-04-10 13:38:21 - WARNING - __main__ - Process rank: 6, device: cuda:6, n_gpu: 1 distributed training: True, 16-bits training: False
+Downloading readme:   0%|          | 0.00/5.77k [00:00<?, ?B/s]Downloading readme:  39%|███▉      | 2.24k/5.77k [00:00<00:00, 19.7kB/s]Downloading readme: 100%|██████████| 5.77k/5.77k [00:00<00:00, 49.3kB/s]
+Downloading data:   0%|          | 0.00/16.2M [00:00<?, ?B/s]No config specified, defaulting to the single config: hh-rlhf/default
+2026-04-10 13:38:22 - INFO - datasets.builder - No config specified, defaulting to the single config: hh-rlhf/default
+Using custom data configuration default-cfba128a0ab1b99f
+2026-04-10 13:38:22 - INFO - datasets.builder - Using custom data configuration default-cfba128a0ab1b99f
+Loading Dataset Infos from /home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/datasets/packaged_modules/json
+2026-04-10 13:38:22 - INFO - datasets.info - Loading Dataset Infos from /home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/datasets/packaged_modules/json
+Downloading data:  65%|██████▍   | 10.5M/16.2M [00:00<00:00, 14.0MB/s]Downloading data: 100%|██████████| 16.2M/16.2M [00:01<00:00, 15.4MB/s]Downloading data: 100%|██████████| 16.2M/16.2M [00:01<00:00, 14.6MB/s]
+Downloading data:   0%|          | 0.00/875k [00:00<?, ?B/s]Downloading data: 100%|██████████| 875k/875k [00:00<00:00, 7.10MB/s]Downloading data: 100%|██████████| 875k/875k [00:00<00:00, 6.86MB/s]
+Generating train split: 0 examples [00:00, ? examples/s]Generating train split: 13255 examples [00:00, 92408.74 examples/s]Generating train split: 26541 examples [00:00, 97720.85 examples/s]Generating train split: 39887 examples [00:00, 102388.55 examples/s]Generating train split: 43835 examples [00:00, 88529.69 examples/s] 
+Generating test split: 0 examples [00:00, ? examples/s]Generating test split: 2354 examples [00:00, 70834.38 examples/s]
+Found cached dataset hh-rlhf (/scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa)
+2026-04-10 13:38:24 - INFO - datasets.builder - Found cached dataset hh-rlhf (/scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa)
+Loading Dataset info from /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa
+2026-04-10 13:38:24 - INFO - datasets.info - Loading Dataset info from /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa
+2026-04-10 13:38:26 - WARNING - alignment.data - Dropped 237 non-canonical HH preference examples from split `train` before normalization (126 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 111 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
+2026-04-10 13:38:26 - WARNING - alignment.data - Dropped 237 non-canonical HH preference examples from split `train` before normalization (126 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 111 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
+Normalizing raw HH preferences (train):   0%|          | 0/43598 [00:00<?, ? examples/s]Normalizing raw HH preferences (train):   0%|          | 0/43598 [00:00<?, ? examples/s]2026-04-10 13:38:26 - WARNING - alignment.data - Dropped 237 non-canonical HH preference examples from split `train` before normalization (126 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 111 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
+Normalizing raw HH preferences (train):   0%|          | 0/43598 [00:00<?, ? examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d6e6bfbe34161664.arrow
+2026-04-10 13:38:26 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d6e6bfbe34161664.arrow
+2026-04-10 13:38:26 - WARNING - alignment.data - Dropped 237 non-canonical HH preference examples from split `train` before normalization (126 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 111 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
+Normalizing raw HH preferences (train):   0%|          | 0/43598 [00:00<?, ? examples/s]2026-04-10 13:38:26 - WARNING - alignment.data - Dropped 237 non-canonical HH preference examples from split `train` before normalization (126 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 111 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
+Normalizing raw HH preferences (train):   0%|          | 0/43598 [00:00<?, ? examples/s]2026-04-10 13:38:26 - WARNING - alignment.data - Dropped 237 non-canonical HH preference examples from split `train` before normalization (126 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 111 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
+Normalizing raw HH preferences (train):   0%|          | 0/43598 [00:00<?, ? examples/s]2026-04-10 13:38:26 - WARNING - alignment.data - Dropped 237 non-canonical HH preference examples from split `train` before normalization (126 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 111 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
+2026-04-10 13:38:26 - WARNING - alignment.data - Dropped 237 non-canonical HH preference examples from split `train` before normalization (126 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 111 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
+Normalizing raw HH preferences (train):   0%|          | 0/43598 [00:00<?, ? examples/s]Normalizing raw HH preferences (train):   0%|          | 0/43598 [00:00<?, ? examples/s]Normalizing raw HH preferences (train):   3%|▎         | 1101/43598 [00:00<00:03, 10959.87 examples/s]Normalizing raw HH preferences (train):   3%|▎         | 1128/43598 [00:00<00:03, 11219.64 examples/s]Normalizing raw HH preferences (train):   3%|▎         | 1125/43598 [00:00<00:03, 11174.40 examples/s]Normalizing raw HH preferences (train):   3%|▎         | 1164/43598 [00:00<00:03, 11586.70 examples/s]Normalizing raw HH preferences (train):   3%|▎         | 1162/43598 [00:00<00:03, 11564.67 examples/s]Normalizing raw HH preferences (train):   3%|▎         | 1154/43598 [00:00<00:03, 11484.37 examples/s]Normalizing raw HH preferences (train):   3%|▎         | 1167/43598 [00:00<00:03, 11623.90 examples/s]Normalizing raw HH preferences (train):   3%|▎         | 1161/43598 [00:00<00:03, 11555.60 examples/s]Normalizing raw HH preferences (train):   5%|▌         | 2341/43598 [00:00<00:03, 11801.60 examples/s]Normalizing raw HH preferences (train):   5%|▌         | 2369/43598 [00:00<00:03, 11908.11 examples/s]Normalizing raw HH preferences (train):   5%|▌         | 2374/43598 [00:00<00:03, 11940.63 examples/s]Normalizing raw HH preferences (train):   6%|▌         | 2456/43598 [00:00<00:03, 12363.79 examples/s]Normalizing raw HH preferences (train):   6%|▌         | 2460/43598 [00:00<00:03, 12391.17 examples/s]Normalizing raw HH preferences (train):   6%|▌         | 2429/43598 [00:00<00:03, 12224.80 examples/s]Normalizing raw HH preferences (train):   6%|▌         | 2466/43598 [00:00<00:03, 12419.94 examples/s]Normalizing raw HH preferences (train):   6%|▌         | 2451/43598 [00:00<00:03, 12338.10 examples/s]Normalizing raw HH preferences (train):   8%|▊         | 3704/43598 [00:00<00:03, 12380.88 examples/s]Normalizing raw HH preferences (train):   8%|▊         | 3700/43598 
[00:00<00:03, 12307.91 examples/s]Normalizing raw HH preferences (train):   8%|▊         | 3705/43598 [00:00<00:03, 12412.59 examples/s]Normalizing raw HH preferences (train):   9%|▊         | 3741/43598 [00:00<00:03, 12583.64 examples/s]Normalizing raw HH preferences (train):   9%|▊         | 3748/43598 [00:00<00:03, 12610.03 examples/s]Normalizing raw HH preferences (train):   8%|▊         | 3702/43598 [00:00<00:03, 12448.81 examples/s]Normalizing raw HH preferences (train):   9%|▊         | 3757/43598 [00:00<00:03, 12635.97 examples/s]Normalizing raw HH preferences (train):   9%|▊         | 3732/43598 [00:00<00:03, 12550.55 examples/s]Normalizing raw HH preferences (train):  11%|█▏        | 4974/43598 [00:00<00:03, 12499.00 examples/s]Normalizing raw HH preferences (train):  11%|█▏        | 4936/43598 [00:00<00:03, 12324.82 examples/s]Normalizing raw HH preferences (train):  11%|█▏        | 4977/43598 [00:00<00:03, 12529.11 examples/s]Normalizing raw HH preferences (train):  11%|█▏        | 4951/43598 [00:00<00:03, 12459.13 examples/s]Normalizing raw HH preferences (train):  13%|█▎        | 5474/43598 [00:00<00:03, 12070.50 examples/s]Normalizing raw HH preferences (train):  13%|█▎        | 5705/43598 [00:00<00:03, 12484.42 examples/s]Normalizing raw HH preferences (train):  13%|█▎        | 5550/43598 [00:00<00:03, 12295.75 examples/s]Normalizing raw HH preferences (train):  13%|█▎        | 5574/43598 [00:00<00:03, 12412.73 examples/s]Normalizing raw HH preferences (train):  16%|█▌        | 6838/43598 [00:00<00:02, 12461.94 examples/s]Normalizing raw HH preferences (train):  16%|█▌        | 6774/43598 [00:00<00:02, 12286.78 examples/s]Normalizing raw HH preferences (train):  16%|█▌        | 6847/43598 [00:00<00:02, 12496.35 examples/s]Normalizing raw HH preferences (train):  15%|█▌        | 6740/43598 [00:00<00:03, 12258.77 examples/s]Normalizing raw HH preferences (train):  16%|█▌        | 6983/43598 [00:00<00:02, 12573.32 examples/s]
Normalizing raw HH preferences (train):  16%|█▌        | 6779/43598 [00:00<00:02, 12331.32 examples/s]Normalizing raw HH preferences (train):  16%|█▌        | 6817/43598 [00:00<00:02, 12412.03 examples/s]Normalizing raw HH preferences (train):  16%|█▌        | 6836/43598 [00:00<00:02, 12478.11 examples/s]Normalizing raw HH preferences (train):  18%|█▊        | 8000/43598 [00:00<00:02, 12160.25 examples/s]Normalizing raw HH preferences (train):  20%|█▉        | 8715/43598 [00:00<00:02, 12479.66 examples/s]Normalizing raw HH preferences (train):  20%|█▉        | 8691/43598 [00:00<00:02, 12366.95 examples/s]Normalizing raw HH preferences (train):  20%|██        | 8736/43598 [00:00<00:02, 12529.29 examples/s]Normalizing raw HH preferences (train):  20%|██        | 8869/43598 [00:00<00:02, 12570.47 examples/s]Normalizing raw HH preferences (train):  20%|█▉        | 8684/43598 [00:00<00:02, 12348.12 examples/s]Normalizing raw HH preferences (train):  20%|█▉        | 8698/43598 [00:00<00:02, 12460.95 examples/s]Normalizing raw HH preferences (train):  20%|█▉        | 8701/43598 [00:00<00:02, 12457.83 examples/s]Normalizing raw HH preferences (train):  21%|██▏       | 9279/43598 [00:00<00:02, 12350.15 examples/s]Normalizing raw HH preferences (train):  23%|██▎       | 9972/43598 [00:00<00:02, 12485.23 examples/s]Normalizing raw HH preferences (train):  23%|██▎       | 10000/43598 [00:00<00:02, 12340.30 examples/s]Normalizing raw HH preferences (train):  23%|██▎       | 10000/43598 [00:00<00:02, 12359.42 examples/s]Normalizing raw HH preferences (train):  23%|██▎       | 9956/43598 [00:00<00:02, 12447.31 examples/s]Normalizing raw HH preferences (train):  23%|██▎       | 9986/43598 [00:00<00:02, 12576.17 examples/s]Normalizing raw HH preferences (train):  23%|██▎       | 9986/43598 [00:00<00:02, 12562.60 examples/s]Normalizing raw HH preferences (train):  24%|██▍       | 10530/43598 [00:00<00:02, 12393.96 examples/s]Normalizing raw HH preferences (train): 
 26%|██▌       | 11308/43598 [00:00<00:02, 12542.06 examples/s]Normalizing raw HH preferences (train):  25%|██▍       | 10760/43598 [00:00<00:02, 12580.36 examples/s]Normalizing raw HH preferences (train):  26%|██▌       | 11309/43598 [00:00<00:02, 12559.61 examples/s]Normalizing raw HH preferences (train):  27%|██▋       | 11855/43598 [00:00<00:02, 12508.01 examples/s]Normalizing raw HH preferences (train):  27%|██▋       | 11819/43598 [00:00<00:02, 12539.64 examples/s]Normalizing raw HH preferences (train):  29%|██▉       | 12704/43598 [00:01<00:02, 12684.33 examples/s]Normalizing raw HH preferences (train):  27%|██▋       | 11824/43598 [00:00<00:02, 12447.62 examples/s]Normalizing raw HH preferences (train):  29%|██▉       | 12704/43598 [00:01<00:02, 12707.26 examples/s]Normalizing raw HH preferences (train):  27%|██▋       | 11887/43598 [00:00<00:02, 12607.31 examples/s]Normalizing raw HH preferences (train):  27%|██▋       | 11851/43598 [00:00<00:02, 12510.56 examples/s]Normalizing raw HH preferences (train):  29%|██▉       | 12704/43598 [00:01<00:02, 12615.09 examples/s]Normalizing raw HH preferences (train):  31%|███▏      | 13715/43598 [00:01<00:02, 12467.82 examples/s]Normalizing raw HH preferences (train):  32%|███▏      | 13976/43598 [00:01<00:02, 12691.52 examples/s]Normalizing raw HH preferences (train):  31%|███▏      | 13691/43598 [00:01<00:02, 12478.07 examples/s]Normalizing raw HH preferences (train):  32%|███▏      | 13983/43598 [00:01<00:02, 12654.79 examples/s]Normalizing raw HH preferences (train):  31%|███▏      | 13684/43598 [00:01<00:02, 12415.05 examples/s]Normalizing raw HH preferences (train):  33%|███▎      | 14527/43598 [00:01<00:02, 12495.63 examples/s]Normalizing raw HH preferences (train):  32%|███▏      | 13761/43598 [00:01<00:02, 12562.67 examples/s]Normalizing raw HH preferences (train):  31%|███▏      | 13703/43598 [00:01<00:02, 12450.01 examples/s]Normalizing raw HH preferences (train):  34%|███▍      | 
14995/43598 [00:01<00:02, 12548.27 examples/s]Normalizing raw HH preferences (train):  34%|███▍      | 14970/43598 [00:01<00:02, 12560.46 examples/s]Normalizing raw HH preferences (train):  36%|███▋      | 15875/43598 [00:01<00:02, 12676.01 examples/s]Normalizing raw HH preferences (train):  34%|███▍      | 14956/43598 [00:01<00:02, 12487.60 examples/s]Normalizing raw HH preferences (train):  36%|███▋      | 15834/43598 [00:01<00:02, 12645.49 examples/s]Normalizing raw HH preferences (train):  34%|███▍      | 14976/43598 [00:01<00:02, 12518.37 examples/s]Normalizing raw HH preferences (train):  36%|███▋      | 15889/43598 [00:01<00:02, 12670.54 examples/s]Normalizing raw HH preferences (train):  36%|███▌      | 15704/43598 [00:01<00:02, 12580.64 examples/s]Normalizing raw HH preferences (train):  39%|███▊      | 16877/43598 [00:01<00:02, 12544.86 examples/s]Normalizing raw HH preferences (train):  39%|███▊      | 16849/43598 [00:01<00:02, 12544.75 examples/s]Normalizing raw HH preferences (train):  41%|████      | 17746/43598 [00:01<00:02, 12603.43 examples/s]Normalizing raw HH preferences (train):  39%|███▊      | 16822/43598 [00:01<00:02, 12467.93 examples/s]Normalizing raw HH preferences (train):  41%|████      | 17715/43598 [00:01<00:02, 12603.96 examples/s]Normalizing raw HH preferences (train):  39%|███▊      | 16848/43598 [00:01<00:02, 12503.24 examples/s]Normalizing raw HH preferences (train):  39%|███▉      | 16994/43598 [00:01<00:02, 12654.46 examples/s]Normalizing raw HH preferences (train):  41%|████      | 17765/43598 [00:01<00:02, 12613.59 examples/s]Normalizing raw HH preferences (train):  43%|████▎     | 18702/43598 [00:01<00:02, 12413.42 examples/s]Normalizing raw HH preferences (train):  43%|████▎     | 18689/43598 [00:01<00:02, 12446.70 examples/s]Normalizing raw HH preferences (train):  45%|████▍     | 19585/43598 [00:01<00:01, 12484.55 examples/s]Normalizing raw HH preferences (train):  45%|████▍     | 19566/43598 
[00:01<00:01, 12511.08 examples/s]Normalizing raw HH preferences (train):  43%|████▎     | 18672/43598 [00:01<00:02, 12377.51 examples/s]Normalizing raw HH preferences (train):  43%|████▎     | 18679/43598 [00:01<00:02, 12403.68 examples/s]Normalizing raw HH preferences (train):  43%|████▎     | 18851/43598 [00:01<00:01, 12560.05 examples/s]Normalizing raw HH preferences (train):  46%|████▌     | 19970/43598 [00:01<00:01, 12475.61 examples/s]Normalizing raw HH preferences (train):  45%|████▌     | 19707/43598 [00:01<00:01, 12565.12 examples/s]Normalizing raw HH preferences (train):  46%|████▌     | 19959/43598 [00:01<00:01, 12506.03 examples/s]Normalizing raw HH preferences (train):  46%|████▌     | 19938/43598 [00:01<00:01, 12443.14 examples/s]Normalizing raw HH preferences (train):  46%|████▌     | 19952/43598 [00:01<00:01, 12478.40 examples/s]Normalizing raw HH preferences (train):  48%|████▊     | 21000/43598 [00:01<00:02, 9221.06 examples/s] Normalizing raw HH preferences (train):  47%|████▋     | 20364/43598 [00:01<00:02, 9284.15 examples/s] Normalizing raw HH preferences (train):  48%|████▊     | 21000/43598 [00:01<00:02, 9226.63 examples/s] Normalizing raw HH preferences (train):  50%|████▉     | 21696/43598 [00:01<00:02, 9278.67 examples/s] Normalizing raw HH preferences (train):  51%|█████     | 22287/43598 [00:01<00:02, 9941.96 examples/s]Normalizing raw HH preferences (train):  50%|████▉     | 21692/43598 [00:01<00:02, 9495.20 examples/s] Normalizing raw HH preferences (train):  48%|████▊     | 21000/43598 [00:01<00:02, 7707.51 examples/s] Normalizing raw HH preferences (train):  50%|████▉     | 21685/43598 [00:01<00:02, 9939.41 examples/s]Normalizing raw HH preferences (train):  51%|█████     | 22293/43598 [00:01<00:02, 9944.62 examples/s]Normalizing raw HH preferences (train):  50%|████▉     | 21694/43598 [00:01<00:02, 9093.83 examples/s] Normalizing raw HH preferences (train):  53%|█████▎    | 22957/43598 [00:01<00:02, 9935.83 
examples/s]Normalizing raw HH preferences (train):  54%|█████▍    | 23581/43598 [00:02<00:01, 10601.27 examples/s]Normalizing raw HH preferences (train):  50%|████▉     | 21694/43598 [00:02<00:02, 7748.23 examples/s] Normalizing raw HH preferences (train):  53%|█████▎    | 22962/43598 [00:01<00:02, 10136.53 examples/s]Normalizing raw HH preferences (train):  51%|█████     | 22287/43598 [00:02<00:02, 8592.60 examples/s]Normalizing raw HH preferences (train):  53%|█████▎    | 22965/43598 [00:01<00:01, 10560.63 examples/s]Normalizing raw HH preferences (train):  54%|█████▍    | 23597/43598 [00:02<00:01, 10615.44 examples/s]Normalizing raw HH preferences (train):  53%|█████▎    | 22971/43598 [00:02<00:02, 9807.52 examples/s]Normalizing raw HH preferences (train):  55%|█████▌    | 24124/43598 [00:02<00:01, 10316.84 examples/s]Normalizing raw HH preferences (train):  57%|█████▋    | 24854/43598 [00:02<00:01, 11108.01 examples/s]Normalizing raw HH preferences (train):  53%|█████▎    | 22960/43598 [00:02<00:02, 8597.63 examples/s]Normalizing raw HH preferences (train):  55%|█████▌    | 24133/43598 [00:02<00:01, 10485.12 examples/s]Normalizing raw HH preferences (train):  54%|█████▍    | 23572/43598 [00:02<00:02, 9430.78 examples/s]Normalizing raw HH preferences (train):  55%|█████▌    | 24144/43598 [00:02<00:01, 10848.35 examples/s]Normalizing raw HH preferences (train):  57%|█████▋    | 24869/43598 [00:02<00:01, 11112.67 examples/s]Normalizing raw HH preferences (train):  55%|█████▌    | 24139/43598 [00:02<00:01, 10212.41 examples/s]Normalizing raw HH preferences (train):  58%|█████▊    | 25389/43598 [00:02<00:01, 10871.82 examples/s]Normalizing raw HH preferences (train):  55%|█████▌    | 24130/43598 [00:02<00:02, 9212.58 examples/s]Normalizing raw HH preferences (train):  58%|█████▊    | 25412/43598 [00:02<00:01, 11040.26 examples/s]Normalizing raw HH preferences (train):  57%|█████▋    | 24863/43598 [00:02<00:01, 10194.00 examples/s]Normalizing raw 
HH preferences (train):  58%|█████▊    | 25426/43598 [00:02<00:01, 11342.98 examples/s]Normalizing raw HH preferences (train):  61%|██████▏   | 26715/43598 [00:02<00:01, 11547.52 examples/s]Normalizing raw HH preferences (train):  58%|█████▊    | 25424/43598 [00:02<00:01, 10835.41 examples/s]Normalizing raw HH preferences (train):  61%|██████    | 26691/43598 [00:02<00:01, 11331.99 examples/s]Normalizing raw HH preferences (train):  61%|██████▏   | 26733/43598 [00:02<00:01, 11551.82 examples/s]Normalizing raw HH preferences (train):  58%|█████▊    | 25406/43598 [00:02<00:01, 9994.61 examples/s]Normalizing raw HH preferences (train):  61%|██████    | 26688/43598 [00:02<00:01, 11442.11 examples/s]Normalizing raw HH preferences (train):  61%|██████    | 26697/43598 [00:02<00:01, 11701.58 examples/s]Normalizing raw HH preferences (train):  64%|██████▍   | 27984/43598 [00:02<00:01, 11820.71 examples/s]Normalizing raw HH preferences (train):  61%|██████    | 26691/43598 [00:02<00:01, 11291.51 examples/s]Normalizing raw HH preferences (train):  64%|██████▍   | 27949/43598 [00:02<00:01, 11661.22 examples/s]Normalizing raw HH preferences (train):  61%|██████▏   | 26737/43598 [00:02<00:01, 10925.08 examples/s]Normalizing raw HH preferences (train):  64%|██████▍   | 27995/43598 [00:02<00:01, 11808.61 examples/s]Normalizing raw HH preferences (train):  61%|██████    | 26690/43598 [00:02<00:01, 10634.94 examples/s]Normalizing raw HH preferences (train):  64%|██████▍   | 27941/43598 [00:02<00:01, 11731.47 examples/s]Normalizing raw HH preferences (train):  64%|██████▍   | 27957/43598 [00:02<00:01, 11944.68 examples/s]Normalizing raw HH preferences (train):  64%|██████▍   | 27948/43598 [00:02<00:01, 11630.15 examples/s]Normalizing raw HH preferences (train):  64%|██████▍   | 28000/43598 [00:02<00:01, 11137.06 examples/s]Normalizing raw HH preferences (train):  69%|██████▊   | 29878/43598 [00:02<00:01, 12096.32 examples/s]Normalizing raw HH preferences (train):  
64%|██████▍   | 27950/43598 [00:02<00:01, 11132.58 examples/s]Normalizing raw HH preferences (train):  68%|██████▊   | 29830/43598 [00:02<00:01, 11977.77 examples/s]Normalizing raw HH preferences (train):  69%|██████▊   | 29895/43598 [00:02<00:01, 12100.48 examples/s]Normalizing raw HH preferences (train):  68%|██████▊   | 29825/43598 [00:02<00:01, 12032.78 examples/s]Normalizing raw HH preferences (train):  67%|██████▋   | 29309/43598 [00:02<00:01, 11616.32 examples/s]Normalizing raw HH preferences (train):  68%|██████▊   | 29841/43598 [00:02<00:01, 12167.96 examples/s]Normalizing raw HH preferences (train):  68%|██████▊   | 29836/43598 [00:02<00:01, 11974.21 examples/s]Normalizing raw HH preferences (train):  73%|███████▎  | 31776/43598 [00:02<00:00, 12274.17 examples/s]Normalizing raw HH preferences (train):  68%|██████▊   | 29833/43598 [00:02<00:01, 11631.59 examples/s]Normalizing raw HH preferences (train):  73%|███████▎  | 31708/43598 [00:02<00:00, 12161.13 examples/s]Normalizing raw HH preferences (train):  70%|███████   | 30712/43598 [00:02<00:01, 12021.59 examples/s]Normalizing raw HH preferences (train):  73%|███████▎  | 31794/43598 [00:02<00:00, 12280.85 examples/s]Normalizing raw HH preferences (train):  73%|███████▎  | 31701/43598 [00:02<00:00, 12192.12 examples/s]Normalizing raw HH preferences (train):  73%|███████▎  | 31727/43598 [00:02<00:00, 12306.64 examples/s]Normalizing raw HH preferences (train):  73%|███████▎  | 31720/43598 [00:02<00:00, 12169.90 examples/s]Normalizing raw HH preferences (train):  76%|███████▌  | 32988/43598 [00:02<00:00, 12314.35 examples/s]Normalizing raw HH preferences (train):  73%|███████▎  | 32000/43598 [00:02<00:00, 12026.16 examples/s]Normalizing raw HH preferences (train):  77%|███████▋  | 33690/43598 [00:02<00:00, 12347.21 examples/s]Normalizing raw HH preferences (train):  73%|███████▎  | 31721/43598 [00:02<00:00, 11949.12 examples/s]Normalizing raw HH preferences (train):  76%|███████▌  | 
32974/43598 [00:02<00:00, 12321.59 examples/s]Normalizing raw HH preferences (train):  76%|███████▌  | 32997/43598 [00:02<00:00, 12400.38 examples/s]Normalizing raw HH preferences (train):  77%|███████▋  | 33693/43598 [00:02<00:00, 12361.71 examples/s]Normalizing raw HH preferences (train):  76%|███████▌  | 32998/43598 [00:02<00:00, 12318.96 examples/s]Normalizing raw HH preferences (train):  76%|███████▋  | 33291/43598 [00:02<00:00, 12266.45 examples/s]Normalizing raw HH preferences (train):  80%|████████  | 34967/43598 [00:02<00:00, 12443.56 examples/s]Normalizing raw HH preferences (train):  76%|███████▌  | 33000/43598 [00:02<00:00, 11939.87 examples/s]Normalizing raw HH preferences (train):  80%|███████▉  | 34850/43598 [00:02<00:00, 12344.85 examples/s]Normalizing raw HH preferences (train):  80%|████████  | 34979/43598 [00:02<00:00, 12474.85 examples/s]Normalizing raw HH preferences (train):  80%|███████▉  | 34833/43598 [00:02<00:00, 12343.93 examples/s]Normalizing raw HH preferences (train):  80%|███████▉  | 34871/43598 [00:02<00:00, 12428.49 examples/s]Normalizing raw HH preferences (train):  79%|███████▉  | 34585/43598 [00:03<00:00, 12453.78 examples/s]Normalizing raw HH preferences (train):  80%|███████▉  | 34861/43598 [00:02<00:00, 12349.69 examples/s]Normalizing raw HH preferences (train):  79%|███████▊  | 34269/43598 [00:03<00:00, 12126.84 examples/s]Normalizing raw HH preferences (train):  84%|████████▍ | 36826/43598 [00:03<00:00, 12423.96 examples/s]Normalizing raw HH preferences (train):  84%|████████▍ | 36698/43598 [00:03<00:00, 12313.94 examples/s]Normalizing raw HH preferences (train):  82%|████████▏ | 35863/43598 [00:03<00:00, 12542.32 examples/s]Normalizing raw HH preferences (train):  84%|████████▍ | 36840/43598 [00:03<00:00, 12447.75 examples/s]Normalizing raw HH preferences (train):  82%|████████▏ | 35546/43598 [00:03<00:00, 12293.37 examples/s]Normalizing raw HH preferences (train):  84%|████████▍ | 36688/43598 
[00:03<00:00, 12276.47 examples/s]Normalizing raw HH preferences (train):  84%|████████▍ | 36717/43598 [00:03<00:00, 12385.74 examples/s]Normalizing raw HH preferences (train):  84%|████████▍ | 36698/43598 [00:03<00:00, 12301.49 examples/s]Normalizing raw HH preferences (train):  87%|████████▋ | 37960/43598 [00:03<00:00, 12384.55 examples/s]Normalizing raw HH preferences (train):  89%|████████▉ | 38704/43598 [00:03<00:00, 12421.08 examples/s]Normalizing raw HH preferences (train):  84%|████████▍ | 36798/43598 [00:03<00:00, 12353.76 examples/s]Normalizing raw HH preferences (train):  87%|████████▋ | 37949/43598 [00:03<00:00, 12355.03 examples/s]Normalizing raw HH preferences (train):  87%|████████▋ | 37978/43598 [00:03<00:00, 12436.89 examples/s]Normalizing raw HH preferences (train):  87%|████████▋ | 37735/43598 [00:03<00:00, 12515.09 examples/s]Normalizing raw HH preferences (train):  89%|████████▉ | 38712/43598 [00:03<00:00, 12455.25 examples/s]Normalizing raw HH preferences (train):  87%|████████▋ | 37955/43598 [00:03<00:00, 12365.45 examples/s]Normalizing raw HH preferences (train):  92%|█████████▏| 39977/43598 [00:03<00:00, 12490.93 examples/s]Normalizing raw HH preferences (train):  91%|█████████▏| 39816/43598 [00:03<00:00, 12378.21 examples/s]Normalizing raw HH preferences (train):  89%|████████▉ | 39000/43598 [00:03<00:00, 12335.14 examples/s]Normalizing raw HH preferences (train):  92%|█████████▏| 39987/43598 [00:03<00:00, 12523.03 examples/s]Normalizing raw HH preferences (train):  89%|████████▉ | 38701/43598 [00:03<00:00, 12393.58 examples/s]Normalizing raw HH preferences (train):  91%|█████████▏| 39789/43598 [00:03<00:00, 12322.39 examples/s]Normalizing raw HH preferences (train):  91%|█████████▏| 39835/43598 [00:03<00:00, 12415.72 examples/s]Normalizing raw HH preferences (train):  91%|█████████▏| 39807/43598 [00:03<00:00, 12355.65 examples/s]Normalizing raw HH preferences (train):  92%|█████████▏| 40291/43598 [00:03<00:00, 12489.12 
examples/s]Normalizing raw HH preferences (train):  96%|█████████▌| 41853/43598 [00:03<00:00, 12490.53 examples/s]Normalizing raw HH preferences (train):  92%|█████████▏| 39965/43598 [00:03<00:00, 12457.25 examples/s]Normalizing raw HH preferences (train):  96%|█████████▌| 41699/43598 [00:03<00:00, 12402.03 examples/s]Normalizing raw HH preferences (train):  96%|█████████▌| 41868/43598 [00:03<00:00, 12527.42 examples/s]Normalizing raw HH preferences (train):  96%|█████████▌| 41702/43598 [00:03<00:00, 12366.89 examples/s]Normalizing raw HH preferences (train):  96%|█████████▌| 41713/43598 [00:03<00:00, 12445.59 examples/s]Normalizing raw HH preferences (train):  95%|█████████▌| 41596/43598 [00:03<00:00, 12644.38 examples/s]Normalizing raw HH preferences (train):  96%|█████████▌| 41701/43598 [00:03<00:00, 12398.51 examples/s]Normalizing raw HH preferences (train):  99%|█████████▊| 42964/43598 [00:03<00:00, 12459.48 examples/s]Normalizing raw HH preferences (train):  96%|█████████▌| 41829/43598 [00:03<00:00, 12443.51 examples/s]Normalizing raw HH preferences (train):  99%|█████████▊| 42958/43598 [00:03<00:00, 12410.99 examples/s]Normalizing raw HH preferences (train):  99%|█████████▊| 42982/43598 [00:03<00:00, 12502.24 examples/s]Normalizing raw HH preferences (train):  98%|█████████▊| 42871/43598 [00:03<00:00, 12672.72 examples/s]Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:03<00:00, 11006.83 examples/s]Normalizing raw HH preferences (train):  99%|█████████▊| 42964/43598 [00:03<00:00, 12451.89 examples/s]Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:03<00:00, 9696.86 examples/s] Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:03<00:00, 9249.74 examples/s] Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10761.19 examples/s]
+Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10670.57 examples/s]
+Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10739.50 examples/s]
+Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10703.26 examples/s]
+Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10714.52 examples/s]
+Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10685.42 examples/s]
+Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10547.11 examples/s]
+Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10475.58 examples/s]
+No config specified, defaulting to the single config: hh-rlhf/default
+2026-04-10 13:38:30 - INFO - datasets.builder - No config specified, defaulting to the single config: hh-rlhf/default
+Using custom data configuration default-cfba128a0ab1b99f
+2026-04-10 13:38:30 - INFO - datasets.builder - Using custom data configuration default-cfba128a0ab1b99f
+Loading Dataset Infos from /home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/datasets/packaged_modules/json
+2026-04-10 13:38:30 - INFO - datasets.info - Loading Dataset Infos from /home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/datasets/packaged_modules/json
+Overwrite dataset info from restored data version if exists.
+2026-04-10 13:38:30 - INFO - datasets.builder - Overwrite dataset info from restored data version if exists.
+Loading Dataset info from /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa
+2026-04-10 13:38:30 - INFO - datasets.info - Loading Dataset info from /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa
+Found cached dataset hh-rlhf (/scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa)
+2026-04-10 13:38:30 - INFO - datasets.builder - Found cached dataset hh-rlhf (/scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa)
+Loading Dataset info from /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa
+2026-04-10 13:38:30 - INFO - datasets.info - Loading Dataset info from /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa
+2026-04-10 13:38:30 - WARNING - alignment.data - Dropped 15 non-canonical HH preference examples from split `test` before normalization (9 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 6 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
+2026-04-10 13:38:30 - WARNING - alignment.data - Dropped 15 non-canonical HH preference examples from split `test` before normalization (9 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 6 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
+Normalizing raw HH preferences (test):   0%|          | 0/2339 [00:00<?, ? examples/s]Normalizing raw HH preferences (test):   0%|          | 0/2339 [00:00<?, ? examples/s]2026-04-10 13:38:30 - WARNING - alignment.data - Dropped 15 non-canonical HH preference examples from split `test` before normalization (9 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 6 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
+Normalizing raw HH preferences (test):   0%|          | 0/2339 [00:00<?, ? examples/s]2026-04-10 13:38:31 - WARNING - alignment.data - Dropped 15 non-canonical HH preference examples from split `test` before normalization (9 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 6 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
+Normalizing raw HH preferences (test):   0%|          | 0/2339 [00:00<?, ? examples/s]2026-04-10 13:38:31 - WARNING - alignment.data - Dropped 15 non-canonical HH preference examples from split `test` before normalization (9 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 6 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
+Normalizing raw HH preferences (test):   0%|          | 0/2339 [00:00<?, ? examples/s]2026-04-10 13:38:31 - WARNING - alignment.data - Dropped 15 non-canonical HH preference examples from split `test` before normalization (9 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 6 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
+Normalizing raw HH preferences (test):   0%|          | 0/2339 [00:00<?, ? examples/s]Normalizing raw HH preferences (test):  49%|████▉     | 1144/2339 [00:00<00:00, 11391.80 examples/s]2026-04-10 13:38:31 - WARNING - alignment.data - Dropped 15 non-canonical HH preference examples from split `test` before normalization (9 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 6 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
+Normalizing raw HH preferences (test):  50%|████▉     | 1168/2339 [00:00<00:00, 11635.35 examples/s]Normalizing raw HH preferences (test):   0%|          | 0/2339 [00:00<?, ? examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-fa6f4b7acba8a3e1.arrow
+2026-04-10 13:38:31 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-fa6f4b7acba8a3e1.arrow
+Normalizing raw HH preferences (test):  51%|█████▏    | 1199/2339 [00:00<00:00, 11943.03 examples/s]2026-04-10 13:38:31 - WARNING - alignment.data - Dropped 15 non-canonical HH preference examples from split `test` before normalization (9 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 6 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
+Normalizing raw HH preferences (test):   0%|          | 0/2339 [00:00<?, ? examples/s]Normalizing raw HH preferences (test):  50%|████▉     | 1163/2339 [00:00<00:00, 11586.53 examples/s]Normalizing raw HH preferences (test):  49%|████▉     | 1157/2339 [00:00<00:00, 11519.39 examples/s]Normalizing raw HH preferences (test):  51%|█████     | 1188/2339 [00:00<00:00, 11831.46 examples/s]Normalizing raw HH preferences (test):  48%|████▊     | 1134/2339 [00:00<00:00, 11289.81 examples/s]Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 11436.57 examples/s]Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 11265.03 examples/s]Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 10462.08 examples/s]
+Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 10523.37 examples/s]
+Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 10435.30 examples/s]
+Normalizing raw HH preferences (test):  50%|████▉     | 1165/2339 [00:00<00:00, 11601.36 examples/s]Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 11198.28 examples/s]Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 11425.53 examples/s]Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 9436.64 examples/s] 
+Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 11051.63 examples/s]Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 10304.93 examples/s]
+Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 9847.18 examples/s] 
+Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 10092.37 examples/s]
+Loading cached shuffled indices for dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-be0876dd0add1b31.arrow
+2026-04-10 13:38:31 - INFO - datasets.arrow_dataset - Loading cached shuffled indices for dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-be0876dd0add1b31.arrow
+Loading cached shuffled indices for dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-40e942b49dfd026a.arrow
+2026-04-10 13:38:31 - INFO - datasets.arrow_dataset - Loading cached shuffled indices for dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-40e942b49dfd026a.arrow
+2026-04-10 13:38:31 - INFO - __main__ - Training on the following datasets and their proportions: ['train : 43598', 'test : 2339']
+[INFO|tokenization_utils_base.py:2058] 2026-04-10 13:38:31,308 >> loading file tokenizer.json
+[INFO|tokenization_utils_base.py:2058] 2026-04-10 13:38:31,308 >> loading file tokenizer.model
+[INFO|tokenization_utils_base.py:2058] 2026-04-10 13:38:31,308 >> loading file added_tokens.json
+[INFO|tokenization_utils_base.py:2058] 2026-04-10 13:38:31,308 >> loading file special_tokens_map.json
+[INFO|tokenization_utils_base.py:2058] 2026-04-10 13:38:31,308 >> loading file tokenizer_config.json
+[INFO|tokenization_utils_base.py:2058] 2026-04-10 13:38:31,308 >> loading file chat_template.jinja
+Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 11399.82 examples/s]Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 9899.47 examples/s] 
+[INFO|tokenization_utils_base.py:2323] 2026-04-10 13:38:31,605 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
+2026-04-10 13:38:31 - INFO - __main__ - *** Load pretrained model ***
+Process #0 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00000_of_00012.arrow
+2026-04-10 13:38:31 - INFO - datasets.arrow_dataset - Process #0 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00000_of_00012.arrow
+Process #1 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00001_of_00012.arrow
+2026-04-10 13:38:31 - INFO - datasets.arrow_dataset - Process #1 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00001_of_00012.arrow
+Process #2 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00002_of_00012.arrow
+2026-04-10 13:38:31 - INFO - datasets.arrow_dataset - Process #2 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00002_of_00012.arrow
+Process #3 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00003_of_00012.arrow
+2026-04-10 13:38:31 - INFO - datasets.arrow_dataset - Process #3 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00003_of_00012.arrow
+Process #4 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00004_of_00012.arrow
+2026-04-10 13:38:31 - INFO - datasets.arrow_dataset - Process #4 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00004_of_00012.arrow
+Process #5 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00005_of_00012.arrow
+2026-04-10 13:38:31 - INFO - datasets.arrow_dataset - Process #5 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00005_of_00012.arrow
+Process #6 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00006_of_00012.arrow
+2026-04-10 13:38:31 - INFO - datasets.arrow_dataset - Process #6 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00006_of_00012.arrow
+Process #7 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00007_of_00012.arrow
+2026-04-10 13:38:31 - INFO - datasets.arrow_dataset - Process #7 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00007_of_00012.arrow
+Process #8 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00008_of_00012.arrow
+2026-04-10 13:38:31 - INFO - datasets.arrow_dataset - Process #8 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00008_of_00012.arrow
+Process #9 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00009_of_00012.arrow
+2026-04-10 13:38:31 - INFO - datasets.arrow_dataset - Process #9 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00009_of_00012.arrow
+Process #10 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00010_of_00012.arrow
+2026-04-10 13:38:31 - INFO - datasets.arrow_dataset - Process #10 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00010_of_00012.arrow
+Process #11 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00011_of_00012.arrow
+2026-04-10 13:38:31 - INFO - datasets.arrow_dataset - Process #11 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00011_of_00012.arrow
+Applying chat template (num_proc=12):   0%|          | 0/43598 [00:00<?, ? examples/s]Applying chat template (num_proc=12):   0%|          | 0/43598 [00:00<?, ? examples/s]Applying chat template (num_proc=12):   0%|          | 0/43598 [00:00<?, ? examples/s]Applying chat template (num_proc=12):   0%|          | 0/43598 [00:00<?, ? examples/s]Spawning 12 processes
+2026-04-10 13:38:31 - INFO - datasets.arrow_dataset - Spawning 12 processes
+Applying chat template (num_proc=12):   0%|          | 0/43598 [00:00<?, ? examples/s]Applying chat template (num_proc=12):   0%|          | 0/43598 [00:00<?, ? examples/s]Applying chat template (num_proc=12):   0%|          | 0/43598 [00:00<?, ? examples/s]Applying chat template (num_proc=12):   0%|          | 0/43598 [00:00<?, ? examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00000_of_00012.arrow
+2026-04-10 13:38:32 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00000_of_00012.arrow
+Applying chat template (num_proc=12):   0%|          | 58/43598 [00:00<10:22, 69.96 examples/s]Applying chat template (num_proc=12):   0%|          | 96/43598 [00:00<06:36, 109.72 examples/s]Applying chat template (num_proc=12):   0%|          | 110/43598 [00:00<05:42, 126.83 examples/s]Applying chat template (num_proc=12):   0%|          | 1/43598 [00:00<10:29:27,  1.15 examples/s]Applying chat template (num_proc=12):   0%|          | 68/43598 [00:00<09:03, 80.07 examples/s]Applying chat template (num_proc=12):   0%|          | 1/43598 [00:00<10:36:21,  1.14 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00001_of_00012.arrow
+2026-04-10 13:38:32 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00001_of_00012.arrow
+Applying chat template (num_proc=12):   0%|          | 22/43598 [00:01<33:02, 21.98 examples/s]Applying chat template (num_proc=12):   1%|          | 271/43598 [00:01<02:11, 329.03 examples/s]Applying chat template (num_proc=12):   0%|          | 63/43598 [00:01<12:01, 60.32 examples/s]Applying chat template (num_proc=12):   1%|          | 341/43598 [00:01<01:51, 389.39 examples/s]Applying chat template (num_proc=12):   1%|          | 423/43598 [00:01<01:32, 467.71 examples/s]Applying chat template (num_proc=12):   0%|          | 111/43598 [00:01<05:56, 121.83 examples/s]Applying chat template (num_proc=12):   0%|          | 126/43598 [00:01<05:17, 136.88 examples/s]Applying chat template (num_proc=12):   1%|          | 340/43598 [00:01<02:04, 348.56 examples/s]Applying chat template (num_proc=12):   1%|▏         | 639/43598 [00:01<01:07, 635.93 examples/s]Applying chat template (num_proc=12):   3%|▎         | 1336/43598 [00:01<00:28, 1502.30 examples/s]Applying chat template (num_proc=12):   2%|▏         | 936/43598 [00:01<00:43, 985.53 examples/s]Applying chat template (num_proc=12):   3%|▎         | 1160/43598 [00:01<00:40, 1053.34 examples/s]Applying chat template (num_proc=12):   3%|▎         | 1328/43598 [00:01<00:33, 1274.29 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00002_of_00012.arrow
+2026-04-10 13:38:33 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00002_of_00012.arrow
+Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00003_of_00012.arrow
+2026-04-10 13:38:33 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00003_of_00012.arrow
+Applying chat template (num_proc=12):   4%|▍         | 1897/43598 [00:01<00:23, 1812.25 examples/s]Applying chat template (num_proc=12):   5%|▍         | 2132/43598 [00:01<00:20, 2056.49 examples/s]Applying chat template (num_proc=12):   4%|▍         | 1682/43598 [00:01<00:26, 1572.64 examples/s]Applying chat template (num_proc=12):   3%|▎         | 1404/43598 [00:01<00:34, 1206.43 examples/s]Applying chat template (num_proc=12):   5%|▍         | 2000/43598 [00:01<00:25, 1608.13 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00004_of_00012.arrow
+2026-04-10 13:38:33 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00004_of_00012.arrow
+Applying chat template (num_proc=12):   7%|▋         | 3034/43598 [00:01<00:16, 2464.02 examples/s]Applying chat template (num_proc=12):   9%|▊         | 3756/43598 [00:01<00:11, 3607.12 examples/s]Applying chat template (num_proc=12):   7%|▋         | 3138/43598 [00:01<00:17, 2364.14 examples/s]Applying chat template (num_proc=12):   7%|▋         | 3175/43598 [00:01<00:15, 2533.82 examples/s]Applying chat template (num_proc=12):  16%|█▌        | 6929/43598 [00:01<00:06, 6072.45 examples/s]Applying chat template (num_proc=12):   9%|▉         | 3974/43598 [00:01<00:13, 3024.26 examples/s]Applying chat template (num_proc=12):  11%|█         | 4882/43598 [00:02<00:11, 3448.70 examples/s]Applying chat template (num_proc=12):  11%|█▏        | 5011/43598 [00:02<00:10, 3649.16 examples/s]Applying chat template (num_proc=12):  14%|█▍        | 6111/43598 [00:02<00:07, 4691.78 examples/s]Applying chat template (num_proc=12):  12%|█▏        | 5074/43598 [00:02<00:11, 3388.25 examples/s]Applying chat template (num_proc=12):  15%|█▌        | 6690/43598 [00:02<00:07, 4636.13 examples/s]Applying chat template (num_proc=12):  20%|██        | 8885/43598 [00:02<00:05, 6255.74 examples/s]Applying chat template (num_proc=12):  15%|█▌        | 6584/43598 [00:02<00:08, 4164.55 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00005_of_00012.arrow
+2026-04-10 13:38:34 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00005_of_00012.arrow
+Applying chat template (num_proc=12):  15%|█▍        | 6388/43598 [00:02<00:09, 3886.84 examples/s]Applying chat template (num_proc=12):  20%|██        | 8917/43598 [00:02<00:06, 5772.69 examples/s]Applying chat template (num_proc=12):  19%|█▉        | 8494/43598 [00:02<00:06, 5206.20 examples/s]Applying chat template (num_proc=12):  22%|██▏       | 9454/43598 [00:02<00:05, 6219.38 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00007_of_00012.arrow
+2026-04-10 13:38:34 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00007_of_00012.arrow
+Applying chat template (num_proc=12):  20%|██        | 8861/43598 [00:02<00:07, 4781.99 examples/s]Applying chat template (num_proc=12):  25%|██▍       | 10835/43598 [00:02<00:05, 5835.92 examples/s]Applying chat template (num_proc=12):  28%|██▊       | 12210/43598 [00:02<00:05, 6209.45 examples/s]Applying chat template (num_proc=12):  26%|██▌       | 11150/43598 [00:02<00:05, 5792.92 examples/s]Applying chat template (num_proc=12):  30%|███       | 13215/43598 [00:02<00:03, 7807.57 examples/s]Applying chat template (num_proc=12):  24%|██▎       | 10325/43598 [00:02<00:06, 5072.15 examples/s]Applying chat template (num_proc=12):  27%|██▋       | 11591/43598 [00:02<00:05, 5964.63 examples/s]Applying chat template (num_proc=12):  40%|███▉      | 17299/43598 [00:03<00:02, 11494.16 examples/s]Applying chat template (num_proc=12):  22%|██▏       | 9566/43598 [00:03<00:07, 4478.78 examples/s]Applying chat template (num_proc=12):  44%|████▎     | 19028/43598 [00:03<00:02, 11740.53 examples/s]Applying chat template (num_proc=12):  32%|███▏      | 14155/43598 [00:03<00:04, 6910.36 examples/s]Applying chat template (num_proc=12):  36%|███▋      | 15881/43598 [00:03<00:03, 7572.24 examples/s]Applying chat template (num_proc=12):  38%|███▊      | 16683/43598 [00:03<00:03, 7826.60 examples/s]Applying chat template (num_proc=12):  33%|███▎      | 14271/43598 [00:03<00:04, 6423.94 examples/s]Applying chat template (num_proc=12):  47%|████▋     | 20604/43598 [00:03<00:01, 11668.41 examples/s]Applying chat template (num_proc=12):  46%|████▋     | 20221/43598 [00:03<00:02, 10514.44 examples/s]Applying chat template (num_proc=12):  31%|███▏      | 13644/43598 [00:03<00:05, 5478.94 examples/s]Applying chat template (num_proc=12):  30%|███       | 13166/43598 [00:03<00:05, 5267.21 examples/s]Applying chat template (num_proc=12):  51%|█████     | 22105/43598 [00:03<00:01, 11594.96 examples/s]Applying chat template (num_proc=12):  31%|███▏      | 13638/43598 
[00:03<00:04, 6201.69 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00008_of_00012.arrow
+2026-04-10 13:38:35 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00008_of_00012.arrow
+Applying chat template (num_proc=12):  50%|█████     | 21872/43598 [00:03<00:02, 10527.31 examples/s]Applying chat template (num_proc=12):  54%|█████▍    | 23494/43598 [00:03<00:01, 11825.06 examples/s]Applying chat template (num_proc=12):  38%|███▊      | 16627/43598 [00:03<00:03, 6787.25 examples/s]Applying chat template (num_proc=12):  46%|████▌     | 19986/43598 [00:03<00:02, 8480.70 examples/s]Applying chat template (num_proc=12):  54%|█████▎    | 23402/43598 [00:03<00:01, 10456.88 examples/s]Applying chat template (num_proc=12):  41%|████      | 17813/43598 [00:03<00:03, 7374.30 examples/s]Applying chat template (num_proc=12):  57%|█████▋    | 24887/43598 [00:03<00:01, 11217.58 examples/s]Applying chat template (num_proc=12):  45%|████▌     | 19707/43598 [00:03<00:02, 9024.21 examples/s]Applying chat template (num_proc=12):  54%|█████▎    | 23420/43598 [00:03<00:01, 10943.35 examples/s]Applying chat template (num_proc=12):  52%|█████▏    | 22878/43598 [00:03<00:01, 11239.22 examples/s]Applying chat template (num_proc=12):  37%|███▋      | 16232/43598 [00:03<00:04, 5794.39 examples/s]Applying chat template (num_proc=12):  57%|█████▋    | 24769/43598 [00:03<00:01, 10055.79 examples/s]Applying chat template (num_proc=12):  60%|█████▉    | 26150/43598 [00:03<00:01, 10032.17 examples/s]Applying chat template (num_proc=12):  48%|████▊     | 21141/43598 [00:03<00:02, 8962.91 examples/s]Applying chat template (num_proc=12):  41%|████      | 17862/43598 [00:03<00:03, 6932.09 examples/s]Applying chat template (num_proc=12):  48%|████▊     | 20840/43598 [00:03<00:02, 9676.78 examples/s]Applying chat template (num_proc=12):  57%|█████▋    | 25025/43598 [00:03<00:01, 11659.65 examples/s]Applying chat template (num_proc=12):  58%|█████▊    | 25146/43598 [00:03<00:01, 10302.42 examples/s]Applying chat template (num_proc=12):  39%|███▊      | 16816/43598 [00:03<00:04, 6321.53 examples/s]Applying chat template (num_proc=12):  63%|██████▎   | 27257/43598 
[00:03<00:01, 10248.85 examples/s]Applying chat template (num_proc=12):  60%|█████▉    | 25942/43598 [00:03<00:01, 9630.70 examples/s] Applying chat template (num_proc=12):  50%|████▉     | 21721/43598 [00:03<00:02, 9630.16 examples/s]Applying chat template (num_proc=12):  51%|█████     | 22298/43598 [00:03<00:02, 8862.11 examples/s]Applying chat template (num_proc=12):  62%|██████▏   | 26915/43598 [00:04<00:01, 12464.24 examples/s]Applying chat template (num_proc=12):  51%|█████     | 22299/43598 [00:03<00:02, 10541.44 examples/s]Applying chat template (num_proc=12):  66%|██████▌   | 28698/43598 [00:04<00:01, 10939.34 examples/s]Applying chat template (num_proc=12):  62%|██████▏   | 27153/43598 [00:04<00:01, 9946.67 examples/s]Applying chat template (num_proc=12):  52%|█████▏    | 22774/43598 [00:04<00:02, 9669.92 examples/s]Applying chat template (num_proc=12):  54%|█████▍    | 23508/43598 [00:04<00:02, 9379.12 examples/s]Applying chat template (num_proc=12):  61%|██████    | 26660/43598 [00:04<00:01, 9705.48 examples/s] Applying chat template (num_proc=12):  70%|██████▉   | 30385/43598 [00:04<00:01, 12378.53 examples/s]Applying chat template (num_proc=12):  65%|██████▍   | 28305/43598 [00:04<00:01, 10014.05 examples/s]Applying chat template (num_proc=12):  57%|█████▋    | 24752/43598 [00:04<00:01, 9942.93 examples/s]Applying chat template (num_proc=12):  66%|██████▌   | 28846/43598 [00:04<00:01, 12160.33 examples/s]Applying chat template (num_proc=12):  53%|█████▎    | 23288/43598 [00:04<00:02, 9088.05 examples/s]Applying chat template (num_proc=12):  56%|█████▌    | 24320/43598 [00:04<00:01, 10827.59 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00006_of_00012.arrow
+2026-04-10 13:38:36 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00006_of_00012.arrow
+Applying chat template (num_proc=12):  68%|██████▊   | 29586/43598 [00:04<00:01, 10600.97 examples/s]Applying chat template (num_proc=12):  73%|███████▎  | 31809/43598 [00:04<00:00, 12299.03 examples/s]Applying chat template (num_proc=12):  59%|█████▉    | 25932/43598 [00:04<00:01, 10321.00 examples/s]Applying chat template (num_proc=12):  56%|█████▌    | 24460/43598 [00:04<00:02, 9343.77 examples/s]Applying chat template (num_proc=12):  70%|██████▉   | 30492/43598 [00:04<00:01, 11951.60 examples/s]Applying chat template (num_proc=12):  56%|█████▋    | 24530/43598 [00:04<00:02, 8945.02 examples/s]Applying chat template (num_proc=12):  71%|███████   | 30974/43598 [00:04<00:01, 11394.34 examples/s]Applying chat template (num_proc=12):  76%|███████▋  | 33296/43598 [00:04<00:00, 12722.97 examples/s]Applying chat template (num_proc=12):  63%|██████▎   | 27458/43598 [00:04<00:01, 11437.26 examples/s]Applying chat template (num_proc=12):  64%|██████▍   | 28000/43598 [00:04<00:01, 8115.65 examples/s]Applying chat template (num_proc=12):  60%|██████    | 26175/43598 [00:04<00:01, 10421.90 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00011_of_00012.arrow
+2026-04-10 13:38:36 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00011_of_00012.arrow
+Applying chat template (num_proc=12):  59%|█████▉    | 25888/43598 [00:04<00:01, 9300.35 examples/s]Applying chat template (num_proc=12):  60%|█████▉    | 25967/43598 [00:04<00:01, 9650.55 examples/s]Applying chat template (num_proc=12):  73%|███████▎  | 31990/43598 [00:04<00:00, 11757.08 examples/s]Applying chat template (num_proc=12):  80%|███████▉  | 34871/43598 [00:04<00:00, 13532.65 examples/s]Applying chat template (num_proc=12):  64%|██████▎   | 27764/43598 [00:04<00:01, 11018.15 examples/s]Applying chat template (num_proc=12):  67%|██████▋   | 29148/43598 [00:04<00:01, 8150.13 examples/s]Applying chat template (num_proc=12):  62%|██████▏   | 27193/43598 [00:04<00:01, 9834.58 examples/s]Applying chat template (num_proc=12):  63%|██████▎   | 27398/43598 [00:04<00:01, 10436.72 examples/s]Applying chat template (num_proc=12):  66%|██████▌   | 28758/43598 [00:04<00:01, 10214.11 examples/s]Applying chat template (num_proc=12):  74%|███████▍  | 32194/43598 [00:04<00:01, 9340.31 examples/s] Applying chat template (num_proc=12):  84%|████████▍ | 36603/43598 [00:04<00:00, 14524.60 examples/s]Applying chat template (num_proc=12):  77%|███████▋  | 33520/43598 [00:04<00:00, 12103.69 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00010_of_00012.arrow
+2026-04-10 13:38:36 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00010_of_00012.arrow
+Applying chat template (num_proc=12):  67%|██████▋   | 29330/43598 [00:04<00:01, 11728.15 examples/s]Applying chat template (num_proc=12):  70%|██████▉   | 30321/43598 [00:04<00:01, 8658.42 examples/s]Applying chat template (num_proc=12):  66%|██████▋   | 28983/43598 [00:04<00:01, 11312.58 examples/s]Applying chat template (num_proc=12):  69%|██████▉   | 30083/43598 [00:04<00:01, 10917.47 examples/s]Applying chat template (num_proc=12):  76%|███████▋  | 33316/43598 [00:04<00:01, 9765.48 examples/s]Applying chat template (num_proc=12):  66%|██████▌   | 28761/43598 [00:04<00:01, 10387.48 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00009_of_00012.arrow
+2026-04-10 13:38:36 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d3917bc8eb716f92_00009_of_00012.arrow
+Applying chat template (num_proc=12):  80%|████████  | 34987/43598 [00:04<00:00, 12548.56 examples/s]Applying chat template (num_proc=12):  88%|████████▊ | 38170/43598 [00:04<00:00, 14261.91 examples/s]Applying chat template (num_proc=12):  71%|███████   | 31046/43598 [00:04<00:00, 12754.63 examples/s]Applying chat template (num_proc=12):  73%|███████▎  | 32030/43598 [00:04<00:01, 10111.71 examples/s]Applying chat template (num_proc=12):  73%|███████▎  | 31850/43598 [00:04<00:00, 12607.74 examples/s]Applying chat template (num_proc=12):  71%|███████   | 30784/43598 [00:04<00:01, 12490.47 examples/s]Applying chat template (num_proc=12):  80%|████████  | 34994/43598 [00:04<00:00, 11485.50 examples/s]Applying chat template (num_proc=12):  69%|██████▉   | 30247/43598 [00:04<00:01, 11309.21 examples/s]Applying chat template (num_proc=12):  84%|████████▍ | 36566/43598 [00:04<00:00, 12788.74 examples/s]Applying chat template (num_proc=12):  91%|█████████ | 39760/43598 [00:04<00:00, 14243.55 examples/s]Applying chat template (num_proc=12):  78%|███████▊  | 33944/43598 [00:04<00:00, 11965.67 examples/s]Applying chat template (num_proc=12):  77%|███████▋  | 33764/43598 [00:04<00:00, 14287.66 examples/s]Applying chat template (num_proc=12):  75%|███████▍  | 32504/43598 [00:04<00:00, 13556.72 examples/s]Applying chat template (num_proc=12):  84%|████████▍ | 36705/43598 [00:04<00:00, 12941.80 examples/s]Applying chat template (num_proc=12):  75%|███████▌  | 32844/43598 [00:04<00:00, 12320.60 examples/s]Applying chat template (num_proc=12):  73%|███████▎  | 31669/43598 [00:04<00:00, 11969.45 examples/s]Applying chat template (num_proc=12):  88%|████████▊ | 38259/43598 [00:04<00:00, 13780.85 examples/s]Applying chat template (num_proc=12):  95%|█████████▍| 41341/43598 [00:04<00:00, 13324.22 examples/s]Applying chat template (num_proc=12):  82%|████████▏ | 35916/43598 [00:04<00:00, 16239.63 examples/s]Applying chat template (num_proc=12):  79%|███████▉  | 
34651/43598 [00:04<00:00, 15478.99 examples/s]Applying chat template (num_proc=12):  82%|████████▏ | 35780/43598 [00:04<00:00, 13158.84 examples/s]Applying chat template (num_proc=12):  89%|████████▊ | 38631/43598 [00:04<00:00, 14482.70 examples/s]Applying chat template (num_proc=12):  79%|███████▉  | 34638/43598 [00:04<00:00, 13509.56 examples/s]Applying chat template (num_proc=12):  77%|███████▋  | 33417/43598 [00:04<00:00, 13320.45 examples/s]Applying chat template (num_proc=12):  92%|█████████▏| 39966/43598 [00:05<00:00, 13115.48 examples/s]Applying chat template (num_proc=12):  86%|████████▋ | 37694/43598 [00:05<00:00, 16495.38 examples/s]Applying chat template (num_proc=12):  84%|████████▍ | 36751/43598 [00:04<00:00, 16895.70 examples/s]Applying chat template (num_proc=12):  86%|████████▋ | 37683/43598 [00:05<00:00, 14563.40 examples/s]Applying chat template (num_proc=12):  98%|█████████▊| 42841/43598 [00:05<00:00, 13275.00 examples/s]Applying chat template (num_proc=12):  82%|████████▏ | 35653/43598 [00:05<00:00, 15667.27 examples/s]Applying chat template (num_proc=12):  93%|█████████▎| 40340/43598 [00:05<00:00, 14080.38 examples/s]Applying chat template (num_proc=12):  84%|████████▎ | 36495/43598 [00:05<00:00, 13657.11 examples/s]Applying chat template (num_proc=12):  95%|█████████▍| 41360/43598 [00:05<00:00, 13319.28 examples/s]Applying chat template (num_proc=12):  91%|█████████ | 39682/43598 [00:05<00:00, 17443.41 examples/s]Applying chat template (num_proc=12):  91%|█████████ | 39751/43598 [00:05<00:00, 15692.25 examples/s]Applying chat template (num_proc=12):  88%|████████▊ | 38177/43598 [00:05<00:00, 18246.25 examples/s]Applying chat template (num_proc=12):  89%|████████▉ | 38717/43598 [00:05<00:00, 16352.15 examples/s]Applying chat template (num_proc=12):  87%|████████▋ | 38075/43598 [00:05<00:00, 14160.09 examples/s]Applying chat template (num_proc=12):  97%|█████████▋| 42364/43598 [00:05<00:00, 15140.93 examples/s]Applying chat 
template (num_proc=12):  96%|█████████▌| 41788/43598 [00:05<00:00, 16494.29 examples/s]Applying chat template (num_proc=12):  94%|█████████▎| 40825/43598 [00:05<00:00, 17275.08 examples/s]Applying chat template (num_proc=12):  92%|█████████▏| 40166/43598 [00:05<00:00, 17975.81 examples/s]Applying chat template (num_proc=12):  96%|█████████▌| 41702/43598 [00:05<00:00, 15936.46 examples/s]Applying chat template (num_proc=12):  91%|█████████ | 39705/43598 [00:05<00:00, 14457.72 examples/s]Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
+    self.run()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
+    server.serve_forever()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
+    sys.exit(0)
+SystemExit: 0
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
+    finalizer()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
+    res = self._callback(*self._args, **self._kwargs)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
+    rmtree(tempdir)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
+    _rmtree_safe_fd(fd, path, onerror)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
+    onerror(os.unlink, fullname, sys.exc_info())
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
+    os.unlink(entry.name, dir_fd=topfd)
+OSError: [Errno 16] Device or resource busy: '.nfs6b1b54126f0c5c7f000017c8'
+Applying chat template (num_proc=12):  99%|█████████▊| 42965/43598 [00:05<00:00, 11330.73 examples/s]Applying chat template (num_proc=12): 100%|██████████| 43598/43598 [00:05<00:00, 8156.47 examples/s] 
+Applying chat template (num_proc=12):  95%|█████████▌| 41512/43598 [00:05<00:00, 15401.02 examples/s]Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
+    self.run()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
+    server.serve_forever()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
+    sys.exit(0)
+SystemExit: 0
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
+    finalizer()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
+    res = self._callback(*self._args, **self._kwargs)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
+    rmtree(tempdir)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
+    _rmtree_safe_fd(fd, path, onerror)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
+    onerror(os.unlink, fullname, sys.exc_info())
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
+    os.unlink(entry.name, dir_fd=topfd)
+OSError: [Errno 16] Device or resource busy: '.nfs41589db42cf35be2000017c9'
+Applying chat template (num_proc=12): 100%|█████████▉| 43418/43598 [00:05<00:00, 15552.63 examples/s]Applying chat template (num_proc=12): 100%|██████████| 43598/43598 [00:05<00:00, 15012.52 examples/s]Applying chat template (num_proc=12): 100%|██████████| 43598/43598 [00:05<00:00, 8050.64 examples/s] 
+Applying chat template (num_proc=12):  98%|█████████▊| 42662/43598 [00:05<00:00, 15738.77 examples/s]Applying chat template (num_proc=12):  97%|█████████▋| 42145/43598 [00:05<00:00, 16212.44 examples/s]Applying chat template (num_proc=12):  99%|█████████▉| 43148/43598 [00:05<00:00, 15370.57 examples/s]Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
+    self.run()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
+    server.serve_forever()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
+    sys.exit(0)
+SystemExit: 0
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
+    finalizer()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
+    res = self._callback(*self._args, **self._kwargs)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
+    rmtree(tempdir)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
+    _rmtree_safe_fd(fd, path, onerror)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
+    onerror(os.unlink, fullname, sys.exc_info())
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
+    os.unlink(entry.name, dir_fd=topfd)
+OSError: [Errno 16] Device or resource busy: '.nfs48dd7446b3148e06000017d4'
+Applying chat template (num_proc=12): 100%|██████████| 43598/43598 [00:05<00:00, 7823.71 examples/s] 
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
+    self.run()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
+    server.serve_forever()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
+    sys.exit(0)
+SystemExit: 0
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
+    finalizer()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
+    res = self._callback(*self._args, **self._kwargs)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
+    rmtree(tempdir)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
+    _rmtree_safe_fd(fd, path, onerror)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
+    onerror(os.unlink, fullname, sys.exc_info())
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
+    os.unlink(entry.name, dir_fd=topfd)
+OSError: [Errno 16] Device or resource busy: '.nfsce245982b5ea6739000017d7'
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
+    self.run()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
+    server.serve_forever()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
+    sys.exit(0)
+SystemExit: 0
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
+    finalizer()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
+    res = self._callback(*self._args, **self._kwargs)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
+    rmtree(tempdir)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
+    _rmtree_safe_fd(fd, path, onerror)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
+    onerror(os.unlink, fullname, sys.exc_info())
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
+    os.unlink(entry.name, dir_fd=topfd)
+OSError: [Errno 16] Device or resource busy: '.nfsba63fcbf067574be000017d8'
+Applying chat template (num_proc=12): 100%|██████████| 43598/43598 [00:05<00:00, 7759.10 examples/s] 
+Applying chat template (num_proc=12): 100%|██████████| 43598/43598 [00:05<00:00, 7742.02 examples/s] 
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
+    self.run()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
+    server.serve_forever()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
+    sys.exit(0)
+SystemExit: 0
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
+    finalizer()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
+    res = self._callback(*self._args, **self._kwargs)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
+    rmtree(tempdir)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
+    _rmtree_safe_fd(fd, path, onerror)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
+    onerror(os.unlink, fullname, sys.exc_info())
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
+    os.unlink(entry.name, dir_fd=topfd)
+OSError: [Errno 16] Device or resource busy: '.nfse90b0255ee86dfd4000017d9'
+Concatenating 12 shards
+2026-04-10 13:38:37 - INFO - datasets.arrow_dataset - Concatenating 12 shards
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
+    self.run()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
+    server.serve_forever()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
+    sys.exit(0)
+SystemExit: 0
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
+    finalizer()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
+    res = self._callback(*self._args, **self._kwargs)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
+    rmtree(tempdir)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
+    _rmtree_safe_fd(fd, path, onerror)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
+    onerror(os.unlink, fullname, sys.exc_info())
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
+    os.unlink(entry.name, dir_fd=topfd)
+OSError: [Errno 16] Device or resource busy: '.nfs5e49ad14b0fadbef000017db'
+Applying chat template (num_proc=12): 100%|██████████| 43598/43598 [00:05<00:00, 7727.41 examples/s] 
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
+    self.run()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
+    server.serve_forever()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
+    sys.exit(0)
+SystemExit: 0
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
+    finalizer()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
+    res = self._callback(*self._args, **self._kwargs)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
+    rmtree(tempdir)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
+    _rmtree_safe_fd(fd, path, onerror)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
+    onerror(os.unlink, fullname, sys.exc_info())
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
+    os.unlink(entry.name, dir_fd=topfd)
+OSError: [Errno 16] Device or resource busy: '.nfsa53626dc662ef5e4000017da'
+Applying chat template (num_proc=12): 100%|██████████| 43598/43598 [00:05<00:00, 7683.55 examples/s] 
+Applying chat template (num_proc=12): 100%|██████████| 43598/43598 [00:05<00:00, 7672.16 examples/s] 
+Applying chat template (num_proc=12):   0%|          | 0/2339 [00:00<?, ? examples/s]Process #0 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00000_of_00012.arrow
+2026-04-10 13:38:37 - INFO - datasets.arrow_dataset - Process #0 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00000_of_00012.arrow
+Process #1 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00001_of_00012.arrow
+2026-04-10 13:38:37 - INFO - datasets.arrow_dataset - Process #1 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00001_of_00012.arrow
+Process #2 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00002_of_00012.arrow
+2026-04-10 13:38:37 - INFO - datasets.arrow_dataset - Process #2 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00002_of_00012.arrow
+Process #3 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00003_of_00012.arrow
+2026-04-10 13:38:37 - INFO - datasets.arrow_dataset - Process #3 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00003_of_00012.arrow
+Process #4 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00004_of_00012.arrow
+2026-04-10 13:38:37 - INFO - datasets.arrow_dataset - Process #4 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00004_of_00012.arrow
+Process #5 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00005_of_00012.arrow
+2026-04-10 13:38:37 - INFO - datasets.arrow_dataset - Process #5 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00005_of_00012.arrow
+Process #6 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00006_of_00012.arrow
+2026-04-10 13:38:37 - INFO - datasets.arrow_dataset - Process #6 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00006_of_00012.arrow
+Process #7 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00007_of_00012.arrow
+2026-04-10 13:38:37 - INFO - datasets.arrow_dataset - Process #7 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00007_of_00012.arrow
+Process #8 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00008_of_00012.arrow
+2026-04-10 13:38:37 - INFO - datasets.arrow_dataset - Process #8 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00008_of_00012.arrow
+Process #9 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00009_of_00012.arrow
+2026-04-10 13:38:37 - INFO - datasets.arrow_dataset - Process #9 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00009_of_00012.arrow
+Process #10 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00010_of_00012.arrow
+2026-04-10 13:38:37 - INFO - datasets.arrow_dataset - Process #10 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00010_of_00012.arrow
+Process #11 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00011_of_00012.arrow
+2026-04-10 13:38:37 - INFO - datasets.arrow_dataset - Process #11 will write at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00011_of_00012.arrow
+Applying chat template (num_proc=12):   0%|          | 0/2339 [00:00<?, ? examples/s]Spawning 12 processes
+2026-04-10 13:38:37 - INFO - datasets.arrow_dataset - Spawning 12 processes
+Applying chat template (num_proc=12):   0%|          | 0/2339 [00:00<?, ? examples/s]Applying chat template (num_proc=12):   0%|          | 0/2339 [00:00<?, ? examples/s]Applying chat template (num_proc=12):   0%|          | 0/2339 [00:00<?, ? examples/s]Applying chat template (num_proc=12):   0%|          | 0/2339 [00:00<?, ? examples/s]Applying chat template (num_proc=12):   0%|          | 0/2339 [00:00<?, ? examples/s]Applying chat template (num_proc=12):   0%|          | 0/2339 [00:00<?, ? examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00000_of_00012.arrow
+2026-04-10 13:38:38 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00000_of_00012.arrow
+Applying chat template (num_proc=12):   8%|▊         | 195/2339 [00:00<00:08, 261.03 examples/s]Applying chat template (num_proc=12):   5%|▌         | 125/2339 [00:00<00:15, 146.41 examples/s]Applying chat template (num_proc=12):  13%|█▎        | 303/2339 [00:00<00:06, 330.98 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00001_of_00012.arrow
+2026-04-10 13:38:38 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00001_of_00012.arrow
+Applying chat template (num_proc=12):   5%|▌         | 121/2339 [00:00<00:17, 126.59 examples/s]Applying chat template (num_proc=12):   0%|          | 9/2339 [00:01<04:36,  8.42 examples/s]Applying chat template (num_proc=12):   8%|▊         | 196/2339 [00:01<00:10, 201.88 examples/s]Applying chat template (num_proc=12):   6%|▌         | 136/2339 [00:00<00:14, 153.07 examples/s]Applying chat template (num_proc=12):   3%|▎         | 66/2339 [00:01<00:34, 65.30 examples/s]Applying chat template (num_proc=12):  17%|█▋        | 401/2339 [00:01<00:05, 359.35 examples/s]Applying chat template (num_proc=12):   3%|▎         | 61/2339 [00:00<00:34, 66.45 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00002_of_00012.arrow
+2026-04-10 13:38:38 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00002_of_00012.arrow
+Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00003_of_00012.arrow
+Applying chat template (num_proc=12):   3%|▎         | 80/2339 [00:01<00:29, 75.97 examples/s]2026-04-10 13:38:38 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00003_of_00012.arrow
+Applying chat template (num_proc=12):  17%|█▋        | 391/2339 [00:01<00:04, 396.62 examples/s]Applying chat template (num_proc=12):  12%|█▏        | 291/2339 [00:01<00:07, 274.80 examples/s]Applying chat template (num_proc=12):   8%|▊         | 195/2339 [00:01<00:12, 178.08 examples/s]Applying chat template (num_proc=12):  16%|█▋        | 381/2339 [00:01<00:04, 397.10 examples/s]Applying chat template (num_proc=12):  17%|█▋        | 391/2339 [00:01<00:05, 374.20 examples/s]Applying chat template (num_proc=12):  28%|██▊       | 651/2339 [00:01<00:02, 565.18 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00004_of_00012.arrow
+2026-04-10 13:38:39 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00004_of_00012.arrow
+Applying chat template (num_proc=12):  12%|█▏        | 292/2339 [00:01<00:07, 283.62 examples/s]Applying chat template (num_proc=12):  28%|██▊       | 644/2339 [00:01<00:02, 631.55 examples/s]Applying chat template (num_proc=12):  23%|██▎       | 531/2339 [00:01<00:03, 488.41 examples/s]Applying chat template (num_proc=12):  11%|█         | 261/2339 [00:01<00:08, 231.21 examples/s]Applying chat template (num_proc=12):  40%|███▉      | 932/2339 [00:01<00:01, 769.38 examples/s]Applying chat template (num_proc=12):  28%|██▊       | 644/2339 [00:01<00:03, 521.57 examples/s]Applying chat template (num_proc=12):  30%|██▉       | 694/2339 [00:01<00:02, 592.36 examples/s]Applying chat template (num_proc=12):  19%|█▊        | 436/2339 [00:01<00:05, 359.51 examples/s]Applying chat template (num_proc=12):  22%|██▏       | 522/2339 [00:01<00:03, 458.77 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00005_of_00012.arrow
+2026-04-10 13:38:39 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00005_of_00012.arrow
+Applying chat template (num_proc=12):  33%|███▎      | 780/2339 [00:01<00:02, 648.31 examples/s]Applying chat template (num_proc=12):  47%|████▋     | 1106/2339 [00:01<00:01, 714.44 examples/s]Applying chat template (num_proc=12):  36%|███▌      | 832/2339 [00:01<00:02, 570.07 examples/s]Applying chat template (num_proc=12):  42%|████▏     | 976/2339 [00:01<00:01, 690.96 examples/s]Applying chat template (num_proc=12):  17%|█▋        | 404/2339 [00:01<00:06, 279.84 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00006_of_00012.arrow
+2026-04-10 13:38:39 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00006_of_00012.arrow
+Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00007_of_00012.arrow
+2026-04-10 13:38:39 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00007_of_00012.arrow
+Applying chat template (num_proc=12):  42%|████▏     | 975/2339 [00:01<00:02, 627.34 examples/s]Applying chat template (num_proc=12):  52%|█████▏    | 1225/2339 [00:02<00:01, 795.32 examples/s]Applying chat template (num_proc=12):  28%|██▊       | 645/2339 [00:01<00:04, 397.79 examples/s]Applying chat template (num_proc=12):  50%|█████     | 1170/2339 [00:02<00:01, 769.33 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00008_of_00012.arrow
+2026-04-10 13:38:39 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00008_of_00012.arrow
+Applying chat template (num_proc=12):  58%|█████▊    | 1365/2339 [00:02<00:01, 715.72 examples/s]Applying chat template (num_proc=12):  36%|███▋      | 850/2339 [00:02<00:03, 426.58 examples/s]Applying chat template (num_proc=12):  69%|██████▉   | 1625/2339 [00:02<00:00, 1204.88 examples/s]Applying chat template (num_proc=12):  27%|██▋       | 623/2339 [00:02<00:05, 330.00 examples/s]Applying chat template (num_proc=12):  58%|█████▊    | 1365/2339 [00:02<00:01, 948.15 examples/s]Applying chat template (num_proc=12):  35%|███▍      | 811/2339 [00:02<00:02, 518.56 examples/s]Applying chat template (num_proc=12):  47%|████▋     | 1105/2339 [00:02<00:01, 751.58 examples/s]Applying chat template (num_proc=12):  65%|██████▌   | 1525/2339 [00:02<00:00, 863.44 examples/s]Applying chat template (num_proc=12):  42%|████▏     | 975/2339 [00:02<00:02, 582.98 examples/s]Applying chat template (num_proc=12):  37%|███▋      | 858/2339 [00:02<00:03, 455.59 examples/s]Applying chat template (num_proc=12):  53%|█████▎    | 1242/2339 [00:02<00:01, 714.89 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00009_of_00012.arrow
+2026-04-10 13:38:40 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00009_of_00012.arrow
+Applying chat template (num_proc=12):  78%|███████▊  | 1819/2339 [00:02<00:00, 871.93 examples/s] Applying chat template (num_proc=12):  58%|█████▊    | 1365/2339 [00:02<00:01, 580.88 examples/s]Applying chat template (num_proc=12):  66%|██████▌   | 1535/2339 [00:02<00:01, 568.22 examples/s]Applying chat template (num_proc=12):  58%|█████▊    | 1352/2339 [00:02<00:01, 869.57 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00010_of_00012.arrow
+2026-04-10 13:38:40 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00010_of_00012.arrow
+Applying chat template (num_proc=12):  53%|█████▎    | 1235/2339 [00:02<00:02, 537.65 examples/s]Applying chat template (num_proc=12):  89%|████████▉ | 2079/2339 [00:02<00:00, 1056.37 examples/s]Applying chat template (num_proc=12):  58%|█████▊    | 1365/2339 [00:02<00:01, 684.50 examples/s]Applying chat template (num_proc=12):  48%|████▊     | 1128/2339 [00:02<00:02, 593.72 examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00011_of_00012.arrow
+2026-04-10 13:38:40 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-0f820217b8a8b27e_00011_of_00012.arrow
+Applying chat template (num_proc=12):  75%|███████▌  | 1755/2339 [00:02<00:00, 1202.39 examples/s]Applying chat template (num_proc=12):  83%|████████▎ | 1950/2339 [00:02<00:00, 969.14 examples/s]Applying chat template (num_proc=12):  83%|████████▎ | 1950/2339 [00:02<00:00, 1037.60 examples/s]Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
+    self.run()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
+    server.serve_forever()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
+    sys.exit(0)
+SystemExit: 0
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
+    finalizer()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
+    res = self._callback(*self._args, **self._kwargs)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
+    rmtree(tempdir)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
+    _rmtree_safe_fd(fd, path, onerror)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
+    onerror(os.unlink, fullname, sys.exc_info())
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
+    os.unlink(entry.name, dir_fd=topfd)
+OSError: [Errno 16] Device or resource busy: '.nfsc7f9bea9a34682f900001814'
+Applying chat template (num_proc=12):  67%|██████▋   | 1560/2339 [00:02<00:00, 812.93 examples/s]Applying chat template (num_proc=12): 100%|██████████| 2339/2339 [00:02<00:00, 790.02 examples/s] 
+Applying chat template (num_proc=12):  65%|██████▍   | 1519/2339 [00:03<00:01, 673.72 examples/s]Concatenating 12 shards
+2026-04-10 13:38:40 - INFO - datasets.arrow_dataset - Concatenating 12 shards
+Filter:   0%|          | 0/43598 [00:00<?, ? examples/s]Applying chat template (num_proc=12):  83%|████████▎ | 1948/2339 [00:02<00:00, 1174.18 examples/s]Applying chat template (num_proc=12):  74%|███████▎  | 1724/2339 [00:03<00:00, 768.94 examples/s]Applying chat template (num_proc=12):  92%|█████████▏| 2145/2339 [00:03<00:00, 970.15 examples/s] Applying chat template (num_proc=12):  90%|████████▉ | 2094/2339 [00:03<00:00, 849.83 examples/s]Applying chat template (num_proc=12):  83%|████████▎ | 1950/2339 [00:02<00:00, 1028.68 examples/s]Applying chat template (num_proc=12):  88%|████████▊ | 2052/2339 [00:03<00:00, 768.81 examples/s]Applying chat template (num_proc=12):  58%|█████▊    | 1366/2339 [00:02<00:01, 595.06 examples/s]Applying chat template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 1046.21 examples/s]Applying chat template (num_proc=12):  92%|█████████▏| 2145/2339 [00:03<00:00, 1140.39 examples/s]Applying chat template (num_proc=12):  83%|████████▎ | 1950/2339 [00:03<00:00, 857.46 examples/s]Applying chat template (num_proc=12):  92%|█████████▏| 2145/2339 [00:03<00:00, 1083.43 examples/s]Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
+    self.run()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
+    server.serve_forever()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
+    sys.exit(0)
+SystemExit: 0
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
+    finalizer()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
+    res = self._callback(*self._args, **self._kwargs)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
+    rmtree(tempdir)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
+    _rmtree_safe_fd(fd, path, onerror)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
+    onerror(os.unlink, fullname, sys.exc_info())
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
+    os.unlink(entry.name, dir_fd=topfd)
+OSError: [Errno 16] Device or resource busy: '.nfsf04c0444c06ed9dc00001822'
+Applying chat template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 966.32 examples/s]Applying chat template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 718.19 examples/s] 
+Filter:   0%|          | 0/43598 [00:00<?, ? examples/s]Applying chat template (num_proc=12):  83%|████████▎ | 1950/2339 [00:03<00:00, 1060.92 examples/s]Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
+    self.run()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
+    server.serve_forever()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
+    sys.exit(0)
+SystemExit: 0
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
+    finalizer()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
+    res = self._callback(*self._args, **self._kwargs)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
+    rmtree(tempdir)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
+    _rmtree_safe_fd(fd, path, onerror)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
+    onerror(os.unlink, fullname, sys.exc_info())
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
+    os.unlink(entry.name, dir_fd=topfd)
+OSError: [Errno 16] Device or resource busy: '.nfs83e9dfb0602b1a1800001826'
+Applying chat template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 696.12 examples/s]
+Applying chat template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 1201.82 examples/s]Applying chat template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 1124.18 examples/s]Filter:   0%|          | 0/43598 [00:00<?, ? examples/s]Applying chat template (num_proc=12):  99%|█████████▉| 2324/2339 [00:03<00:00, 789.78 examples/s]Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
+    self.run()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
+    server.serve_forever()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
+    sys.exit(0)
+Traceback (most recent call last):
+SystemExit: 0
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
+    self.run()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
+    server.serve_forever()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
+    sys.exit(0)
+SystemExit: 0
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
+    finalizer()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
+    res = self._callback(*self._args, **self._kwargs)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
+    rmtree(tempdir)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
+    _rmtree_safe_fd(fd, path, onerror)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
+    onerror(os.unlink, fullname, sys.exc_info())
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
+    os.unlink(entry.name, dir_fd=topfd)
+OSError: [Errno 16] Device or resource busy: '.nfs88e529554f7d28b80000182a'
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
+    finalizer()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
+    res = self._callback(*self._args, **self._kwargs)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
+    rmtree(tempdir)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
+    _rmtree_safe_fd(fd, path, onerror)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
+    onerror(os.unlink, fullname, sys.exc_info())
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
+    os.unlink(entry.name, dir_fd=topfd)
+OSError: [Errno 16] Device or resource busy: '.nfs41bb7f3dcce740cd0000182b'
+Applying chat template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 1100.59 examples/s]Applying chat template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 694.22 examples/s] 
+Applying chat template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 694.19 examples/s] 
+Applying chat template (num_proc=12):  92%|█████████▏| 2145/2339 [00:03<00:00, 1118.95 examples/s]Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
+    self.run()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
+    server.serve_forever()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
+    sys.exit(0)
+SystemExit: 0
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
+    finalizer()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
+    res = self._callback(*self._args, **self._kwargs)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
+    rmtree(tempdir)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
+    _rmtree_safe_fd(fd, path, onerror)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
+    onerror(os.unlink, fullname, sys.exc_info())
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
+    os.unlink(entry.name, dir_fd=topfd)
+OSError: [Errno 16] Device or resource busy: '.nfs52010f168a9739e20000182e'
+Applying chat template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 633.80 examples/s]
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
+    self.run()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
+    server.serve_forever()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
+    sys.exit(0)
+SystemExit: 0
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
+    finalizer()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
+    res = self._callback(*self._args, **self._kwargs)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
+    rmtree(tempdir)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
+    _rmtree_safe_fd(fd, path, onerror)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
+    onerror(os.unlink, fullname, sys.exc_info())
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
+    os.unlink(entry.name, dir_fd=topfd)
+OSError: [Errno 16] Device or resource busy: '.nfsf45fad7f528b56430000182f'
+Filter:   0%|          | 0/43598 [00:00<?, ? examples/s]Filter:   0%|          | 0/43598 [00:00<?, ? examples/s]Applying chat template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 635.95 examples/s] 
+Filter:   0%|          | 0/43598 [00:00<?, ? examples/s]Filter:   0%|          | 0/43598 [00:00<?, ? examples/s]Applying chat template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 1062.70 examples/s]Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
+    self.run()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
+    self._target(*self._args, **self._kwargs)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
+    server.serve_forever()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
+    sys.exit(0)
+SystemExit: 0
+
+During handling of the above exception, another exception occurred:
+
+Traceback (most recent call last):
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
+    finalizer()
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
+    res = self._callback(*self._args, **self._kwargs)
+          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
+    rmtree(tempdir)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
+    _rmtree_safe_fd(fd, path, onerror)
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
+    onerror(os.unlink, fullname, sys.exc_info())
+  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
+    os.unlink(entry.name, dir_fd=topfd)
+OSError: [Errno 16] Device or resource busy: '.nfsfc85ae0694b60e5100001831'
+Applying chat template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 642.64 examples/s] 
+Filter:   0%|          | 0/43598 [00:00<?, ? examples/s]Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d924339979854155.arrow
+2026-04-10 13:38:50 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-d924339979854155.arrow
+Filter:  23%|██▎       | 10000/43598 [00:09<00:31, 1060.64 examples/s]Filter:  23%|██▎       | 10000/43598 [00:09<00:30, 1101.67 examples/s]Filter:  23%|██▎       | 10000/43598 [00:09<00:30, 1085.72 examples/s]Filter:  23%|██▎       | 10000/43598 [00:09<00:31, 1056.15 examples/s]Filter:  23%|██▎       | 10000/43598 [00:09<00:31, 1077.72 examples/s]Filter:  23%|██▎       | 10000/43598 [00:09<00:31, 1060.72 examples/s]Filter:  23%|██▎       | 10000/43598 [00:09<00:31, 1071.19 examples/s]Filter:  23%|██▎       | 10000/43598 [00:09<00:31, 1075.25 examples/s]Filter:  46%|████▌     | 20000/43598 [00:18<00:22, 1071.34 examples/s]Filter:  46%|████▌     | 20000/43598 [00:18<00:21, 1106.41 examples/s]Filter:  46%|████▌     | 20000/43598 [00:18<00:21, 1104.77 examples/s]Filter:  46%|████▌     | 20000/43598 [00:18<00:21, 1091.13 examples/s]Filter:  46%|████▌     | 20000/43598 [00:18<00:21, 1082.12 examples/s]Filter:  46%|████▌     | 20000/43598 [00:18<00:21, 1076.15 examples/s]Filter:  46%|████▌     | 20000/43598 [00:18<00:21, 1084.44 examples/s]Filter:  46%|████▌     | 20000/43598 [00:18<00:22, 1070.42 examples/s]Filter:  69%|██████▉   | 30000/43598 [00:27<00:12, 1100.97 examples/s]Filter:  69%|██████▉   | 30000/43598 [00:27<00:12, 1098.03 examples/s]Filter:  69%|██████▉   | 30000/43598 [00:27<00:12, 1074.07 examples/s]Filter:  69%|██████▉   | 30000/43598 [00:27<00:12, 1083.51 examples/s]Filter:  69%|██████▉   | 30000/43598 [00:27<00:12, 1088.19 examples/s]Filter:  69%|██████▉   | 30000/43598 [00:27<00:12, 1074.76 examples/s]Filter:  69%|██████▉   | 30000/43598 [00:27<00:12, 1084.57 examples/s]Filter:  69%|██████▉   | 30000/43598 [00:27<00:12, 1072.17 examples/s]Filter:  92%|█████████▏| 40000/43598 [00:36<00:03, 1099.29 examples/s]Filter:  92%|█████████▏| 40000/43598 [00:37<00:03, 1083.81 examples/s]Filter:  92%|█████████▏| 40000/43598 [00:36<00:03, 1092.73 examples/s]Filter:  92%|█████████▏| 40000/43598 [00:36<00:03, 1092.84 examples/s]Filter:  
92%|█████████▏| 40000/43598 [00:36<00:03, 1088.25 examples/s]Filter:  92%|█████████▏| 40000/43598 [00:36<00:03, 1084.25 examples/s]Filter:  92%|█████████▏| 40000/43598 [00:36<00:03, 1091.77 examples/s]Filter:  92%|█████████▏| 40000/43598 [00:37<00:03, 1079.73 examples/s]Filter: 100%|██████████| 43598/43598 [00:39<00:00, 1098.72 examples/s]Filter: 100%|██████████| 43598/43598 [00:40<00:00, 1087.31 examples/s]Filter: 100%|██████████| 43598/43598 [00:39<00:00, 1098.90 examples/s]
+Filter:   0%|          | 0/2339 [00:00<?, ? examples/s]Filter: 100%|██████████| 43598/43598 [00:39<00:00, 1097.25 examples/s]Filter: 100%|██████████| 43598/43598 [00:40<00:00, 1079.87 examples/s]
+Filter:   0%|          | 0/2339 [00:00<?, ? examples/s]Filter: 100%|██████████| 43598/43598 [00:40<00:00, 1089.45 examples/s]
+Filter:   0%|          | 0/2339 [00:00<?, ? examples/s]Filter: 100%|██████████| 43598/43598 [00:39<00:00, 1097.41 examples/s]Filter: 100%|██████████| 43598/43598 [00:39<00:00, 1090.10 examples/s]
+Filter:   0%|          | 0/2339 [00:00<?, ? examples/s]Filter: 100%|██████████| 43598/43598 [00:39<00:00, 1088.43 examples/s]Filter: 100%|██████████| 43598/43598 [00:39<00:00, 1091.34 examples/s]
+Filter:   0%|          | 0/2339 [00:00<?, ? examples/s]Filter: 100%|██████████| 43598/43598 [00:40<00:00, 1088.80 examples/s]Filter: 100%|██████████| 43598/43598 [00:40<00:00, 1082.77 examples/s]
+Filter:   0%|          | 0/2339 [00:00<?, ? examples/s]Filter: 100%|██████████| 43598/43598 [00:39<00:00, 1096.15 examples/s]Filter: 100%|██████████| 43598/43598 [00:40<00:00, 1089.78 examples/s]
+Filter:   0%|          | 0/2339 [00:00<?, ? examples/s]Filter: 100%|██████████| 43598/43598 [00:40<00:00, 1086.32 examples/s]Filter: 100%|██████████| 43598/43598 [00:40<00:00, 1078.74 examples/s]
+Filter:   0%|          | 0/2339 [00:00<?, ? examples/s]Filter: 100%|██████████| 2339/2339 [00:02<00:00, 1111.72 examples/s]Filter: 100%|██████████| 2339/2339 [00:02<00:00, 1081.44 examples/s]Filter: 100%|██████████| 2339/2339 [00:02<00:00, 1106.62 examples/s]
+Filter: 100%|██████████| 2339/2339 [00:02<00:00, 1077.69 examples/s]
+Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-45f8b5bf4d93c54f.arrow
+2026-04-10 13:39:23 - INFO - datasets.arrow_dataset - Caching processed dataset at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/Anthropic___hh-rlhf/default-cfba128a0ab1b99f/0.0.0/09be8c5bbc57cb3887f3a9732ad6aa7ec602a1fa/cache-45f8b5bf4d93c54f.arrow
+Filter: 100%|██████████| 2339/2339 [00:02<00:00, 1086.30 examples/s]Filter: 100%|██████████| 2339/2339 [00:02<00:00, 1080.21 examples/s]
+2026-04-10 13:39:23 - INFO - __main__ - Decontaminated 0 (0.00%) samples from the training set.
+2026-04-10 13:39:23 - INFO - __main__ - Sample 41905 of the processed training set:
+
+<|begin_of_text|><|start_header_id|>user<|end_header_id|>
+
+What is the best way to clean my refrigerator?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+Well, I have a few ideas, but the best way to know what works best is to run some tests and then analyze the data to figure out which technique works best.<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+What are your ideas?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+Some of my ideas include wiping down the interior with a rag, using bleach and water to wipe down the door, emptying the contents of the fridge to scrub the shelves with a clean cloth, cleaning the interior of the fridge with vinegar and water, and using a vacuum cleaner to clean the interior.<|eot_id|>
+2026-04-10 13:39:23 - INFO - __main__ - Sample 7296 of the processed training set:
+
+<|begin_of_text|><|start_header_id|>user<|end_header_id|>
+
+How can I get blood out of cotton?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+Blood? What do you mean?<|eot_id|>
+2026-04-10 13:39:23 - INFO - __main__ - Sample 1639 of the processed training set:
+
+<|begin_of_text|><|start_header_id|>user<|end_header_id|>
+
+I'm trying to get my friend to open up<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+Sounds great!  What have you tried so far?<|eot_id|><|start_header_id|>user<|end_header_id|>
+
+I've tried to give my friend a safe space to open up about their feelings but they still aren't comfortable<|eot_id|><|start_header_id|>assistant<|end_header_id|>
+
+It’s really important that people feel safe sharing their feelings, but I totally hear that this isn’t easy for them.  And in my experience, that has a lot to do with them feeling that you can accept them and what they’re saying, even if they’re feeling difficult or uncomfortable feelings.  Do you think you’ve done that, and that they still haven’t opened up?<|eot_id|>
+Filter: 100%|██████████| 2339/2339 [00:02<00:00, 1093.93 examples/s]Filter: 100%|██████████| 2339/2339 [00:02<00:00, 1088.80 examples/s]
+Filter: 100%|██████████| 2339/2339 [00:02<00:00, 1092.40 examples/s]Filter: 100%|██████████| 2339/2339 [00:02<00:00, 1083.66 examples/s]
+Filter: 100%|██████████| 2339/2339 [00:02<00:00, 1074.30 examples/s]Filter: 100%|██████████| 2339/2339 [00:02<00:00, 1068.17 examples/s]
+Filter: 100%|██████████| 2339/2339 [00:02<00:00, 1076.92 examples/s]Filter: 100%|██████████| 2339/2339 [00:02<00:00, 1072.00 examples/s]
+Filter: 100%|██████████| 2339/2339 [00:02<00:00, 1107.86 examples/s]Filter: 100%|██████████| 2339/2339 [00:02<00:00, 1103.67 examples/s]
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/huggingface_hub/utils/_deprecation.py:100: FutureWarning: Deprecated argument(s) used in '__init__': model_init_kwargs, dataset_text_field, max_seq_length, packing. Will not be supported from version '1.0.0'.
+
+Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
+  warnings.warn(message, FutureWarning)
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/huggingface_hub/utils/_deprecation.py:100: FutureWarning: Deprecated argument(s) used in '__init__': model_init_kwargs, dataset_text_field, max_seq_length, packing. Will not be supported from version '1.0.0'.
+
+Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
+  warnings.warn(message, FutureWarning)
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/huggingface_hub/utils/_deprecation.py:100: FutureWarning: Deprecated argument(s) used in '__init__': model_init_kwargs, dataset_text_field, max_seq_length, packing. Will not be supported from version '1.0.0'.
+
+Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
+  warnings.warn(message, FutureWarning)
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/huggingface_hub/utils/_deprecation.py:100: FutureWarning: Deprecated argument(s) used in '__init__': model_init_kwargs, dataset_text_field, max_seq_length, packing. Will not be supported from version '1.0.0'.
+
+Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
+  warnings.warn(message, FutureWarning)
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/huggingface_hub/utils/_deprecation.py:100: FutureWarning: Deprecated argument(s) used in '__init__': model_init_kwargs, dataset_text_field, max_seq_length, packing. Will not be supported from version '1.0.0'.
+
+Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
+  warnings.warn(message, FutureWarning)
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/huggingface_hub/utils/_deprecation.py:100: FutureWarning: Deprecated argument(s) used in '__init__': model_init_kwargs, dataset_text_field, max_seq_length, packing. Will not be supported from version '1.0.0'.
+
+Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
+  warnings.warn(message, FutureWarning)
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/huggingface_hub/utils/_deprecation.py:100: FutureWarning: Deprecated argument(s) used in '__init__': model_init_kwargs, dataset_text_field, max_seq_length, packing. Will not be supported from version '1.0.0'.
+
+Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
+  warnings.warn(message, FutureWarning)
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/huggingface_hub/utils/_deprecation.py:100: FutureWarning: Deprecated argument(s) used in '__init__': model_init_kwargs, dataset_text_field, max_seq_length, packing. Will not be supported from version '1.0.0'.
+
+Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
+  warnings.warn(message, FutureWarning)
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:158: UserWarning: You passed `model_init_kwargs` to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:158: UserWarning: You passed `model_init_kwargs` to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:185: UserWarning: You passed a model_id to the SFTTrainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:158: UserWarning: You passed `model_init_kwargs` to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:185: UserWarning: You passed a model_id to the SFTTrainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:158: UserWarning: You passed `model_init_kwargs` to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:185: UserWarning: You passed a model_id to the SFTTrainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:185: UserWarning: You passed a model_id to the SFTTrainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:158: UserWarning: You passed `model_init_kwargs` to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:185: UserWarning: You passed a model_id to the SFTTrainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:158: UserWarning: You passed `model_init_kwargs` to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:158: UserWarning: You passed `model_init_kwargs` to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:185: UserWarning: You passed a model_id to the SFTTrainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:185: UserWarning: You passed a model_id to the SFTTrainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:158: UserWarning: You passed `model_init_kwargs` to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:185: UserWarning: You passed a model_id to the SFTTrainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
+  warnings.warn(
+[INFO|configuration_utils.py:691] 2026-04-10 13:39:26,149 >> loading configuration file /scratch/feng.yulu/dynamic-dpo-v4/base_models/Meta-Llama-3-8B/config.json
+[INFO|configuration_utils.py:765] 2026-04-10 13:39:26,151 >> Model config LlamaConfig {
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 128000,
+  "eos_token_id": 128001,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 8192,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 500000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.0",
+  "use_cache": false,
+  "vocab_size": 128256
+}
+
+[INFO|modeling_utils.py:1121] 2026-04-10 13:39:26,165 >> loading weights file /scratch/feng.yulu/dynamic-dpo-v4/base_models/Meta-Llama-3-8B/model.safetensors.index.json
+[WARNING|logging.py:328] 2026-04-10 13:39:26,166 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
+[INFO|modeling_utils.py:2167] 2026-04-10 13:39:26,166 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16.
+[WARNING|logging.py:328] 2026-04-10 13:39:26,166 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
+[WARNING|logging.py:328] 2026-04-10 13:39:26,166 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
+[WARNING|logging.py:328] 2026-04-10 13:39:26,166 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
+[WARNING|logging.py:328] 2026-04-10 13:39:26,169 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
+[WARNING|logging.py:328] 2026-04-10 13:39:26,170 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
+[WARNING|logging.py:328] 2026-04-10 13:39:26,170 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
+[WARNING|logging.py:328] 2026-04-10 13:39:26,170 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
+[INFO|configuration_utils.py:1142] 2026-04-10 13:39:26,171 >> Generate config GenerationConfig {
+  "bos_token_id": 128000,
+  "eos_token_id": 128001,
+  "use_cache": false
+}
+
+Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 463.37it/s]
+Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 473.40it/s]
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:195: UserWarning: You passed a `packing` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:283: UserWarning: You passed a `max_seq_length` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:321: UserWarning: You passed a `dataset_text_field` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:195: UserWarning: You passed a `packing` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:283: UserWarning: You passed a `max_seq_length` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:321: UserWarning: You passed a `dataset_text_field` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 464.76it/s]
+Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 471.85it/s]
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:195: UserWarning: You passed a `packing` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:283: UserWarning: You passed a `max_seq_length` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:321: UserWarning: You passed a `dataset_text_field` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:195: UserWarning: You passed a `packing` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:283: UserWarning: You passed a `max_seq_length` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:321: UserWarning: You passed a `dataset_text_field` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 349.00it/s]
+Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 349.98it/s]
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:195: UserWarning: You passed a `packing` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:283: UserWarning: You passed a `max_seq_length` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:321: UserWarning: You passed a `dataset_text_field` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:195: UserWarning: You passed a `packing` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:283: UserWarning: You passed a `max_seq_length` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:321: UserWarning: You passed a `dataset_text_field` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 360.32it/s]
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:195: UserWarning: You passed a `packing` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:283: UserWarning: You passed a `max_seq_length` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:321: UserWarning: You passed a `dataset_text_field` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 82.44it/s]
+[INFO|modeling_utils.py:4926] 2026-04-10 13:39:26,261 >> All model checkpoint weights were used when initializing LlamaForCausalLM.
+
+[INFO|modeling_utils.py:4934] 2026-04-10 13:39:26,262 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /scratch/feng.yulu/dynamic-dpo-v4/base_models/Meta-Llama-3-8B.
+If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
+[INFO|configuration_utils.py:1095] 2026-04-10 13:39:26,264 >> loading configuration file /scratch/feng.yulu/dynamic-dpo-v4/base_models/Meta-Llama-3-8B/generation_config.json
+[INFO|configuration_utils.py:1142] 2026-04-10 13:39:26,265 >> Generate config GenerationConfig {
+  "bos_token_id": 128000,
+  "do_sample": true,
+  "eos_token_id": 128001,
+  "max_length": 4096,
+  "temperature": 0.6,
+  "top_p": 0.9
+}
+
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:195: UserWarning: You passed a `packing` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:283: UserWarning: You passed a `max_seq_length` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:321: UserWarning: You passed a `dataset_text_field` argument to the SFTTrainer, the value you passed will override the one in the `SFTConfig`.
+  warnings.warn(
+Using custom data configuration default-39b52f6e03e85a82
+2026-04-10 13:39:26 - INFO - datasets.builder - Using custom data configuration default-39b52f6e03e85a82
+Loading Dataset Infos from /home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/datasets/packaged_modules/generator
+2026-04-10 13:39:26 - INFO - datasets.info - Loading Dataset Infos from /home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/datasets/packaged_modules/generator
+Generating dataset generator (/scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/generator/default-39b52f6e03e85a82/0.0.0)
+2026-04-10 13:39:26 - INFO - datasets.builder - Generating dataset generator (/scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/generator/default-39b52f6e03e85a82/0.0.0)
+Downloading and preparing dataset generator/default to /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/generator/default-39b52f6e03e85a82/0.0.0...
+2026-04-10 13:39:26 - INFO - datasets.builder - Downloading and preparing dataset generator/default to /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/generator/default-39b52f6e03e85a82/0.0.0...
+Generating train split
+2026-04-10 13:39:26 - INFO - datasets.builder - Generating train split
+Generating train split: 0 examples [00:00, ? examples/s]Generating train split: 1 examples [00:00,  1.74 examples/s]Generating train split: 705 examples [00:01, 599.40 examples/s]Generating train split: 1413 examples [00:01, 818.34 examples/s]Generating train split: 2120 examples [00:02, 927.30 examples/s]Generating train split: 2825 examples [00:03, 999.25 examples/s]Generating train split: 3530 examples [00:03, 1022.46 examples/s]Generating train split: 4236 examples [00:04, 1046.23 examples/s]Generating train split: 4943 examples [00:05, 1075.23 examples/s]Generating train split: 5650 examples [00:05, 1079.58 examples/s]Generating train split: 6358 examples [00:06, 1008.34 examples/s]Generating train split: 7061 examples [00:07, 1030.47 examples/s]Generating train split: 7767 examples [00:07, 1054.48 examples/s]Generating train split: 8476 examples [00:08, 1064.89 examples/s]Generating train split: 9186 examples [00:09, 1074.60 examples/s]Generating train split: 9895 examples [00:09, 1095.85 examples/s]Generating train split: 10604 examples [00:10, 1094.68 examples/s]Generating train split: 11310 examples [00:11, 1092.87 examples/s]Generating train split: 12000 examples [00:11, 1364.84 examples/s]Generating train split: 12723 examples [00:12, 974.19 examples/s] Generating train split: 13430 examples [00:13, 1005.80 examples/s]Generating train split: 14136 examples [00:13, 1032.77 examples/s]Generating train split: 14842 examples [00:14, 1063.97 examples/s]Generating train split: 15550 examples [00:15, 1071.50 examples/s]Generating train split: 16260 examples [00:15, 1300.26 examples/s]Generating train split: 16516 examples [00:15, 1064.79 examples/s]
+Unable to verify splits sizes.
+2026-04-10 13:39:41 - INFO - datasets.utils.info_utils - Unable to verify splits sizes.
+Dataset generator downloaded and prepared to /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/generator/default-39b52f6e03e85a82/0.0.0. Subsequent calls will reuse this data.
+2026-04-10 13:39:41 - INFO - datasets.builder - Dataset generator downloaded and prepared to /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/generator/default-39b52f6e03e85a82/0.0.0. Subsequent calls will reuse this data.
+Using custom data configuration default-1519231937de8df3
+2026-04-10 13:39:42 - INFO - datasets.builder - Using custom data configuration default-1519231937de8df3
+Loading Dataset Infos from /home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/datasets/packaged_modules/generator
+2026-04-10 13:39:42 - INFO - datasets.info - Loading Dataset Infos from /home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/datasets/packaged_modules/generator
+Generating dataset generator (/scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/generator/default-1519231937de8df3/0.0.0)
+2026-04-10 13:39:42 - INFO - datasets.builder - Generating dataset generator (/scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/generator/default-1519231937de8df3/0.0.0)
+Downloading and preparing dataset generator/default to /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/generator/default-1519231937de8df3/0.0.0...
+2026-04-10 13:39:42 - INFO - datasets.builder - Downloading and preparing dataset generator/default to /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/generator/default-1519231937de8df3/0.0.0...
+Generating train split
+2026-04-10 13:39:42 - INFO - datasets.builder - Generating train split
+Generating train split: 0 examples [00:00, ? examples/s]Generating train split: 1 examples [00:00,  1.72 examples/s]Generating train split: 711 examples [00:00, 1190.80 examples/s]Generating train split: 895 examples [00:00, 1099.18 examples/s]
+Unable to verify splits sizes.
+2026-04-10 13:39:42 - INFO - datasets.utils.info_utils - Unable to verify splits sizes.
+Dataset generator downloaded and prepared to /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/generator/default-1519231937de8df3/0.0.0. Subsequent calls will reuse this data.
+2026-04-10 13:39:43 - INFO - datasets.builder - Dataset generator downloaded and prepared to /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets/generator/default-1519231937de8df3/0.0.0. Subsequent calls will reuse this data.
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:412: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `SFTTrainer.__init__`. Use `processing_class` instead.
+  super().__init__(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:412: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `SFTTrainer.__init__`. Use `processing_class` instead.
+  super().__init__(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:412: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `SFTTrainer.__init__`. Use `processing_class` instead.
+  super().__init__(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:412: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `SFTTrainer.__init__`. Use `processing_class` instead.
+  super().__init__(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:412: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `SFTTrainer.__init__`. Use `processing_class` instead.
+  super().__init__(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:412: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `SFTTrainer.__init__`. Use `processing_class` instead.
+  super().__init__(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:412: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `SFTTrainer.__init__`. Use `processing_class` instead.
+  super().__init__(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/trl/trainer/sft_trainer.py:412: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `SFTTrainer.__init__`. Use `processing_class` instead.
+  super().__init__(
+[INFO|trainer.py:748] 2026-04-10 13:39:44,030 >> Using auto half precision backend
+2026-04-10 13:39:44 - INFO - __main__ - *** Train ***
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaForCausalLM because mixed precision turned on in FSDP. Affects: model.embed_tokens.weight, model.norm.weight, lm_head.weight.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaDecoderLayer because mixed precision turned on in FSDP. Affects: self_attn.q_proj.weight, self_attn.k_proj.weight, self_attn.v_proj.weight, self_attn.o_proj.weight, mlp.gate_proj.weight, mlp.up_proj.weight, mlp.down_proj.weight, input_layernorm.weight, post_attention_layernorm.weight.
+  warnings.warn(
+/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/accelerate/accelerator.py:1563: UserWarning: FSDP upcast of low precision parameters may affect the precision of model checkpoints.
+  warnings.warn(
+[INFO|trainer.py:2414] 2026-04-10 13:39:48,781 >> ***** Running training *****
+[INFO|trainer.py:2415] 2026-04-10 13:39:48,781 >>   Num examples = 16,516
+[INFO|trainer.py:2416] 2026-04-10 13:39:48,781 >>   Num Epochs = 1
+[INFO|trainer.py:2417] 2026-04-10 13:39:48,781 >>   Instantaneous batch size per device = 16
+[INFO|trainer.py:2420] 2026-04-10 13:39:48,781 >>   Total train batch size (w. parallel, distributed & accumulation) = 128
+[INFO|trainer.py:2421] 2026-04-10 13:39:48,781 >>   Gradient Accumulation steps = 1
+[INFO|trainer.py:2422] 2026-04-10 13:39:48,781 >>   Total optimization steps = 130
+[INFO|trainer.py:2423] 2026-04-10 13:39:48,781 >>   Number of trainable parameters = 1,003,782,656
+[INFO|integration_utils.py:831] 2026-04-10 13:39:48,782 >> Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
+wandb: Currently logged in as: can-not-fand (can-not-fand-northeastern-university). Use `wandb login --relogin` to force relogin
+wandb: wandb version 0.25.1 is available!  To upgrade, please run:
+wandb:  $ pip install wandb --upgrade
+wandb: Tracking run with wandb version 0.17.5
+wandb: Run data is saved locally in /scratch/feng.yulu/dynamic-dpo-v4/wandb/wandb/run-20260410_133950-cqdvywmp
+wandb: Run `wandb offline` to turn off syncing.
+wandb: Syncing run llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758
+wandb: ⭐️ View project at https://wandb.ai/can-not-fand-northeastern-university/huggingface
+wandb: 🚀 View run at https://wandb.ai/can-not-fand-northeastern-university/huggingface/runs/cqdvywmp
+  0%|          | 0/130 [00:00<?, ?it/s]  1%|          | 1/130 [00:01<03:05,  1.44s/it]                                               {'loss': 3.1196, 'grad_norm': inf, 'learning_rate': 0.0, 'epoch': 0.01}
+  1%|          | 1/130 [00:01<03:05,  1.44s/it]  2%|▏         | 2/130 [00:02<02:44,  1.29s/it]  2%|▏         | 3/130 [00:03<02:35,  1.23s/it]  3%|▎         | 4/130 [00:04<02:30,  1.20s/it]  4%|▍         | 5/130 [00:06<02:27,  1.18s/it]                                               {'loss': 3.1223, 'grad_norm': inf, 'learning_rate': 6.153846153846155e-06, 'epoch': 0.04}
+  4%|▍         | 5/130 [00:06<02:27,  1.18s/it]  5%|▍         | 6/130 [00:07<02:25,  1.17s/it]  5%|▌         | 7/130 [00:08<02:23,  1.16s/it]  6%|▌         | 8/130 [00:09<02:21,  1.16s/it]  7%|▋         | 9/130 [00:10<02:19,  1.16s/it]  8%|▊         | 10/130 [00:11<02:18,  1.15s/it]                                                {'loss': 3.1246, 'grad_norm': inf, 'learning_rate': 1.3846153846153847e-05, 'epoch': 0.08}
+  8%|▊         | 10/130 [00:11<02:18,  1.15s/it]  8%|▊         | 11/130 [00:12<02:16,  1.15s/it]  9%|▉         | 12/130 [00:14<02:15,  1.15s/it] 10%|█         | 13/130 [00:15<02:14,  1.15s/it] 11%|█         | 14/130 [00:16<02:13,  1.15s/it] 12%|█▏        | 15/130 [00:17<02:12,  1.15s/it]                                                {'loss': 3.1264, 'grad_norm': inf, 'learning_rate': 1.9996395276708856e-05, 'epoch': 0.12}
+ 12%|█▏        | 15/130 [00:17<02:12,  1.15s/it] 12%|█▏        | 16/130 [00:18<02:11,  1.15s/it] 13%|█▎        | 17/130 [00:19<02:09,  1.15s/it] 14%|█▍        | 18/130 [00:21<02:08,  1.15s/it] 15%|█▍        | 19/130 [00:22<02:07,  1.15s/it] 15%|█▌        | 20/130 [00:23<02:06,  1.15s/it]                                                {'loss': 2.9981, 'grad_norm': 35.01002883911133, 'learning_rate': 1.9870502626379127e-05, 'epoch': 0.15}
+ 15%|█▌        | 20/130 [00:23<02:06,  1.15s/it] 16%|█▌        | 21/130 [00:24<02:05,  1.15s/it] 17%|█▋        | 22/130 [00:25<02:04,  1.15s/it] 18%|█▊        | 23/130 [00:26<02:03,  1.15s/it] 18%|█▊        | 24/130 [00:27<02:02,  1.15s/it] 19%|█▉        | 25/130 [00:29<02:00,  1.15s/it]                                                {'loss': 2.531, 'grad_norm': 11.861149787902832, 'learning_rate': 1.9566964208274254e-05, 'epoch': 0.19}
+ 19%|█▉        | 25/130 [00:29<02:00,  1.15s/it] 20%|██        | 26/130 [00:30<01:59,  1.15s/it] 21%|██        | 27/130 [00:31<01:58,  1.15s/it] 22%|██▏       | 28/130 [00:32<01:57,  1.15s/it] 22%|██▏       | 29/130 [00:33<01:56,  1.15s/it] 23%|██▎       | 30/130 [00:34<01:55,  1.15s/it]                                                {'loss': 2.2014, 'grad_norm': 7.360829830169678, 'learning_rate': 1.909124299802724e-05, 'epoch': 0.23}
+ 23%|██▎       | 30/130 [00:34<01:55,  1.15s/it] 24%|██▍       | 31/130 [00:35<01:53,  1.15s/it] 25%|██▍       | 32/130 [00:37<01:52,  1.15s/it] 25%|██▌       | 33/130 [00:38<01:51,  1.15s/it] 26%|██▌       | 34/130 [00:39<01:50,  1.15s/it] 27%|██▋       | 35/130 [00:40<01:49,  1.15s/it]                                                {'loss': 2.0137, 'grad_norm': 6.061845779418945, 'learning_rate': 1.845190085543795e-05, 'epoch': 0.27}
+ 27%|██▋       | 35/130 [00:40<01:49,  1.15s/it] 28%|██▊       | 36/130 [00:41<01:53,  1.20s/it] 28%|██▊       | 37/130 [00:43<01:50,  1.19s/it] 29%|██▉       | 38/130 [00:44<01:48,  1.18s/it] 30%|███       | 39/130 [00:45<01:46,  1.17s/it] 31%|███       | 40/130 [00:46<01:45,  1.17s/it]                                                {'loss': 1.8515, 'grad_norm': 3.25486421585083, 'learning_rate': 1.766044443118978e-05, 'epoch': 0.31}
+ 31%|███       | 40/130 [00:46<01:45,  1.17s/it] 32%|███▏      | 41/130 [00:47<01:43,  1.17s/it] 32%|███▏      | 42/130 [00:48<01:42,  1.16s/it] 33%|███▎      | 43/130 [00:50<01:41,  1.16s/it] 34%|███▍      | 44/130 [00:51<01:39,  1.16s/it] 35%|███▍      | 45/130 [00:52<01:38,  1.16s/it]                                                {'loss': 1.7909, 'grad_norm': 2.8577518463134766, 'learning_rate': 1.67311180742757e-05, 'epoch': 0.35}
+ 35%|███▍      | 45/130 [00:52<01:38,  1.16s/it] 35%|███▌      | 46/130 [00:53<01:37,  1.16s/it] 36%|███▌      | 47/130 [00:54<01:36,  1.16s/it] 37%|███▋      | 48/130 [00:55<01:35,  1.16s/it] 38%|███▊      | 49/130 [00:56<01:33,  1.16s/it] 38%|███▊      | 50/130 [00:58<01:32,  1.16s/it]                                                {'loss': 1.7104, 'grad_norm': 3.2024171352386475, 'learning_rate': 1.568064746731156e-05, 'epoch': 0.38}
+ 38%|███▊      | 50/130 [00:58<01:32,  1.16s/it] 39%|███▉      | 51/130 [00:59<01:31,  1.16s/it] 40%|████      | 52/130 [01:00<01:30,  1.16s/it] 41%|████      | 53/130 [01:01<01:29,  1.16s/it] 42%|████▏     | 54/130 [01:02<01:31,  1.20s/it] 42%|████▏     | 55/130 [01:04<01:29,  1.19s/it]                                                {'loss': 1.6393, 'grad_norm': 2.368898630142212, 'learning_rate': 1.4527938603696376e-05, 'epoch': 0.42}
+ 42%|████▏     | 55/130 [01:04<01:29,  1.19s/it] 43%|████▎     | 56/130 [01:05<01:27,  1.18s/it] 44%|████▍     | 57/130 [01:06<01:25,  1.17s/it] 45%|████▍     | 58/130 [01:07<01:24,  1.17s/it] 45%|████▌     | 59/130 [01:08<01:22,  1.17s/it] 46%|████▌     | 60/130 [01:09<01:21,  1.17s/it]                                                {'loss': 1.5979, 'grad_norm': 2.016225814819336, 'learning_rate': 1.3293737524320798e-05, 'epoch': 0.46}
+ 46%|████▌     | 60/130 [01:09<01:21,  1.17s/it] 47%|████▋     | 61/130 [01:11<01:20,  1.16s/it] 48%|████▊     | 62/130 [01:12<01:19,  1.16s/it] 48%|████▊     | 63/130 [01:13<01:17,  1.16s/it] 49%|████▉     | 64/130 [01:14<01:16,  1.16s/it] 50%|█████     | 65/130 [01:15<01:15,  1.16s/it]                                                {'loss': 1.5492, 'grad_norm': 2.3714423179626465, 'learning_rate': 1.2000256937760446e-05, 'epoch': 0.5}
+ 50%|█████     | 65/130 [01:15<01:15,  1.16s/it] 51%|█████     | 66/130 [01:16<01:14,  1.16s/it] 52%|█████▏    | 67/130 [01:17<01:13,  1.16s/it] 52%|█████▏    | 68/130 [01:19<01:14,  1.21s/it] 53%|█████▎    | 69/130 [01:20<01:12,  1.19s/it] 54%|█████▍    | 70/130 [01:21<01:10,  1.18s/it]                                                {'loss': 1.5275, 'grad_norm': 2.853393793106079, 'learning_rate': 1.0670776443910024e-05, 'epoch': 0.54}
+ 54%|█████▍    | 70/130 [01:21<01:10,  1.18s/it] 55%|█████▍    | 71/130 [01:22<01:09,  1.18s/it] 55%|█████▌    | 72/130 [01:23<01:07,  1.17s/it] 56%|█████▌    | 73/130 [01:25<01:06,  1.17s/it] 57%|█████▋    | 74/130 [01:26<01:05,  1.17s/it] 58%|█████▊    | 75/130 [01:27<01:04,  1.16s/it]                                                {'loss': 1.4923, 'grad_norm': 1.7769665718078613, 'learning_rate': 9.329223556089976e-06, 'epoch': 0.58}
+ 58%|█████▊    | 75/130 [01:27<01:04,  1.16s/it] 58%|█████▊    | 76/130 [01:28<01:02,  1.16s/it] 59%|█████▉    | 77/130 [01:29<01:01,  1.16s/it] 60%|██████    | 78/130 [01:30<01:00,  1.16s/it] 61%|██████    | 79/130 [01:32<00:59,  1.16s/it] 62%|██████▏   | 80/130 [01:33<00:58,  1.16s/it]                                                {'loss': 1.4681, 'grad_norm': 2.4375193119049072, 'learning_rate': 7.999743062239557e-06, 'epoch': 0.62}
+ 62%|██████▏   | 80/130 [01:33<00:58,  1.16s/it] 62%|██████▏   | 81/130 [01:34<00:56,  1.16s/it] 63%|██████▎   | 82/130 [01:35<00:55,  1.16s/it] 64%|██████▍   | 83/130 [01:36<00:54,  1.16s/it] 65%|██████▍   | 84/130 [01:37<00:53,  1.16s/it] 65%|██████▌   | 85/130 [01:39<00:52,  1.16s/it]                                                {'loss': 1.4528, 'grad_norm': 2.583644151687622, 'learning_rate': 6.706262475679205e-06, 'epoch': 0.65}
+ 65%|██████▌   | 85/130 [01:39<00:52,  1.16s/it] 66%|██████▌   | 86/130 [01:40<00:51,  1.16s/it] 67%|██████▋   | 87/130 [01:41<00:49,  1.16s/it] 68%|██████▊   | 88/130 [01:42<00:48,  1.16s/it] 68%|██████▊   | 89/130 [01:43<00:47,  1.16s/it] 69%|██████▉   | 90/130 [01:44<00:46,  1.16s/it]                                                {'loss': 1.4174, 'grad_norm': 2.249284267425537, 'learning_rate': 5.47206139630363e-06, 'epoch': 0.69}
+ 69%|██████▉   | 90/130 [01:44<00:46,  1.16s/it] 70%|███████   | 91/130 [01:46<00:45,  1.16s/it] 71%|███████   | 92/130 [01:47<00:44,  1.16s/it] 72%|███████▏  | 93/130 [01:48<00:42,  1.16s/it] 72%|███████▏  | 94/130 [01:49<00:41,  1.16s/it] 73%|███████▎  | 95/130 [01:50<00:40,  1.16s/it]                                                {'loss': 1.4044, 'grad_norm': 2.0346739292144775, 'learning_rate': 4.319352532688444e-06, 'epoch': 0.73}
+ 73%|███████▎  | 95/130 [01:50<00:40,  1.16s/it] 74%|███████▍  | 96/130 [01:51<00:40,  1.20s/it] 75%|███████▍  | 97/130 [01:53<00:39,  1.19s/it] 75%|███████▌  | 98/130 [01:54<00:37,  1.18s/it] 76%|███████▌  | 99/130 [01:55<00:36,  1.17s/it] 77%|███████▋  | 100/130 [01:56<00:35,  1.17s/it]                                                 {'loss': 1.3924, 'grad_norm': 1.5525047779083252, 'learning_rate': 3.2688819257242963e-06, 'epoch': 0.77}
+ 77%|███████▋  | 100/130 [01:56<00:35,  1.17s/it][INFO|trainer.py:4307] 2026-04-10 13:41:49,893 >> 
+***** Running Evaluation *****
+[INFO|trainer.py:4309] 2026-04-10 13:41:49,893 >>   Num examples = 895
+[INFO|trainer.py:4312] 2026-04-10 13:41:49,893 >>   Batch size = 16
+
+  0%|          | 0/7 [00:00<?, ?it/s][A
+ 29%|██▊       | 2/7 [00:00<00:00,  6.67it/s][A
+ 43%|████▎     | 3/7 [00:00<00:00,  4.60it/s][A
+ 57%|█████▋    | 4/7 [00:00<00:00,  3.94it/s][A
+ 71%|███████▏  | 5/7 [00:01<00:00,  3.70it/s][A
+ 86%|████████▌ | 6/7 [00:01<00:00,  3.55it/s][A
+100%|██████████| 7/7 [00:01<00:00,  3.45it/s][A                                                 
+                                             [A{'eval_loss': 1.3881758451461792, 'eval_runtime': 2.1626, 'eval_samples_per_second': 413.847, 'eval_steps_per_second': 3.237, 'epoch': 0.77}
+ 77%|███████▋  | 100/130 [01:58<00:35,  1.17s/it]
+100%|██████████| 7/7 [00:01<00:00,  3.45it/s][A
+                                             [A 78%|███████▊  | 101/130 [02:00<00:53,  1.86s/it] 78%|███████▊  | 102/130 [02:01<00:46,  1.65s/it] 79%|███████▉  | 103/130 [02:02<00:40,  1.50s/it] 80%|████████  | 104/130 [02:03<00:36,  1.40s/it] 81%|████████  | 105/130 [02:04<00:33,  1.33s/it]                                                 {'loss': 1.38, 'grad_norm': 1.5717474222183228, 'learning_rate': 2.339555568810221e-06, 'epoch': 0.81}
+ 81%|████████  | 105/130 [02:04<00:33,  1.33s/it] 82%|████████▏ | 106/130 [02:05<00:30,  1.28s/it] 82%|████████▏ | 107/130 [02:06<00:28,  1.24s/it] 83%|████████▎ | 108/130 [02:08<00:26,  1.22s/it] 84%|████████▍ | 109/130 [02:09<00:25,  1.20s/it] 85%|████████▍ | 110/130 [02:10<00:23,  1.19s/it]                                                 {'loss': 1.375, 'grad_norm': 1.4260509014129639, 'learning_rate': 1.5480991445620541e-06, 'epoch': 0.85}
+ 85%|████████▍ | 110/130 [02:10<00:23,  1.19s/it] 85%|████████▌ | 111/130 [02:11<00:22,  1.18s/it] 86%|████████▌ | 112/130 [02:12<00:21,  1.17s/it] 87%|████████▋ | 113/130 [02:13<00:19,  1.17s/it] 88%|████████▊ | 114/130 [02:15<00:18,  1.17s/it] 88%|████████▊ | 115/130 [02:16<00:17,  1.16s/it]                                                 {'loss': 1.3491, 'grad_norm': 1.3539237976074219, 'learning_rate': 9.08757001972762e-07, 'epoch': 0.88}
+ 88%|████████▊ | 115/130 [02:16<00:17,  1.16s/it] 89%|████████▉ | 116/130 [02:17<00:16,  1.16s/it] 90%|█████████ | 117/130 [02:18<00:15,  1.16s/it] 91%|█████████ | 118/130 [02:19<00:13,  1.16s/it] 92%|█████████▏| 119/130 [02:20<00:12,  1.16s/it] 92%|█████████▏| 120/130 [02:22<00:12,  1.20s/it]                                                 {'loss': 1.3533, 'grad_norm': 1.4586122035980225, 'learning_rate': 4.3303579172574884e-07, 'epoch': 0.92}
+ 92%|█████████▏| 120/130 [02:22<00:12,  1.20s/it] 93%|█████████▎| 121/130 [02:23<00:10,  1.19s/it] 94%|█████████▍| 122/130 [02:24<00:09,  1.18s/it] 95%|█████████▍| 123/130 [02:25<00:08,  1.17s/it] 95%|█████████▌| 124/130 [02:26<00:07,  1.17s/it] 96%|█████████▌| 125/130 [02:27<00:05,  1.17s/it]                                                 {'loss': 1.3503, 'grad_norm': 1.3738539218902588, 'learning_rate': 1.2949737362087156e-07, 'epoch': 0.96}
+ 96%|█████████▌| 125/130 [02:28<00:05,  1.17s/it] 97%|█████████▋| 126/130 [02:29<00:04,  1.16s/it] 98%|█████████▊| 127/130 [02:30<00:03,  1.16s/it] 98%|█████████▊| 128/130 [02:31<00:02,  1.16s/it] 99%|█████████▉| 129/130 [02:32<00:01,  1.16s/it]100%|██████████| 130/130 [02:33<00:00,  1.16s/it]                                                 {'loss': 1.3395, 'grad_norm': 1.3676426410675049, 'learning_rate': 3.6047232911462506e-09, 'epoch': 1.0}
+100%|██████████| 130/130 [02:33<00:00,  1.16s/it][INFO|trainer.py:3984] 2026-04-10 13:42:42,665 >> Saving model checkpoint to /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/checkpoint-130
+[INFO|configuration_utils.py:419] 2026-04-10 13:42:42,671 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/checkpoint-130/config.json
+[INFO|configuration_utils.py:911] 2026-04-10 13:42:42,675 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/checkpoint-130/generation_config.json
+[INFO|modeling_utils.py:3580] 2026-04-10 13:43:25,431 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 6 checkpoint shards. You can find where each parameters has been saved in the index located at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/checkpoint-130/model.safetensors.index.json.
+[INFO|tokenization_utils_base.py:2510] 2026-04-10 13:43:25,448 >> tokenizer config file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/checkpoint-130/tokenizer_config.json
+[INFO|tokenization_utils_base.py:2519] 2026-04-10 13:43:25,451 >> Special tokens file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/checkpoint-130/special_tokens_map.json
+[INFO|trainer.py:2681] 2026-04-10 13:46:32,794 >> 
+
+Training completed. Do not forget to share your model on huggingface.co/models =)
+
+
+                                                 {'train_runtime': 404.0129, 'train_samples_per_second': 40.88, 'train_steps_per_second': 0.322, 'train_loss': 1.8291644793290358, 'epoch': 1.0}
+100%|██████████| 130/130 [06:39<00:00,  1.16s/it]100%|██████████| 130/130 [06:39<00:00,  3.07s/it]
+***** train metrics *****
+  epoch                    =        1.0
+  total_flos               = 44661265GF
+  train_loss               =     1.8292
+  train_runtime            = 0:06:44.01
+  train_samples            =      43598
+  train_samples_per_second =      40.88
+  train_steps_per_second   =      0.322
+2026-04-10 13:46:32 - INFO - __main__ - *** Save model ***
+[INFO|configuration_utils.py:419] 2026-04-10 13:46:51,065 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/config.json
+[INFO|configuration_utils.py:911] 2026-04-10 13:46:51,069 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/generation_config.json
+[INFO|modeling_utils.py:3580] 2026-04-10 13:47:37,204 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 7 checkpoint shards. You can find where each parameters has been saved in the index located at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/model.safetensors.index.json.
+[INFO|tokenization_utils_base.py:2510] 2026-04-10 13:47:37,213 >> tokenizer config file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/tokenizer_config.json
+[INFO|tokenization_utils_base.py:2519] 2026-04-10 13:47:37,217 >> Special tokens file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/special_tokens_map.json
+2026-04-10 13:47:37 - INFO - __main__ - Saved HF-compatible model artifacts to /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758
+2026-04-10 13:47:37 - INFO - __main__ - Saved validated HF-compatible model artifacts to /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758
+[INFO|modelcard.py:450] 2026-04-10 13:47:37,519 >> Dropping the following result as it does not have all the necessary fields:
+{'dataset': {'name': 'Anthropic/hh-rlhf', 'type': 'Anthropic/hh-rlhf', 'config': 'default', 'split': 'train', 'args': 'default'}}
+[INFO|configuration_utils.py:419] 2026-04-10 13:47:37,528 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/config.json
+2026-04-10 13:47:37 - INFO - __main__ - *** Evaluate ***
+[INFO|trainer.py:4307] 2026-04-10 13:47:37,531 >> 
+***** Running Evaluation *****
+[INFO|trainer.py:4309] 2026-04-10 13:47:37,531 >>   Num examples = 895
+[INFO|trainer.py:4312] 2026-04-10 13:47:37,531 >>   Batch size = 16
+  0%|          | 0/7 [00:00<?, ?it/s] 29%|██▊       | 2/7 [00:00<00:00,  6.84it/s] 43%|████▎     | 3/7 [00:00<00:00,  4.83it/s] 57%|█████▋    | 4/7 [00:00<00:00,  4.03it/s] 71%|███████▏  | 5/7 [00:01<00:00,  3.79it/s] 86%|████████▌ | 6/7 [00:01<00:00,  3.65it/s]100%|██████████| 7/7 [00:01<00:00,  3.53it/s]100%|██████████| 7/7 [00:01<00:00,  3.84it/s]
+***** eval metrics *****
+  epoch                   =        1.0
+  eval_loss               =     1.3573
+  eval_runtime            = 0:00:02.10
+  eval_samples            =       2339
+  eval_samples_per_second =    424.776
+  eval_steps_per_second   =      3.322
+2026-04-10 13:47:39 - INFO - __main__ - *** Training complete ***
+wandb: - 0.014 MB of 0.014 MB uploadedwandb: \ 0.014 MB of 0.037 MB uploadedwandb: | 0.037 MB of 0.037 MB uploadedwandb: / 0.037 MB of 0.037 MB uploadedwandb: - 0.037 MB of 0.037 MB uploadedwandb: \ 0.037 MB of 0.037 MB uploadedwandb: | 0.037 MB of 0.037 MB uploadedwandb: / 0.037 MB of 0.037 MB uploadedwandb: - 0.037 MB of 0.037 MB uploadedwandb: \ 0.037 MB of 0.037 MB uploadedwandb: | 0.037 MB of 0.037 MB uploadedwandb: / 0.037 MB of 0.037 MB uploadedwandb: - 0.037 MB of 0.037 MB uploadedwandb: \ 0.037 MB of 0.037 MB uploadedwandb: | 0.037 MB of 0.037 MB uploadedwandb: / 0.037 MB of 0.037 MB uploadedwandb: - 0.037 MB of 0.037 MB uploadedwandb: \ 0.037 MB of 0.037 MB uploadedwandb: | 0.037 MB of 0.037 MB uploadedwandb: / 0.037 MB of 0.037 MB uploadedwandb: - 0.037 MB of 0.037 MB uploadedwandb: \ 0.037 MB of 0.037 MB uploadedwandb: | 0.037 MB of 0.037 MB uploadedwandb: / 0.037 MB of 0.037 MB uploadedwandb: - 0.037 MB of 0.037 MB uploadedwandb: \ 0.037 MB of 0.037 MB uploadedwandb: | 0.037 MB of 0.037 MB uploadedwandb: / 0.037 MB of 0.037 MB uploadedwandb: - 0.037 MB of 0.037 MB uploadedwandb: \ 0.037 MB of 0.037 MB uploadedwandb: | 0.037 MB of 0.037 MB uploadedwandb: / 0.037 MB of 0.037 MB uploadedwandb: - 0.037 MB of 0.037 MB uploadedwandb: \ 0.037 MB of 0.037 MB uploadedwandb: | 0.037 MB of 0.037 MB uploadedwandb: / 0.037 MB of 0.037 MB uploadedwandb: - 0.037 MB of 0.037 MB uploadedwandb: \ 0.037 MB of 0.037 MB uploadedwandb: | 0.037 MB of 0.037 MB uploadedwandb: / 0.037 MB of 0.037 MB uploadedwandb: - 0.037 MB of 0.037 MB uploadedwandb: \ 0.037 MB of 0.037 MB uploadedwandb: | 0.037 MB of 0.037 MB uploadedwandb: / 0.037 MB of 0.037 MB uploadedwandb: - 0.037 MB of 0.037 MB uploadedwandb: \ 0.037 MB of 0.037 MB uploadedwandb: | 0.037 MB of 0.037 MB uploadedwandb: / 0.037 MB of 0.037 MB uploadedwandb: - 0.037 MB of 0.037 MB uploadedwandb: \ 0.037 MB of 0.037 MB uploadedwandb: | 0.037 MB of 0.037 MB uploadedwandb: / 
0.037 MB of 0.037 MB uploadedwandb: - 0.037 MB of 0.037 MB uploadedwandb: \ 0.037 MB of 0.037 MB uploadedwandb: | 0.037 MB of 0.037 MB uploadedwandb: / 0.037 MB of 0.037 MB uploadedwandb: - 0.037 MB of 0.037 MB uploadedwandb: \ 0.037 MB of 0.037 MB uploadedwandb: | 0.037 MB of 0.037 MB uploadedwandb: / 0.037 MB of 0.037 MB uploadedwandb: - 0.037 MB of 0.037 MB uploadedwandb: \ 0.037 MB of 0.037 MB uploadedwandb: | 0.037 MB of 0.037 MB uploadedwandb: / 0.037 MB of 0.037 MB uploadedwandb: - 0.037 MB of 0.037 MB uploadedwandb: 
+wandb: Run history:
+wandb:               eval/loss █▁
+wandb:            eval/runtime █▁
+wandb: eval/samples_per_second ▁█
+wandb:   eval/steps_per_second ▁█
+wandb:             train/epoch ▁▁▁▂▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
+wandb:       train/global_step ▁▁▁▂▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▆▇▇▇▇████
+wandb:         train/grad_norm     █▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
+wandb:     train/learning_rate ▁▃▆████▇▇▇▆▆▆▅▅▄▄▃▃▃▂▂▂▁▁▁▁
+wandb:              train/loss ████▇▆▄▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
+wandb: 
+wandb: Run summary:
+wandb:                eval/loss 1.35726
+wandb:             eval/runtime 2.107
+wandb:  eval/samples_per_second 424.776
+wandb:    eval/steps_per_second 3.322
+wandb:               total_flos 4.795466914988032e+16
+wandb:              train/epoch 1.0
+wandb:        train/global_step 130
+wandb:          train/grad_norm 1.36764
+wandb:      train/learning_rate 0.0
+wandb:               train/loss 1.3395
+wandb:               train_loss 1.82916
+wandb:            train_runtime 404.0129
+wandb: train_samples_per_second 40.88
+wandb:   train_steps_per_second 0.322
+wandb: 
+wandb: 🚀 View run llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758 at: https://wandb.ai/can-not-fand-northeastern-university/huggingface/runs/cqdvywmp
+wandb: ⭐️ View project at: https://wandb.ai/can-not-fand-northeastern-university/huggingface
+wandb: Synced 6 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
+wandb: Find logs at: /scratch/feng.yulu/dynamic-dpo-v4/wandb/wandb/run-20260410_133950-cqdvywmp/logs
+wandb: WARNING The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require("core")`! See https://wandb.me/wandb-core for more information.
diff --git a/train_results.json b/train_results.json
new file mode 100644
index 0000000..e15f03b
--- /dev/null
+++ b/train_results.json
@@ -0,0 +1,9 @@
+{
+    "epoch": 1.0,
+    "total_flos": 4.795466914988032e+16,
+    "train_loss": 1.8291644793290358,
+    "train_runtime": 404.0129,
+    "train_samples": 43598,
+    "train_samples_per_second": 40.88,
+    "train_steps_per_second": 0.322
+}
\ No newline at end of file
diff --git a/trainer_state.json b/trainer_state.json
new file mode 100644
index 0000000..8fc08fc
--- /dev/null
+++ b/trainer_state.json
@@ -0,0 +1,240 @@
+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 100,
+  "global_step": 130,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.007692307692307693,
+      "grad_norm": Infinity,
+      "learning_rate": 0.0,
+      "loss": 3.1196,
+      "step": 1
+    },
+    {
+      "epoch": 0.038461538461538464,
+      "grad_norm": Infinity,
+      "learning_rate": 6.153846153846155e-06,
+      "loss": 3.1223,
+      "step": 5
+    },
+    {
+      "epoch": 0.07692307692307693,
+      "grad_norm": Infinity,
+      "learning_rate": 1.3846153846153847e-05,
+      "loss": 3.1246,
+      "step": 10
+    },
+    {
+      "epoch": 0.11538461538461539,
+      "grad_norm": Infinity,
+      "learning_rate": 1.9996395276708856e-05,
+      "loss": 3.1264,
+      "step": 15
+    },
+    {
+      "epoch": 0.15384615384615385,
+      "grad_norm": 35.01002883911133,
+      "learning_rate": 1.9870502626379127e-05,
+      "loss": 2.9981,
+      "step": 20
+    },
+    {
+      "epoch": 0.19230769230769232,
+      "grad_norm": 11.861149787902832,
+      "learning_rate": 1.9566964208274254e-05,
+      "loss": 2.531,
+      "step": 25
+    },
+    {
+      "epoch": 0.23076923076923078,
+      "grad_norm": 7.360829830169678,
+      "learning_rate": 1.909124299802724e-05,
+      "loss": 2.2014,
+      "step": 30
+    },
+    {
+      "epoch": 0.2692307692307692,
+      "grad_norm": 6.061845779418945,
+      "learning_rate": 1.845190085543795e-05,
+      "loss": 2.0137,
+      "step": 35
+    },
+    {
+      "epoch": 0.3076923076923077,
+      "grad_norm": 3.25486421585083,
+      "learning_rate": 1.766044443118978e-05,
+      "loss": 1.8515,
+      "step": 40
+    },
+    {
+      "epoch": 0.34615384615384615,
+      "grad_norm": 2.8577518463134766,
+      "learning_rate": 1.67311180742757e-05,
+      "loss": 1.7909,
+      "step": 45
+    },
+    {
+      "epoch": 0.38461538461538464,
+      "grad_norm": 3.2024171352386475,
+      "learning_rate": 1.568064746731156e-05,
+      "loss": 1.7104,
+      "step": 50
+    },
+    {
+      "epoch": 0.4230769230769231,
+      "grad_norm": 2.368898630142212,
+      "learning_rate": 1.4527938603696376e-05,
+      "loss": 1.6393,
+      "step": 55
+    },
+    {
+      "epoch": 0.46153846153846156,
+      "grad_norm": 2.016225814819336,
+      "learning_rate": 1.3293737524320798e-05,
+      "loss": 1.5979,
+      "step": 60
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 2.3714423179626465,
+      "learning_rate": 1.2000256937760446e-05,
+      "loss": 1.5492,
+      "step": 65
+    },
+    {
+      "epoch": 0.5384615384615384,
+      "grad_norm": 2.853393793106079,
+      "learning_rate": 1.0670776443910024e-05,
+      "loss": 1.5275,
+      "step": 70
+    },
+    {
+      "epoch": 0.5769230769230769,
+      "grad_norm": 1.7769665718078613,
+      "learning_rate": 9.329223556089976e-06,
+      "loss": 1.4923,
+      "step": 75
+    },
+    {
+      "epoch": 0.6153846153846154,
+      "grad_norm": 2.4375193119049072,
+      "learning_rate": 7.999743062239557e-06,
+      "loss": 1.4681,
+      "step": 80
+    },
+    {
+      "epoch": 0.6538461538461539,
+      "grad_norm": 2.583644151687622,
+      "learning_rate": 6.706262475679205e-06,
+      "loss": 1.4528,
+      "step": 85
+    },
+    {
+      "epoch": 0.6923076923076923,
+      "grad_norm": 2.249284267425537,
+      "learning_rate": 5.47206139630363e-06,
+      "loss": 1.4174,
+      "step": 90
+    },
+    {
+      "epoch": 0.7307692307692307,
+      "grad_norm": 2.0346739292144775,
+      "learning_rate": 4.319352532688444e-06,
+      "loss": 1.4044,
+      "step": 95
+    },
+    {
+      "epoch": 0.7692307692307693,
+      "grad_norm": 1.5525047779083252,
+      "learning_rate": 3.2688819257242963e-06,
+      "loss": 1.3924,
+      "step": 100
+    },
+    {
+      "epoch": 0.7692307692307693,
+      "eval_loss": 1.3881758451461792,
+      "eval_runtime": 2.1626,
+      "eval_samples_per_second": 413.847,
+      "eval_steps_per_second": 3.237,
+      "step": 100
+    },
+    {
+      "epoch": 0.8076923076923077,
+      "grad_norm": 1.5717474222183228,
+      "learning_rate": 2.339555568810221e-06,
+      "loss": 1.38,
+      "step": 105
+    },
+    {
+      "epoch": 0.8461538461538461,
+      "grad_norm": 1.4260509014129639,
+      "learning_rate": 1.5480991445620541e-06,
+      "loss": 1.375,
+      "step": 110
+    },
+    {
+      "epoch": 0.8846153846153846,
+      "grad_norm": 1.3539237976074219,
+      "learning_rate": 9.08757001972762e-07,
+      "loss": 1.3491,
+      "step": 115
+    },
+    {
+      "epoch": 0.9230769230769231,
+      "grad_norm": 1.4586122035980225,
+      "learning_rate": 4.3303579172574884e-07,
+      "loss": 1.3533,
+      "step": 120
+    },
+    {
+      "epoch": 0.9615384615384616,
+      "grad_norm": 1.3738539218902588,
+      "learning_rate": 1.2949737362087156e-07,
+      "loss": 1.3503,
+      "step": 125
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 1.3676426410675049,
+      "learning_rate": 3.6047232911462506e-09,
+      "loss": 1.3395,
+      "step": 130
+    },
+    {
+      "epoch": 1.0,
+      "step": 130,
+      "total_flos": 4.795466914988032e+16,
+      "train_loss": 1.8291644793290358,
+      "train_runtime": 404.0129,
+      "train_samples_per_second": 40.88,
+      "train_steps_per_second": 0.322
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 130,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 200,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 4.795466914988032e+16,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}