From 1e65d1513dc5b802e6f4611add4d45cc3ac77a53 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Tue, 5 May 2026 20:48:43 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: W-61/llama3-8b-base-new-method-s_star0.6-20260425-180936 Source: Original Platform --- .gitattributes | 36 + README.md | 77 + all_results.json | 23 + config.json | 29 + eval_results.json | 17 + generation_config.json | 9 + model-00001-of-00007.safetensors | 3 + model-00002-of-00007.safetensors | 3 + model-00003-of-00007.safetensors | 3 + model-00004-of-00007.safetensors | 3 + model-00005-of-00007.safetensors | 3 + model-00006-of-00007.safetensors | 3 + model-00007-of-00007.safetensors | 3 + model.safetensors.index.json | 298 + special_tokens_map.json | 23 + tokenizer.json | 3 + tokenizer_config.json | 2064 +++++++ train_results.json | 9 + trainer_state.json | 9140 ++++++++++++++++++++++++++++++ 19 files changed, 11749 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 all_results.json create mode 100644 config.json create mode 100644 eval_results.json create mode 100644 generation_config.json create mode 100644 model-00001-of-00007.safetensors create mode 100644 model-00002-of-00007.safetensors create mode 100644 model-00003-of-00007.safetensors create mode 100644 model-00004-of-00007.safetensors create mode 100644 model-00005-of-00007.safetensors create mode 100644 model-00006-of-00007.safetensors create mode 100644 model-00007-of-00007.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 train_results.json create mode 100644 trainer_state.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..3470b9c --- /dev/null +++ b/README.md @@ -0,0 +1,77 @@ +--- +library_name: transformers +base_model: W-61/llama-3-8b-base-sft-ultrachat-8xh200 +tags: +- alignment-handbook +- new-dpo +- generated_from_trainer +datasets: +- HuggingFaceH4/ultrafeedback_binarized +model-index: +- name: llama3-8b-base-new-method-s_star0.6-20260425-180936 + results: [] +--- + + + +# llama3-8b-base-new-method-s_star0.6-20260425-180936 + +This model is a fine-tuned version of [W-61/llama-3-8b-base-sft-ultrachat-8xh200](https://huggingface.co/W-61/llama-3-8b-base-sft-ultrachat-8xh200) on the HuggingFaceH4/ultrafeedback_binarized dataset. +It achieves the following results on the evaluation set: +- Loss: 0.5352 +- Fcm Dpo/beta: 0.0110 +- Margin Dpo/margin Mean: 54.2375 +- Margin Dpo/margin Std: 83.0790 +- Logps/chosen: -383.9891 +- Logps/rejected: -417.3312 +- Logps/ref Chosen: -287.8268 +- Logps/ref Rejected: -266.9314 +- Logits/chosen: -0.8407 +- Logits/rejected: -0.8346 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-07 +- train_batch_size: 4 +- eval_batch_size: 4 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 8 +- total_train_batch_size: 128 +- total_eval_batch_size: 16 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 1 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | Fcm Dpo/beta | Margin Dpo/margin Mean | Margin Dpo/margin Std | Logps/chosen | Logps/rejected | Logps/ref Chosen | Logps/ref Rejected | Logits/chosen | Logits/rejected | +|:-------------:|:------:|:----:|:---------------:|:------------:|:----------------------:|:---------------------:|:------------:|:--------------:|:----------------:|:------------------:|:-------------:|:---------------:| +| 4.6087 | 0.4188 | 200 | 0.5497 | 0.0189 | 29.5673 | 48.3802 | -320.8928 | -329.5647 | -287.8268 | -266.9314 | -0.8679 | -0.8610 | +| 4.3167 | 0.8377 | 400 | 0.5352 | 0.0110 | 54.2375 | 83.0790 | -383.9891 | -417.3312 | -287.8268 | -266.9314 | -0.8407 | -0.8346 | + + +### Framework versions + +- Transformers 4.51.0 +- Pytorch 2.3.1+cu121 +- Datasets 2.21.0 +- Tokenizers 0.21.4 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..1e4ab2d --- /dev/null +++ b/all_results.json @@ -0,0 +1,23 @@ +{ + "epoch": 0.9989528795811519, + "eval_fcm_dpo/beta": 0.009593765251338482, + "eval_logits/chosen": -0.8410288691520691, + "eval_logits/rejected": -0.8335962891578674, + "eval_logps/chosen": -391.4317626953125, + "eval_logps/ref_chosen": -287.8267517089844, + "eval_logps/ref_rejected": -266.9313659667969, + "eval_logps/rejected": -426.3018493652344, + "eval_loss": 0.5413669347763062, + "eval_margin_dpo/margin_mean": 55.76554870605469, + "eval_margin_dpo/margin_std": 85.69520568847656, + "eval_runtime": 81.3525, + "eval_samples": 2000, + "eval_samples_per_second": 24.584, + "eval_steps_per_second": 1.537, + "total_flos": 0.0, + "train_loss": 4.542374380479568, + "train_runtime": 6039.2377, + "train_samples": 61135, + "train_samples_per_second": 10.123, + "train_steps_per_second": 0.079 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..5092b09 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.51.0", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000..ed17075 --- /dev/null +++ b/eval_results.json @@ -0,0 +1,17 @@ +{ + "epoch": 0.9989528795811519, + "eval_fcm_dpo/beta": 0.009593765251338482, + "eval_logits/chosen": -0.8410288691520691, + "eval_logits/rejected": -0.8335962891578674, + "eval_logps/chosen": -391.4317626953125, + "eval_logps/ref_chosen": -287.8267517089844, + "eval_logps/ref_rejected": -266.9313659667969, + "eval_logps/rejected": -426.3018493652344, + "eval_loss": 0.5413669347763062, + "eval_margin_dpo/margin_mean": 55.76554870605469, + "eval_margin_dpo/margin_std": 85.69520568847656, + "eval_runtime": 81.3525, + "eval_samples": 2000, + "eval_samples_per_second": 24.584, + "eval_steps_per_second": 1.537 +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..76247c9 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.0" +} diff --git a/model-00001-of-00007.safetensors b/model-00001-of-00007.safetensors new file mode 100644 index 0000000..7a62eb9 --- /dev/null +++ b/model-00001-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b262ca047d3472c5d7b45d3617020375d76b2034a99875cb6fb10f181a40213 +size 4886466168 diff --git a/model-00002-of-00007.safetensors b/model-00002-of-00007.safetensors new file mode 100644 index 0000000..9a52824 --- /dev/null +++ b/model-00002-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf999594df0c3c73ecffb69cdc7d9fa3264a7c06f4e21803df4f6b3527bdbd0 +size 4832007448 diff --git a/model-00003-of-00007.safetensors b/model-00003-of-00007.safetensors new file mode 100644 index 0000000..4e7ae89 --- /dev/null +++ b/model-00003-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc3f291dce4d889c051c33ddff4a498fcfddb54c0c94c0f2b8b18958e0ed00aa +size 4999813112 diff --git a/model-00004-of-00007.safetensors b/model-00004-of-00007.safetensors new file mode 100644 index 0000000..29c324f --- /dev/null +++ b/model-00004-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1f86afefa44a3d9dc477738bc54ee5047560e461731aac7fdc8bc8f2d9ba63d +size 4999813128 diff --git a/model-00005-of-00007.safetensors b/model-00005-of-00007.safetensors new file mode 100644 index 0000000..12ce241 --- /dev/null +++ b/model-00005-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8f2be715499d06fb62fc75ae898e4bbbf4c347e13d5bc09847e3e1374f9fced +size 4832007496 diff --git a/model-00006-of-00007.safetensors b/model-00006-of-00007.safetensors new file mode 100644 index 0000000..45d9e66 --- /dev/null +++ b/model-00006-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79fe6a74adda3d12be13ce8c4f26ee3d1602b3a968cfbf7c1e2afd5ced932113 +size 4999813120 diff --git a/model-00007-of-00007.safetensors b/model-00007-of-00007.safetensors new file mode 100644 index 0000000..5db20a2 --- /dev/null +++ b/model-00007-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b13772a11cfc24ef8c96fa75e7137e802adf2cd0d07d37d7fcbb73ff30ac2923 +size 2571158184 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..0985084 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 32121044992 + }, + "weight_map": { + "lm_head.weight": "model-00007-of-00007.safetensors", + "model.embed_tokens.weight": "model-00001-of-00007.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.20.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.26.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.30.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.norm.weight": "model-00007-of-00007.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..86a3394 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..8c6916a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..d834a17 --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 0.9989528795811519, + "total_flos": 0.0, + "train_loss": 4.542374380479568, + "train_runtime": 6039.2377, + "train_samples": 61135, + "train_samples_per_second": 10.123, + "train_steps_per_second": 0.079 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..034e15b --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,9140 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9989528795811519, + "eval_steps": 200, + "global_step": 477, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0020942408376963353, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.02793481945991516, + "fcm_dpo/q_t": 0.500069797039032, + "grad_norm": 28.592390060424805, + "learning_rate": 0.0, + "logits/chosen": -0.5898098945617676, + "logits/rejected": -0.604260265827179, + "logps/chosen": -275.28570556640625, + "logps/ref_chosen": -275.2312927246094, + "logps/ref_rejected": -222.9380340576172, + "logps/rejected": -222.96453857421875, + "loss": 5.5463, + "margin_dpo/margin_mean": -0.02793477475643158, + "margin_dpo/margin_std": 0.5724214911460876, + "step": 1 + }, + { + "epoch": 0.004188481675392671, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.014312177896499634, + "fcm_dpo/q_t": 0.4999642074108124, + "grad_norm": 27.881120681762695, + "learning_rate": 1.0416666666666666e-08, + "logits/chosen": -0.6574729681015015, + "logits/rejected": -0.6464410424232483, + "logps/chosen": -264.7165222167969, + "logps/ref_chosen": -264.7611083984375, + "logps/ref_rejected": -242.5597686767578, + "logps/rejected": -242.52951049804688, + "loss": 5.5446, + "margin_dpo/margin_mean": 0.014312252402305603, + "margin_dpo/margin_std": 0.6423971652984619, + "step": 2 + }, + { + "epoch": 0.0062827225130890054, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.029146358370780945, + "fcm_dpo/q_t": 0.4999271333217621, + "grad_norm": 25.850038528442383, + "learning_rate": 2.083333333333333e-08, + "logits/chosen": -0.6840031743049622, + "logits/rejected": -0.7351865172386169, + "logps/chosen": -274.1335754394531, + "logps/ref_chosen": -274.1018981933594, + "logps/ref_rejected": -286.5882568359375, + "logps/rejected": -286.64910888671875, + "loss": 5.5441, + "margin_dpo/margin_mean": 0.02914564311504364, + "margin_dpo/margin_std": 0.7203992605209351, + "step": 3 + }, + { + "epoch": 0.008376963350785341, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.10328760743141174, + "fcm_dpo/q_t": 0.499741792678833, + "grad_norm": 31.70708656311035, + "learning_rate": 3.125e-08, + "logits/chosen": -0.6172086000442505, + "logits/rejected": -0.6114800572395325, + "logps/chosen": -329.83612060546875, + "logps/ref_chosen": -329.8382568359375, + "logps/ref_rejected": -303.2850646972656, + "logps/rejected": -303.3861999511719, + "loss": 5.5411, + "margin_dpo/margin_mean": 0.10328748822212219, + "margin_dpo/margin_std": 0.8034393787384033, + "step": 4 + }, + { + "epoch": 0.010471204188481676, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.01281556487083435, + "fcm_dpo/q_t": 0.4999679923057556, + "grad_norm": 29.54966163635254, + "learning_rate": 4.166666666666666e-08, + "logits/chosen": -0.5715648531913757, + "logits/rejected": -0.587770938873291, + "logps/chosen": -301.7329406738281, + "logps/ref_chosen": -301.7389221191406, + "logps/ref_rejected": -274.7654724121094, + "logps/rejected": -274.77227783203125, + "loss": 5.5447, + "margin_dpo/margin_mean": 0.012814819812774658, + "margin_dpo/margin_std": 0.8004137277603149, + "step": 5 + }, + { + "epoch": 0.012565445026178011, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.05144025385379791, + "fcm_dpo/q_t": 0.4998714029788971, + "grad_norm": 28.23720932006836, + "learning_rate": 5.208333333333333e-08, + "logits/chosen": -0.6801129579544067, + "logits/rejected": -0.6429607272148132, + "logps/chosen": -285.62481689453125, + "logps/ref_chosen": -285.6946716308594, + "logps/ref_rejected": -245.8200225830078, + "logps/rejected": -245.80160522460938, + "loss": 5.5432, + "margin_dpo/margin_mean": 0.051440998911857605, + "margin_dpo/margin_std": 0.691977858543396, + "step": 6 + }, + { + "epoch": 0.014659685863874346, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.057578980922698975, + "fcm_dpo/q_t": 0.5001440048217773, + "grad_norm": 28.544734954833984, + "learning_rate": 6.25e-08, + "logits/chosen": -0.5832664966583252, + "logits/rejected": -0.6165621280670166, + "logps/chosen": -264.64544677734375, + "logps/ref_chosen": -264.65545654296875, + "logps/ref_rejected": -253.10305786132812, + "logps/rejected": -253.03549194335938, + "loss": 5.5475, + "margin_dpo/margin_mean": -0.05757877230644226, + "margin_dpo/margin_std": 0.6711597442626953, + "step": 7 + }, + { + "epoch": 0.016753926701570682, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.09753617644309998, + "fcm_dpo/q_t": 0.4997561573982239, + "grad_norm": 30.755247116088867, + "learning_rate": 7.291666666666667e-08, + "logits/chosen": -0.6714497804641724, + "logits/rejected": -0.6773282885551453, + "logps/chosen": -354.1408996582031, + "logps/ref_chosen": -354.1887512207031, + "logps/ref_rejected": -282.9112243652344, + "logps/rejected": -282.96087646484375, + "loss": 5.5413, + "margin_dpo/margin_mean": 0.097537100315094, + "margin_dpo/margin_std": 0.7466810345649719, + "step": 8 + }, + { + "epoch": 0.018848167539267015, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.026623502373695374, + "fcm_dpo/q_t": 0.5000665783882141, + "grad_norm": 27.906946182250977, + "learning_rate": 8.333333333333333e-08, + "logits/chosen": -0.6355319023132324, + "logits/rejected": -0.6535608768463135, + "logps/chosen": -285.5481872558594, + "logps/ref_chosen": -285.5502014160156, + "logps/ref_rejected": -267.99664306640625, + "logps/rejected": -267.9679870605469, + "loss": 5.5463, + "margin_dpo/margin_mean": -0.0266236811876297, + "margin_dpo/margin_std": 0.6391922831535339, + "step": 9 + }, + { + "epoch": 0.020942408376963352, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.023563116788864136, + "fcm_dpo/q_t": 0.4999410808086395, + "grad_norm": 26.76718521118164, + "learning_rate": 9.375e-08, + "logits/chosen": -0.6935949325561523, + "logits/rejected": -0.6888067722320557, + "logps/chosen": -251.90386962890625, + "logps/ref_chosen": -251.91238403320312, + "logps/ref_rejected": -226.45260620117188, + "logps/rejected": -226.46763610839844, + "loss": 5.5443, + "margin_dpo/margin_mean": 0.0235632061958313, + "margin_dpo/margin_std": 0.7389193177223206, + "step": 10 + }, + { + "epoch": 0.023036649214659685, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.03727307915687561, + "fcm_dpo/q_t": 0.5000931620597839, + "grad_norm": 28.944982528686523, + "learning_rate": 1.0416666666666667e-07, + "logits/chosen": -0.5943973660469055, + "logits/rejected": -0.6493593454360962, + "logps/chosen": -301.0625, + "logps/ref_chosen": -301.08343505859375, + "logps/ref_rejected": -259.546630859375, + "logps/rejected": -259.4883728027344, + "loss": 5.5467, + "margin_dpo/margin_mean": -0.037272870540618896, + "margin_dpo/margin_std": 0.7176087498664856, + "step": 11 + }, + { + "epoch": 0.025130890052356022, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.10183002054691315, + "fcm_dpo/q_t": 0.4997454583644867, + "grad_norm": 30.07319450378418, + "learning_rate": 1.1458333333333332e-07, + "logits/chosen": -0.5795747637748718, + "logits/rejected": -0.5394208431243896, + "logps/chosen": -287.56854248046875, + "logps/ref_chosen": -287.548095703125, + "logps/ref_rejected": -277.37945556640625, + "logps/rejected": -277.5017395019531, + "loss": 5.5412, + "margin_dpo/margin_mean": 0.10182976722717285, + "margin_dpo/margin_std": 0.6723535060882568, + "step": 12 + }, + { + "epoch": 0.027225130890052355, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.15790200233459473, + "fcm_dpo/q_t": 0.4996052384376526, + "grad_norm": 27.2218017578125, + "learning_rate": 1.25e-07, + "logits/chosen": -0.6672236323356628, + "logits/rejected": -0.6754846572875977, + "logps/chosen": -270.6041564941406, + "logps/ref_chosen": -270.6664123535156, + "logps/ref_rejected": -274.6546936035156, + "logps/rejected": -274.7503662109375, + "loss": 5.5389, + "margin_dpo/margin_mean": 0.1579025536775589, + "margin_dpo/margin_std": 0.6908207535743713, + "step": 13 + }, + { + "epoch": 0.02931937172774869, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.01676630973815918, + "fcm_dpo/q_t": 0.4999580979347229, + "grad_norm": 28.227462768554688, + "learning_rate": 1.3541666666666666e-07, + "logits/chosen": -0.623089611530304, + "logits/rejected": -0.6518293619155884, + "logps/chosen": -281.58538818359375, + "logps/ref_chosen": -281.59320068359375, + "logps/ref_rejected": -263.52215576171875, + "logps/rejected": -263.53106689453125, + "loss": 5.5446, + "margin_dpo/margin_mean": 0.016765296459197998, + "margin_dpo/margin_std": 0.6453270316123962, + "step": 14 + }, + { + "epoch": 0.031413612565445025, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.08115784823894501, + "fcm_dpo/q_t": 0.49979710578918457, + "grad_norm": 30.37261390686035, + "learning_rate": 1.4583333333333335e-07, + "logits/chosen": -0.6461591720581055, + "logits/rejected": -0.6596108675003052, + "logps/chosen": -298.36016845703125, + "logps/ref_chosen": -298.45343017578125, + "logps/ref_rejected": -227.17832946777344, + "logps/rejected": -227.16622924804688, + "loss": 5.542, + "margin_dpo/margin_mean": 0.08115695416927338, + "margin_dpo/margin_std": 0.6280770301818848, + "step": 15 + }, + { + "epoch": 0.033507853403141365, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.1199236512184143, + "fcm_dpo/q_t": 0.4997002184391022, + "grad_norm": 30.1366024017334, + "learning_rate": 1.5624999999999999e-07, + "logits/chosen": -0.6011725068092346, + "logits/rejected": -0.5981835722923279, + "logps/chosen": -293.8862609863281, + "logps/ref_chosen": -293.96661376953125, + "logps/ref_rejected": -250.78443908691406, + "logps/rejected": -250.82400512695312, + "loss": 5.5404, + "margin_dpo/margin_mean": 0.11992333829402924, + "margin_dpo/margin_std": 0.720985472202301, + "step": 16 + }, + { + "epoch": 0.0356020942408377, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.06940680742263794, + "fcm_dpo/q_t": 0.49982649087905884, + "grad_norm": 27.645227432250977, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -0.558211624622345, + "logits/rejected": -0.5835133194923401, + "logps/chosen": -262.30767822265625, + "logps/ref_chosen": -262.39398193359375, + "logps/ref_rejected": -248.500244140625, + "logps/rejected": -248.48330688476562, + "loss": 5.5424, + "margin_dpo/margin_mean": 0.06940683722496033, + "margin_dpo/margin_std": 0.6322791576385498, + "step": 17 + }, + { + "epoch": 0.03769633507853403, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.00013044476509094238, + "fcm_dpo/q_t": 0.5000002980232239, + "grad_norm": 29.71380043029785, + "learning_rate": 1.7708333333333334e-07, + "logits/chosen": -0.612942636013031, + "logits/rejected": -0.619144856929779, + "logps/chosen": -293.71783447265625, + "logps/ref_chosen": -293.709228515625, + "logps/ref_rejected": -274.5875244140625, + "logps/rejected": -274.5960388183594, + "loss": 5.5453, + "margin_dpo/margin_mean": -0.00012956559658050537, + "margin_dpo/margin_std": 0.7896002531051636, + "step": 18 + }, + { + "epoch": 0.039790575916230364, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.02526429295539856, + "fcm_dpo/q_t": 0.4999368190765381, + "grad_norm": 28.150474548339844, + "learning_rate": 1.875e-07, + "logits/chosen": -0.6289379000663757, + "logits/rejected": -0.6254291534423828, + "logps/chosen": -280.205322265625, + "logps/ref_chosen": -280.26568603515625, + "logps/ref_rejected": -259.9742736816406, + "logps/rejected": -259.93914794921875, + "loss": 5.5442, + "margin_dpo/margin_mean": 0.025263652205467224, + "margin_dpo/margin_std": 0.7644654512405396, + "step": 19 + }, + { + "epoch": 0.041884816753926704, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.10411535203456879, + "fcm_dpo/q_t": 0.49973970651626587, + "grad_norm": 29.686153411865234, + "learning_rate": 1.9791666666666664e-07, + "logits/chosen": -0.622660219669342, + "logits/rejected": -0.6548238396644592, + "logps/chosen": -303.71466064453125, + "logps/ref_chosen": -303.8954162597656, + "logps/ref_rejected": -260.214599609375, + "logps/rejected": -260.13800048828125, + "loss": 5.5411, + "margin_dpo/margin_mean": 0.10411512851715088, + "margin_dpo/margin_std": 0.766339898109436, + "step": 20 + }, + { + "epoch": 0.04397905759162304, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.04293261468410492, + "fcm_dpo/q_t": 0.4998926520347595, + "grad_norm": 35.22480392456055, + "learning_rate": 2.0833333333333333e-07, + "logits/chosen": -0.6408384442329407, + "logits/rejected": -0.6681733131408691, + "logps/chosen": -301.4923095703125, + "logps/ref_chosen": -301.5334777832031, + "logps/ref_rejected": -280.28900146484375, + "logps/rejected": -280.2907409667969, + "loss": 5.5435, + "margin_dpo/margin_mean": 0.04293195903301239, + "margin_dpo/margin_std": 0.8277014493942261, + "step": 21 + }, + { + "epoch": 0.04607329842931937, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.030557870864868164, + "fcm_dpo/q_t": 0.500076413154602, + "grad_norm": 25.291522979736328, + "learning_rate": 2.1875e-07, + "logits/chosen": -0.6586352586746216, + "logits/rejected": -0.6604381799697876, + "logps/chosen": -259.9430236816406, + "logps/ref_chosen": -259.9951477050781, + "logps/ref_rejected": -243.0721435546875, + "logps/rejected": -242.98948669433594, + "loss": 5.5465, + "margin_dpo/margin_mean": -0.030558019876480103, + "margin_dpo/margin_std": 0.7162632346153259, + "step": 22 + }, + { + "epoch": 0.048167539267015703, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.09941744804382324, + "fcm_dpo/q_t": 0.4997514486312866, + "grad_norm": 27.887392044067383, + "learning_rate": 2.2916666666666663e-07, + "logits/chosen": -0.6176055669784546, + "logits/rejected": -0.6513772010803223, + "logps/chosen": -282.0886535644531, + "logps/ref_chosen": -282.1807556152344, + "logps/ref_rejected": -265.0758056640625, + "logps/rejected": -265.0830993652344, + "loss": 5.5413, + "margin_dpo/margin_mean": 0.09941692650318146, + "margin_dpo/margin_std": 0.7664570808410645, + "step": 23 + }, + { + "epoch": 0.050261780104712044, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.24128368496894836, + "fcm_dpo/q_t": 0.49939680099487305, + "grad_norm": 29.68800163269043, + "learning_rate": 2.3958333333333335e-07, + "logits/chosen": -0.6591615676879883, + "logits/rejected": -0.5714296102523804, + "logps/chosen": -300.9408874511719, + "logps/ref_chosen": -301.17962646484375, + "logps/ref_rejected": -302.12786865234375, + "logps/rejected": -302.1304016113281, + "loss": 5.5356, + "margin_dpo/margin_mean": 0.24128423631191254, + "margin_dpo/margin_std": 0.7133185267448425, + "step": 24 + }, + { + "epoch": 0.05235602094240838, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.16697196662425995, + "fcm_dpo/q_t": 0.49958258867263794, + "grad_norm": 26.460615158081055, + "learning_rate": 2.5e-07, + "logits/chosen": -0.6041996479034424, + "logits/rejected": -0.6127534508705139, + "logps/chosen": -246.56582641601562, + "logps/ref_chosen": -246.74649047851562, + "logps/ref_rejected": -235.55638122558594, + "logps/rejected": -235.54269409179688, + "loss": 5.5386, + "margin_dpo/margin_mean": 0.16697131097316742, + "margin_dpo/margin_std": 0.7036500573158264, + "step": 25 + }, + { + "epoch": 0.05445026178010471, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.08371715247631073, + "fcm_dpo/q_t": 0.499790757894516, + "grad_norm": 28.732345581054688, + "learning_rate": 2.604166666666667e-07, + "logits/chosen": -0.6599952578544617, + "logits/rejected": -0.6750520467758179, + "logps/chosen": -281.9931335449219, + "logps/ref_chosen": -282.1955871582031, + "logps/ref_rejected": -235.3135528564453, + "logps/rejected": -235.19482421875, + "loss": 5.5419, + "margin_dpo/margin_mean": 0.08371736109256744, + "margin_dpo/margin_std": 0.8501687049865723, + "step": 26 + }, + { + "epoch": 0.05654450261780105, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.1340600550174713, + "fcm_dpo/q_t": 0.4996648132801056, + "grad_norm": 27.74333953857422, + "learning_rate": 2.708333333333333e-07, + "logits/chosen": -0.6518189907073975, + "logits/rejected": -0.6709730625152588, + "logps/chosen": -323.5457763671875, + "logps/ref_chosen": -323.8563537597656, + "logps/ref_rejected": -245.968017578125, + "logps/rejected": -245.7915496826172, + "loss": 5.5399, + "margin_dpo/margin_mean": 0.13406014442443848, + "margin_dpo/margin_std": 0.925900936126709, + "step": 27 + }, + { + "epoch": 0.05863874345549738, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.1435232013463974, + "fcm_dpo/q_t": 0.49964118003845215, + "grad_norm": 26.3175106048584, + "learning_rate": 2.8125e-07, + "logits/chosen": -0.622589111328125, + "logits/rejected": -0.6318536400794983, + "logps/chosen": -247.97296142578125, + "logps/ref_chosen": -248.24673461914062, + "logps/ref_rejected": -240.0382080078125, + "logps/rejected": -239.90797424316406, + "loss": 5.5395, + "margin_dpo/margin_mean": 0.1435234248638153, + "margin_dpo/margin_std": 0.7288922071456909, + "step": 28 + }, + { + "epoch": 0.060732984293193716, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.14979667961597443, + "fcm_dpo/q_t": 0.4996255040168762, + "grad_norm": 29.79783821105957, + "learning_rate": 2.916666666666667e-07, + "logits/chosen": -0.599511444568634, + "logits/rejected": -0.6228891015052795, + "logps/chosen": -317.9765319824219, + "logps/ref_chosen": -318.2564392089844, + "logps/ref_rejected": -286.75848388671875, + "logps/rejected": -286.62841796875, + "loss": 5.5393, + "margin_dpo/margin_mean": 0.1497972011566162, + "margin_dpo/margin_std": 0.8029959201812744, + "step": 29 + }, + { + "epoch": 0.06282722513089005, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.2501027584075928, + "fcm_dpo/q_t": 0.49937474727630615, + "grad_norm": 28.9178524017334, + "learning_rate": 3.020833333333333e-07, + "logits/chosen": -0.5949351191520691, + "logits/rejected": -0.6119610071182251, + "logps/chosen": -252.69354248046875, + "logps/ref_chosen": -253.0491485595703, + "logps/ref_rejected": -261.30029296875, + "logps/rejected": -261.19482421875, + "loss": 5.5353, + "margin_dpo/margin_mean": 0.25010228157043457, + "margin_dpo/margin_std": 0.975698709487915, + "step": 30 + }, + { + "epoch": 0.06492146596858639, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.2504439949989319, + "fcm_dpo/q_t": 0.4993739426136017, + "grad_norm": 25.57700538635254, + "learning_rate": 3.1249999999999997e-07, + "logits/chosen": -0.655745804309845, + "logits/rejected": -0.690646767616272, + "logps/chosen": -247.7589569091797, + "logps/ref_chosen": -248.15301513671875, + "logps/ref_rejected": -203.17703247070312, + "logps/rejected": -203.03338623046875, + "loss": 5.5353, + "margin_dpo/margin_mean": 0.25044363737106323, + "margin_dpo/margin_std": 1.0158027410507202, + "step": 31 + }, + { + "epoch": 0.06701570680628273, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.33064448833465576, + "fcm_dpo/q_t": 0.4991734027862549, + "grad_norm": 29.667165756225586, + "learning_rate": 3.2291666666666666e-07, + "logits/chosen": -0.6167346835136414, + "logits/rejected": -0.622105062007904, + "logps/chosen": -304.966796875, + "logps/ref_chosen": -305.5399475097656, + "logps/ref_rejected": -267.6527099609375, + "logps/rejected": -267.4101867675781, + "loss": 5.5321, + "margin_dpo/margin_mean": 0.3306446075439453, + "margin_dpo/margin_std": 0.9724135994911194, + "step": 32 + }, + { + "epoch": 0.06910994764397906, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.39138373732566833, + "fcm_dpo/q_t": 0.49902158975601196, + "grad_norm": 28.317340850830078, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -0.6441166400909424, + "logits/rejected": -0.6569768190383911, + "logps/chosen": -285.66668701171875, + "logps/ref_chosen": -286.2335205078125, + "logps/ref_rejected": -255.38748168945312, + "logps/rejected": -255.2120361328125, + "loss": 5.5297, + "margin_dpo/margin_mean": 0.39138340950012207, + "margin_dpo/margin_std": 1.0941178798675537, + "step": 33 + }, + { + "epoch": 0.0712041884816754, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.8573173880577087, + "fcm_dpo/q_t": 0.4978567957878113, + "grad_norm": 31.08516502380371, + "learning_rate": 3.4375e-07, + "logits/chosen": -0.619086503982544, + "logits/rejected": -0.6280518770217896, + "logps/chosen": -340.7860107421875, + "logps/ref_chosen": -341.5920104980469, + "logps/ref_rejected": -278.8866882324219, + "logps/rejected": -278.93798828125, + "loss": 5.5111, + "margin_dpo/margin_mean": 0.8573174476623535, + "margin_dpo/margin_std": 1.1765947341918945, + "step": 34 + }, + { + "epoch": 0.07329842931937172, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.3640906810760498, + "fcm_dpo/q_t": 0.499089777469635, + "grad_norm": 26.597396850585938, + "learning_rate": 3.541666666666667e-07, + "logits/chosen": -0.6332607269287109, + "logits/rejected": -0.653661847114563, + "logps/chosen": -264.4308166503906, + "logps/ref_chosen": -265.0795593261719, + "logps/ref_rejected": -264.4876708984375, + "logps/rejected": -264.20306396484375, + "loss": 5.5308, + "margin_dpo/margin_mean": 0.3640906810760498, + "margin_dpo/margin_std": 1.240203619003296, + "step": 35 + }, + { + "epoch": 0.07539267015706806, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.6718266606330872, + "fcm_dpo/q_t": 0.4983205497264862, + "grad_norm": 31.872516632080078, + "learning_rate": 3.645833333333333e-07, + "logits/chosen": -0.6042373180389404, + "logits/rejected": -0.6219602823257446, + "logps/chosen": -296.499755859375, + "logps/ref_chosen": -297.3261413574219, + "logps/ref_rejected": -282.09515380859375, + "logps/rejected": -281.94061279296875, + "loss": 5.5186, + "margin_dpo/margin_mean": 0.6718263030052185, + "margin_dpo/margin_std": 1.3965107202529907, + "step": 36 + }, + { + "epoch": 0.0774869109947644, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.5291406512260437, + "fcm_dpo/q_t": 0.49867722392082214, + "grad_norm": 30.831968307495117, + "learning_rate": 3.75e-07, + "logits/chosen": -0.6052833795547485, + "logits/rejected": -0.6201093196868896, + "logps/chosen": -313.28765869140625, + "logps/ref_chosen": -314.0340270996094, + "logps/ref_rejected": -299.3437805175781, + "logps/rejected": -299.1265563964844, + "loss": 5.5243, + "margin_dpo/margin_mean": 0.5291397571563721, + "margin_dpo/margin_std": 1.5934827327728271, + "step": 37 + }, + { + "epoch": 0.07958115183246073, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.6104308366775513, + "fcm_dpo/q_t": 0.49847403168678284, + "grad_norm": 28.433164596557617, + "learning_rate": 3.8541666666666665e-07, + "logits/chosen": -0.640455424785614, + "logits/rejected": -0.6528275012969971, + "logps/chosen": -281.47015380859375, + "logps/ref_chosen": -282.54119873046875, + "logps/ref_rejected": -269.7773132324219, + "logps/rejected": -269.3166809082031, + "loss": 5.5211, + "margin_dpo/margin_mean": 0.610431969165802, + "margin_dpo/margin_std": 1.5897610187530518, + "step": 38 + }, + { + "epoch": 0.08167539267015707, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.2843832969665527, + "fcm_dpo/q_t": 0.49678951501846313, + "grad_norm": 29.44791603088379, + "learning_rate": 3.958333333333333e-07, + "logits/chosen": -0.6190811991691589, + "logits/rejected": -0.6332811713218689, + "logps/chosen": -275.41473388671875, + "logps/ref_chosen": -276.7729187011719, + "logps/ref_rejected": -249.95889282226562, + "logps/rejected": -249.88507080078125, + "loss": 5.4944, + "margin_dpo/margin_mean": 1.2843828201293945, + "margin_dpo/margin_std": 1.8405652046203613, + "step": 39 + }, + { + "epoch": 0.08376963350785341, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.8125598430633545, + "fcm_dpo/q_t": 0.49796897172927856, + "grad_norm": 27.380224227905273, + "learning_rate": 4.0625e-07, + "logits/chosen": -0.6235227584838867, + "logits/rejected": -0.6593804359436035, + "logps/chosen": -283.1932678222656, + "logps/ref_chosen": -284.30706787109375, + "logps/ref_rejected": -244.4459991455078, + "logps/rejected": -244.14476013183594, + "loss": 5.5131, + "margin_dpo/margin_mean": 0.8125599026679993, + "margin_dpo/margin_std": 1.823110580444336, + "step": 40 + }, + { + "epoch": 0.08586387434554973, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.8139923810958862, + "fcm_dpo/q_t": 0.4979651868343353, + "grad_norm": 30.188688278198242, + "learning_rate": 4.1666666666666667e-07, + "logits/chosen": -0.6231560707092285, + "logits/rejected": -0.6478135585784912, + "logps/chosen": -292.701171875, + "logps/ref_chosen": -293.8151550292969, + "logps/ref_rejected": -252.16815185546875, + "logps/rejected": -251.86814880371094, + "loss": 5.513, + "margin_dpo/margin_mean": 0.8139930963516235, + "margin_dpo/margin_std": 1.6932668685913086, + "step": 41 + }, + { + "epoch": 0.08795811518324607, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.8815785050392151, + "fcm_dpo/q_t": 0.49779632687568665, + "grad_norm": 27.49101448059082, + "learning_rate": 4.270833333333333e-07, + "logits/chosen": -0.6369996666908264, + "logits/rejected": -0.6549193859100342, + "logps/chosen": -251.56045532226562, + "logps/ref_chosen": -252.76023864746094, + "logps/ref_rejected": -261.0414733886719, + "logps/rejected": -260.7232666015625, + "loss": 5.5106, + "margin_dpo/margin_mean": 0.881578803062439, + "margin_dpo/margin_std": 2.2027502059936523, + "step": 42 + }, + { + "epoch": 0.09005235602094241, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.269668459892273, + "fcm_dpo/q_t": 0.4968262314796448, + "grad_norm": 29.89678955078125, + "learning_rate": 4.375e-07, + "logits/chosen": -0.5942052602767944, + "logits/rejected": -0.6096649169921875, + "logps/chosen": -315.5320129394531, + "logps/ref_chosen": -316.8347473144531, + "logps/ref_rejected": -273.7649230957031, + "logps/rejected": -273.73187255859375, + "loss": 5.4951, + "margin_dpo/margin_mean": 1.2696670293807983, + "margin_dpo/margin_std": 2.1477150917053223, + "step": 43 + }, + { + "epoch": 0.09214659685863874, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.6501388549804688, + "fcm_dpo/q_t": 0.49587562680244446, + "grad_norm": 30.845321655273438, + "learning_rate": 4.479166666666667e-07, + "logits/chosen": -0.5972121953964233, + "logits/rejected": -0.5959709882736206, + "logps/chosen": -285.3184509277344, + "logps/ref_chosen": -286.8757019042969, + "logps/ref_rejected": -282.4681396484375, + "logps/rejected": -282.5610656738281, + "loss": 5.4804, + "margin_dpo/margin_mean": 1.6501388549804688, + "margin_dpo/margin_std": 2.9746947288513184, + "step": 44 + }, + { + "epoch": 0.09424083769633508, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.1927553415298462, + "fcm_dpo/q_t": 0.4970191717147827, + "grad_norm": 28.909330368041992, + "learning_rate": 4.5833333333333327e-07, + "logits/chosen": -0.6916259527206421, + "logits/rejected": -0.715716540813446, + "logps/chosen": -322.6328125, + "logps/ref_chosen": -324.2633972167969, + "logps/ref_rejected": -293.09466552734375, + "logps/rejected": -292.6568298339844, + "loss": 5.4985, + "margin_dpo/margin_mean": 1.192754864692688, + "margin_dpo/margin_std": 2.8390185832977295, + "step": 45 + }, + { + "epoch": 0.09633507853403141, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.5148533582687378, + "fcm_dpo/q_t": 0.4962137043476105, + "grad_norm": 30.279727935791016, + "learning_rate": 4.6874999999999996e-07, + "logits/chosen": -0.6287131309509277, + "logits/rejected": -0.6423863768577576, + "logps/chosen": -296.6163635253906, + "logps/ref_chosen": -298.3357238769531, + "logps/ref_rejected": -267.66204833984375, + "logps/rejected": -267.45751953125, + "loss": 5.4855, + "margin_dpo/margin_mean": 1.5148537158966064, + "margin_dpo/margin_std": 2.574115753173828, + "step": 46 + }, + { + "epoch": 0.09842931937172775, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 0.9817731976509094, + "fcm_dpo/q_t": 0.49754610657691956, + "grad_norm": 26.394506454467773, + "learning_rate": 4.791666666666667e-07, + "logits/chosen": -0.590155303478241, + "logits/rejected": -0.6099727153778076, + "logps/chosen": -261.077392578125, + "logps/ref_chosen": -262.5669250488281, + "logps/ref_rejected": -258.70989990234375, + "logps/rejected": -258.20208740234375, + "loss": 5.5072, + "margin_dpo/margin_mean": 0.9817725419998169, + "margin_dpo/margin_std": 3.2907633781433105, + "step": 47 + }, + { + "epoch": 0.10052356020942409, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.5850274562835693, + "fcm_dpo/q_t": 0.4960397481918335, + "grad_norm": 27.51393699645996, + "learning_rate": 4.895833333333333e-07, + "logits/chosen": -0.625287652015686, + "logits/rejected": -0.6512780785560608, + "logps/chosen": -267.6490173339844, + "logps/ref_chosen": -269.4932556152344, + "logps/ref_rejected": -241.888916015625, + "logps/rejected": -241.6297149658203, + "loss": 5.4831, + "margin_dpo/margin_mean": 1.5850276947021484, + "margin_dpo/margin_std": 3.051654100418091, + "step": 48 + }, + { + "epoch": 0.10261780104712041, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.7378376722335815, + "fcm_dpo/q_t": 0.4956568479537964, + "grad_norm": 27.689477920532227, + "learning_rate": 5e-07, + "logits/chosen": -0.6677048802375793, + "logits/rejected": -0.6521282196044922, + "logps/chosen": -255.6532745361328, + "logps/ref_chosen": -257.8844909667969, + "logps/ref_rejected": -256.8912048339844, + "logps/rejected": -256.3978271484375, + "loss": 5.4772, + "margin_dpo/margin_mean": 1.7378380298614502, + "margin_dpo/margin_std": 3.387692928314209, + "step": 49 + }, + { + "epoch": 0.10471204188481675, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.1869100332260132, + "fcm_dpo/q_t": 0.49703431129455566, + "grad_norm": 27.907745361328125, + "learning_rate": 4.999932966293553e-07, + "logits/chosen": -0.6318182945251465, + "logits/rejected": -0.6537318229675293, + "logps/chosen": -299.6126403808594, + "logps/ref_chosen": -301.62884521484375, + "logps/ref_rejected": -298.2716064453125, + "logps/rejected": -297.4422912597656, + "loss": 5.4995, + "margin_dpo/margin_mean": 1.1869091987609863, + "margin_dpo/margin_std": 3.856821298599243, + "step": 50 + }, + { + "epoch": 0.1068062827225131, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 2.0278406143188477, + "fcm_dpo/q_t": 0.49493393301963806, + "grad_norm": 29.1589298248291, + "learning_rate": 4.999731868769026e-07, + "logits/chosen": -0.60748690366745, + "logits/rejected": -0.6017611026763916, + "logps/chosen": -267.2236022949219, + "logps/ref_chosen": -269.37237548828125, + "logps/ref_rejected": -297.0167541503906, + "logps/rejected": -296.89581298828125, + "loss": 5.4671, + "margin_dpo/margin_mean": 2.0278408527374268, + "margin_dpo/margin_std": 4.648531436920166, + "step": 51 + }, + { + "epoch": 0.10890052356020942, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 3.078564167022705, + "fcm_dpo/q_t": 0.4923100769519806, + "grad_norm": 30.302101135253906, + "learning_rate": 4.99939671821067e-07, + "logits/chosen": -0.6508050560951233, + "logits/rejected": -0.6571372151374817, + "logps/chosen": -304.177978515625, + "logps/ref_chosen": -306.9028015136719, + "logps/ref_rejected": -281.24737548828125, + "logps/rejected": -281.6011047363281, + "loss": 5.4254, + "margin_dpo/margin_mean": 3.078564167022705, + "margin_dpo/margin_std": 4.690369606018066, + "step": 52 + }, + { + "epoch": 0.11099476439790576, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 2.22479248046875, + "fcm_dpo/q_t": 0.4944427013397217, + "grad_norm": 31.287256240844727, + "learning_rate": 4.998927532591591e-07, + "logits/chosen": -0.6520200371742249, + "logits/rejected": -0.6926702260971069, + "logps/chosen": -283.11590576171875, + "logps/ref_chosen": -285.9759521484375, + "logps/ref_rejected": -273.9073486328125, + "logps/rejected": -273.2720947265625, + "loss": 5.4597, + "margin_dpo/margin_mean": 2.22479248046875, + "margin_dpo/margin_std": 5.078397274017334, + "step": 53 + }, + { + "epoch": 0.1130890052356021, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.866455316543579, + "fcm_dpo/q_t": 0.49533912539482117, + "grad_norm": 26.340913772583008, + "learning_rate": 4.998324337072792e-07, + "logits/chosen": -0.68650883436203, + "logits/rejected": -0.6943265795707703, + "logps/chosen": -303.7992858886719, + "logps/ref_chosen": -306.504638671875, + "logps/ref_rejected": -272.67431640625, + "logps/rejected": -271.8354187011719, + "loss": 5.4741, + "margin_dpo/margin_mean": 1.866454839706421, + "margin_dpo/margin_std": 5.443723678588867, + "step": 54 + }, + { + "epoch": 0.11518324607329843, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 2.2475740909576416, + "fcm_dpo/q_t": 0.494386225938797, + "grad_norm": 24.877641677856445, + "learning_rate": 4.997587164001815e-07, + "logits/chosen": -0.6414747834205627, + "logits/rejected": -0.6457206606864929, + "logps/chosen": -220.563720703125, + "logps/ref_chosen": -222.33013916015625, + "logps/ref_rejected": -206.59571838378906, + "logps/rejected": -207.07687377929688, + "loss": 5.4583, + "margin_dpo/margin_mean": 2.2475738525390625, + "margin_dpo/margin_std": 4.910269737243652, + "step": 55 + }, + { + "epoch": 0.11727748691099477, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 3.1674323081970215, + "fcm_dpo/q_t": 0.49209311604499817, + "grad_norm": 27.445068359375, + "learning_rate": 4.996716052911017e-07, + "logits/chosen": -0.6101264357566833, + "logits/rejected": -0.6247260570526123, + "logps/chosen": -247.60667419433594, + "logps/ref_chosen": -250.47816467285156, + "logps/ref_rejected": -228.25848388671875, + "logps/rejected": -228.55442810058594, + "loss": 5.4237, + "margin_dpo/margin_mean": 3.167431354522705, + "margin_dpo/margin_std": 5.867389678955078, + "step": 56 + }, + { + "epoch": 0.1193717277486911, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 4.167306423187256, + "fcm_dpo/q_t": 0.48959389328956604, + "grad_norm": 30.918928146362305, + "learning_rate": 4.99571105051544e-07, + "logits/chosen": -0.7010935544967651, + "logits/rejected": -0.6720656156539917, + "logps/chosen": -311.27130126953125, + "logps/ref_chosen": -315.1195373535156, + "logps/ref_rejected": -272.755615234375, + "logps/rejected": -273.0746765136719, + "loss": 5.3836, + "margin_dpo/margin_mean": 4.167305946350098, + "margin_dpo/margin_std": 5.622750759124756, + "step": 57 + }, + { + "epoch": 0.12146596858638743, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 2.323967933654785, + "fcm_dpo/q_t": 0.49419358372688293, + "grad_norm": 27.555803298950195, + "learning_rate": 4.994572210710314e-07, + "logits/chosen": -0.6160457730293274, + "logits/rejected": -0.6402078866958618, + "logps/chosen": -262.7194519042969, + "logps/ref_chosen": -265.1816711425781, + "logps/ref_rejected": -268.2203369140625, + "logps/rejected": -268.0820617675781, + "loss": 5.4563, + "margin_dpo/margin_mean": 2.323967933654785, + "margin_dpo/margin_std": 5.637367248535156, + "step": 58 + }, + { + "epoch": 0.12356020942408377, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 1.5584101676940918, + "fcm_dpo/q_t": 0.4961104989051819, + "grad_norm": 29.654539108276367, + "learning_rate": 4.993299594568162e-07, + "logits/chosen": -0.5985250473022461, + "logits/rejected": -0.5907694697380066, + "logps/chosen": -284.25274658203125, + "logps/ref_chosen": -286.35394287109375, + "logps/ref_rejected": -260.6757507324219, + "logps/rejected": -260.1329345703125, + "loss": 5.4889, + "margin_dpo/margin_mean": 1.5584099292755127, + "margin_dpo/margin_std": 7.0632781982421875, + "step": 59 + }, + { + "epoch": 0.1256544502617801, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 3.4080166816711426, + "fcm_dpo/q_t": 0.4914897680282593, + "grad_norm": 28.067386627197266, + "learning_rate": 4.991893270335525e-07, + "logits/chosen": -0.6754345297813416, + "logits/rejected": -0.699802577495575, + "logps/chosen": -255.8909912109375, + "logps/ref_chosen": -258.74859619140625, + "logps/ref_rejected": -255.04893493652344, + "logps/rejected": -255.59933471679688, + "loss": 5.4155, + "margin_dpo/margin_mean": 3.4080190658569336, + "margin_dpo/margin_std": 7.094330787658691, + "step": 60 + }, + { + "epoch": 0.12774869109947645, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 3.202296257019043, + "fcm_dpo/q_t": 0.49200791120529175, + "grad_norm": 29.841184616088867, + "learning_rate": 4.990353313429303e-07, + "logits/chosen": -0.6437735557556152, + "logits/rejected": -0.6602544784545898, + "logps/chosen": -275.47747802734375, + "logps/ref_chosen": -278.4678955078125, + "logps/ref_rejected": -252.02720642089844, + "logps/rejected": -252.23904418945312, + "loss": 5.4247, + "margin_dpo/margin_mean": 3.202296257019043, + "margin_dpo/margin_std": 7.591219425201416, + "step": 61 + }, + { + "epoch": 0.12984293193717278, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 3.3970751762390137, + "fcm_dpo/q_t": 0.4915270209312439, + "grad_norm": 26.655916213989258, + "learning_rate": 4.988679806432711e-07, + "logits/chosen": -0.6097227334976196, + "logits/rejected": -0.6514406800270081, + "logps/chosen": -268.9474182128906, + "logps/ref_chosen": -272.92431640625, + "logps/ref_rejected": -260.7935485839844, + "logps/rejected": -260.2137145996094, + "loss": 5.4163, + "margin_dpo/margin_mean": 3.39707612991333, + "margin_dpo/margin_std": 7.437541961669922, + "step": 62 + }, + { + "epoch": 0.1319371727748691, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 3.588202476501465, + "fcm_dpo/q_t": 0.4910445213317871, + "grad_norm": 28.286346435546875, + "learning_rate": 4.986872839090852e-07, + "logits/chosen": -0.6595807671546936, + "logits/rejected": -0.6659517288208008, + "logps/chosen": -273.69244384765625, + "logps/ref_chosen": -277.0889892578125, + "logps/ref_rejected": -273.3413391113281, + "logps/rejected": -273.532958984375, + "loss": 5.4086, + "margin_dpo/margin_mean": 3.588200807571411, + "margin_dpo/margin_std": 7.262460708618164, + "step": 63 + }, + { + "epoch": 0.13403141361256546, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 4.24724006652832, + "fcm_dpo/q_t": 0.48940467834472656, + "grad_norm": 28.308141708374023, + "learning_rate": 4.9849325083059e-07, + "logits/chosen": -0.628346860408783, + "logits/rejected": -0.6231892704963684, + "logps/chosen": -279.7200927734375, + "logps/ref_chosen": -283.8244934082031, + "logps/ref_rejected": -263.29351806640625, + "logps/rejected": -263.4363708496094, + "loss": 5.3847, + "margin_dpo/margin_mean": 4.24724006652832, + "margin_dpo/margin_std": 8.381464958190918, + "step": 64 + }, + { + "epoch": 0.13612565445026178, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 3.2838730812072754, + "fcm_dpo/q_t": 0.49180322885513306, + "grad_norm": 27.914520263671875, + "learning_rate": 4.982858918131906e-07, + "logits/chosen": -0.6988512277603149, + "logits/rejected": -0.6669014692306519, + "logps/chosen": -261.4900817871094, + "logps/ref_chosen": -264.8699645996094, + "logps/ref_rejected": -268.5076904296875, + "logps/rejected": -268.4117431640625, + "loss": 5.4206, + "margin_dpo/margin_mean": 3.2838728427886963, + "margin_dpo/margin_std": 7.35109281539917, + "step": 65 + }, + { + "epoch": 0.1382198952879581, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 4.0454559326171875, + "fcm_dpo/q_t": 0.48991909623146057, + "grad_norm": 27.6617488861084, + "learning_rate": 4.980652179769217e-07, + "logits/chosen": -0.6764880418777466, + "logits/rejected": -0.696363091468811, + "logps/chosen": -269.8972473144531, + "logps/ref_chosen": -272.9283142089844, + "logps/ref_rejected": -280.94696044921875, + "logps/rejected": -281.96136474609375, + "loss": 5.3953, + "margin_dpo/margin_mean": 4.045454978942871, + "margin_dpo/margin_std": 9.999269485473633, + "step": 66 + }, + { + "epoch": 0.14031413612565444, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 3.675020694732666, + "fcm_dpo/q_t": 0.4908318519592285, + "grad_norm": 25.536836624145508, + "learning_rate": 4.978312411558517e-07, + "logits/chosen": -0.6852215528488159, + "logits/rejected": -0.7170518636703491, + "logps/chosen": -262.1640319824219, + "logps/ref_chosen": -266.18695068359375, + "logps/ref_rejected": -250.17405700683594, + "logps/rejected": -249.82615661621094, + "loss": 5.4077, + "margin_dpo/margin_mean": 3.6750199794769287, + "margin_dpo/margin_std": 8.747812271118164, + "step": 67 + }, + { + "epoch": 0.1424083769633508, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 4.583087921142578, + "fcm_dpo/q_t": 0.48859941959381104, + "grad_norm": 28.211336135864258, + "learning_rate": 4.975839738974473e-07, + "logits/chosen": -0.6900507211685181, + "logits/rejected": -0.7039142847061157, + "logps/chosen": -294.9899597167969, + "logps/ref_chosen": -297.9385986328125, + "logps/ref_rejected": -261.5141296386719, + "logps/rejected": -263.14862060546875, + "loss": 5.3759, + "margin_dpo/margin_mean": 4.583088397979736, + "margin_dpo/margin_std": 10.535322189331055, + "step": 68 + }, + { + "epoch": 0.14450261780104712, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 6.044898986816406, + "fcm_dpo/q_t": 0.48493677377700806, + "grad_norm": 28.641454696655273, + "learning_rate": 4.97323429461901e-07, + "logits/chosen": -0.6796502470970154, + "logits/rejected": -0.7097989320755005, + "logps/chosen": -261.7384338378906, + "logps/ref_chosen": -265.6175231933594, + "logps/ref_rejected": -236.8287353515625, + "logps/rejected": -238.99456787109375, + "loss": 5.3173, + "margin_dpo/margin_mean": 6.044898509979248, + "margin_dpo/margin_std": 9.810757637023926, + "step": 69 + }, + { + "epoch": 0.14659685863874344, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 5.822492599487305, + "fcm_dpo/q_t": 0.4854995310306549, + "grad_norm": 28.677330017089844, + "learning_rate": 4.970496218214204e-07, + "logits/chosen": -0.6750044822692871, + "logits/rejected": -0.7083183526992798, + "logps/chosen": -291.96441650390625, + "logps/ref_chosen": -296.2259216308594, + "logps/ref_rejected": -254.68496704101562, + "logps/rejected": -256.2459411621094, + "loss": 5.3286, + "margin_dpo/margin_mean": 5.822491645812988, + "margin_dpo/margin_std": 10.934935569763184, + "step": 70 + }, + { + "epoch": 0.1486910994764398, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 5.134796142578125, + "fcm_dpo/q_t": 0.4872013032436371, + "grad_norm": 28.424619674682617, + "learning_rate": 4.967625656594781e-07, + "logits/chosen": -0.6523040533065796, + "logits/rejected": -0.6413918733596802, + "logps/chosen": -283.8147277832031, + "logps/ref_chosen": -288.92724609375, + "logps/ref_rejected": -278.6405334472656, + "logps/rejected": -278.662841796875, + "loss": 5.3592, + "margin_dpo/margin_mean": 5.134795188903809, + "margin_dpo/margin_std": 12.370285034179688, + "step": 71 + }, + { + "epoch": 0.15078534031413612, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 5.098928928375244, + "fcm_dpo/q_t": 0.4872835576534271, + "grad_norm": 28.138423919677734, + "learning_rate": 4.964622763700252e-07, + "logits/chosen": -0.6939007639884949, + "logits/rejected": -0.705129861831665, + "logps/chosen": -233.71646118164062, + "logps/ref_chosen": -237.0452880859375, + "logps/ref_rejected": -252.7946319580078, + "logps/rejected": -254.56471252441406, + "loss": 5.3564, + "margin_dpo/margin_mean": 5.098929405212402, + "margin_dpo/margin_std": 10.444880485534668, + "step": 72 + }, + { + "epoch": 0.15287958115183245, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 4.748826503753662, + "fcm_dpo/q_t": 0.48815372586250305, + "grad_norm": 27.864391326904297, + "learning_rate": 4.961487700566646e-07, + "logits/chosen": -0.659065306186676, + "logits/rejected": -0.6768229603767395, + "logps/chosen": -268.7459411621094, + "logps/ref_chosen": -273.0531005859375, + "logps/ref_rejected": -246.8330841064453, + "logps/rejected": -247.2747802734375, + "loss": 5.3737, + "margin_dpo/margin_mean": 4.748826503753662, + "margin_dpo/margin_std": 12.207172393798828, + "step": 73 + }, + { + "epoch": 0.1549738219895288, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 4.507737159729004, + "fcm_dpo/q_t": 0.48877474665641785, + "grad_norm": 30.305334091186523, + "learning_rate": 4.958220635317885e-07, + "logits/chosen": -0.7256600260734558, + "logits/rejected": -0.7039333581924438, + "logps/chosen": -338.9497985839844, + "logps/ref_chosen": -342.2818908691406, + "logps/ref_rejected": -330.0293884277344, + "logps/rejected": -331.2049865722656, + "loss": 5.3817, + "margin_dpo/margin_mean": 4.5077362060546875, + "margin_dpo/margin_std": 11.664762496948242, + "step": 74 + }, + { + "epoch": 0.15706806282722513, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 6.5859527587890625, + "fcm_dpo/q_t": 0.48358994722366333, + "grad_norm": 29.470287322998047, + "learning_rate": 4.954821743156767e-07, + "logits/chosen": -0.6431756615638733, + "logits/rejected": -0.6447348594665527, + "logps/chosen": -262.26544189453125, + "logps/ref_chosen": -266.8641662597656, + "logps/ref_rejected": -276.8699951171875, + "logps/rejected": -278.8572692871094, + "loss": 5.2993, + "margin_dpo/margin_mean": 6.585953712463379, + "margin_dpo/margin_std": 10.910937309265137, + "step": 75 + }, + { + "epoch": 0.15916230366492146, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 7.311924457550049, + "fcm_dpo/q_t": 0.4818291962146759, + "grad_norm": 29.281173706054688, + "learning_rate": 4.951291206355559e-07, + "logits/chosen": -0.7205427289009094, + "logits/rejected": -0.7283482551574707, + "logps/chosen": -277.0059814453125, + "logps/ref_chosen": -281.174560546875, + "logps/ref_rejected": -263.6067199707031, + "logps/rejected": -266.7500305175781, + "loss": 5.2755, + "margin_dpo/margin_mean": 7.311923980712891, + "margin_dpo/margin_std": 12.705620765686035, + "step": 76 + }, + { + "epoch": 0.1612565445026178, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 5.805636405944824, + "fcm_dpo/q_t": 0.4855879247188568, + "grad_norm": 33.04362106323242, + "learning_rate": 4.947629214246236e-07, + "logits/chosen": -0.5541229248046875, + "logits/rejected": -0.5619992017745972, + "logps/chosen": -302.3905944824219, + "logps/ref_chosen": -306.09527587890625, + "logps/ref_rejected": -253.49569702148438, + "logps/rejected": -255.5966339111328, + "loss": 5.3378, + "margin_dpo/margin_mean": 5.805635452270508, + "margin_dpo/margin_std": 14.091662406921387, + "step": 77 + }, + { + "epoch": 0.16335078534031414, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 8.697511672973633, + "fcm_dpo/q_t": 0.47841718792915344, + "grad_norm": 29.72622299194336, + "learning_rate": 4.943835963210323e-07, + "logits/chosen": -0.6819251179695129, + "logits/rejected": -0.6768004298210144, + "logps/chosen": -253.04547119140625, + "logps/ref_chosen": -256.90234375, + "logps/ref_rejected": -211.57154846191406, + "logps/rejected": -216.41221618652344, + "loss": 5.2264, + "margin_dpo/margin_mean": 8.697509765625, + "margin_dpo/margin_std": 14.436126708984375, + "step": 78 + }, + { + "epoch": 0.16544502617801046, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 8.278536796569824, + "fcm_dpo/q_t": 0.479459285736084, + "grad_norm": 29.883098602294922, + "learning_rate": 4.939911656668361e-07, + "logits/chosen": -0.6628604531288147, + "logits/rejected": -0.6839243173599243, + "logps/chosen": -263.1708068847656, + "logps/ref_chosen": -266.2735595703125, + "logps/ref_rejected": -251.57257080078125, + "logps/rejected": -256.74835205078125, + "loss": 5.2449, + "margin_dpo/margin_mean": 8.278536796569824, + "margin_dpo/margin_std": 14.98855972290039, + "step": 79 + }, + { + "epoch": 0.16753926701570682, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 6.632655143737793, + "fcm_dpo/q_t": 0.48353880643844604, + "grad_norm": 28.937639236450195, + "learning_rate": 4.935856505068998e-07, + "logits/chosen": -0.6736690998077393, + "logits/rejected": -0.7038200497627258, + "logps/chosen": -285.98919677734375, + "logps/ref_chosen": -287.8509826660156, + "logps/ref_rejected": -256.0766296386719, + "logps/rejected": -260.8474426269531, + "loss": 5.3041, + "margin_dpo/margin_mean": 6.632654190063477, + "margin_dpo/margin_std": 13.19876480102539, + "step": 80 + }, + { + "epoch": 0.16963350785340314, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 7.253467559814453, + "fcm_dpo/q_t": 0.4820139408111572, + "grad_norm": 28.075214385986328, + "learning_rate": 4.93167072587771e-07, + "logits/chosen": -0.6420468091964722, + "logits/rejected": -0.6412660479545593, + "logps/chosen": -266.10321044921875, + "logps/ref_chosen": -268.5232238769531, + "logps/ref_rejected": -237.81137084960938, + "logps/rejected": -242.6448211669922, + "loss": 5.2899, + "margin_dpo/margin_mean": 7.253467559814453, + "margin_dpo/margin_std": 16.714815139770508, + "step": 81 + }, + { + "epoch": 0.17172774869109947, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 7.97769021987915, + "fcm_dpo/q_t": 0.48026588559150696, + "grad_norm": 27.7528018951416, + "learning_rate": 4.92735454356513e-07, + "logits/chosen": -0.7254935503005981, + "logits/rejected": -0.7326993346214294, + "logps/chosen": -276.9371032714844, + "logps/ref_chosen": -279.36395263671875, + "logps/ref_rejected": -236.51365661621094, + "logps/rejected": -242.0644989013672, + "loss": 5.2605, + "margin_dpo/margin_mean": 7.977689743041992, + "margin_dpo/margin_std": 15.65487289428711, + "step": 82 + }, + { + "epoch": 0.17382198952879582, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 8.64671516418457, + "fcm_dpo/q_t": 0.4785246253013611, + "grad_norm": 30.814836502075195, + "learning_rate": 4.922908189595017e-07, + "logits/chosen": -0.6886410713195801, + "logits/rejected": -0.6722111105918884, + "logps/chosen": -273.9360046386719, + "logps/ref_chosen": -274.21923828125, + "logps/ref_rejected": -276.2212219238281, + "logps/rejected": -284.584716796875, + "loss": 5.243, + "margin_dpo/margin_mean": 8.646713256835938, + "margin_dpo/margin_std": 18.04184913635254, + "step": 83 + }, + { + "epoch": 0.17591623036649215, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 7.324193477630615, + "fcm_dpo/q_t": 0.48183199763298035, + "grad_norm": 29.859872817993164, + "learning_rate": 4.918331902411841e-07, + "logits/chosen": -0.7265677452087402, + "logits/rejected": -0.7404079437255859, + "logps/chosen": -293.82232666015625, + "logps/ref_chosen": -294.3975524902344, + "logps/ref_rejected": -279.81884765625, + "logps/rejected": -286.56781005859375, + "loss": 5.2888, + "margin_dpo/margin_mean": 7.324193477630615, + "margin_dpo/margin_std": 16.89883804321289, + "step": 84 + }, + { + "epoch": 0.17801047120418848, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 5.70173454284668, + "fcm_dpo/q_t": 0.4858514070510864, + "grad_norm": 29.388431549072266, + "learning_rate": 4.913625927427995e-07, + "logits/chosen": -0.6746452450752258, + "logits/rejected": -0.6829299330711365, + "logps/chosen": -245.21981811523438, + "logps/ref_chosen": -243.66220092773438, + "logps/ref_rejected": -263.9421691894531, + "logps/rejected": -271.2015380859375, + "loss": 5.3473, + "margin_dpo/margin_mean": 5.701735019683838, + "margin_dpo/margin_std": 15.451016426086426, + "step": 85 + }, + { + "epoch": 0.18010471204188483, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 8.923068046569824, + "fcm_dpo/q_t": 0.4778454601764679, + "grad_norm": 34.907039642333984, + "learning_rate": 4.908790517010636e-07, + "logits/chosen": -0.6926656365394592, + "logits/rejected": -0.6865877509117126, + "logps/chosen": -308.2077941894531, + "logps/ref_chosen": -309.4306945800781, + "logps/ref_rejected": -290.91278076171875, + "logps/rejected": -298.6129455566406, + "loss": 5.2276, + "margin_dpo/margin_mean": 8.923067092895508, + "margin_dpo/margin_std": 17.267658233642578, + "step": 86 + }, + { + "epoch": 0.18219895287958116, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 10.27783203125, + "fcm_dpo/q_t": 0.47462230920791626, + "grad_norm": 29.65764617919922, + "learning_rate": 4.903825930468148e-07, + "logits/chosen": -0.755806028842926, + "logits/rejected": -0.7502421736717224, + "logps/chosen": -278.2044677734375, + "logps/ref_chosen": -278.0277099609375, + "logps/ref_rejected": -245.70123291015625, + "logps/rejected": -256.15582275390625, + "loss": 5.1833, + "margin_dpo/margin_mean": 10.27783203125, + "margin_dpo/margin_std": 18.962289810180664, + "step": 87 + }, + { + "epoch": 0.18429319371727748, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 8.847644805908203, + "fcm_dpo/q_t": 0.4781361222267151, + "grad_norm": 28.80191421508789, + "learning_rate": 4.898732434036243e-07, + "logits/chosen": -0.7761508822441101, + "logits/rejected": -0.7929233312606812, + "logps/chosen": -268.6051025390625, + "logps/ref_chosen": -266.5148010253906, + "logps/ref_rejected": -265.90081787109375, + "logps/rejected": -276.8387756347656, + "loss": 5.2374, + "margin_dpo/margin_mean": 8.847643852233887, + "margin_dpo/margin_std": 19.153247833251953, + "step": 88 + }, + { + "epoch": 0.18638743455497384, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 9.27825927734375, + "fcm_dpo/q_t": 0.4770185947418213, + "grad_norm": 30.52988624572754, + "learning_rate": 4.893510300863676e-07, + "logits/chosen": -0.7448249459266663, + "logits/rejected": -0.7356829643249512, + "logps/chosen": -265.67352294921875, + "logps/ref_chosen": -265.6893005371094, + "logps/ref_rejected": -251.49314880371094, + "logps/rejected": -260.7556457519531, + "loss": 5.2198, + "margin_dpo/margin_mean": 9.27825927734375, + "margin_dpo/margin_std": 18.18901824951172, + "step": 89 + }, + { + "epoch": 0.18848167539267016, + "fcm_dpo/beta": 0.009999998845160007, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 8.289998054504395, + "fcm_dpo/q_t": 0.4794497489929199, + "grad_norm": 29.910764694213867, + "learning_rate": 4.8881598109976e-07, + "logits/chosen": -0.7497580051422119, + "logits/rejected": -0.7592126131057739, + "logps/chosen": -308.5013122558594, + "logps/ref_chosen": -307.4250183105469, + "logps/ref_rejected": -265.7172546386719, + "logps/rejected": -275.0835266113281, + "loss": 5.2544, + "margin_dpo/margin_mean": 8.289999008178711, + "margin_dpo/margin_std": 17.661346435546875, + "step": 90 + }, + { + "epoch": 0.1905759162303665, + "fcm_dpo/beta": 0.010252725332975388, + "fcm_dpo/delta": 0.04954978823661804, + "fcm_dpo/margin": 9.483511924743652, + "fcm_dpo/q_t": 0.47634202241897583, + "grad_norm": 33.50828552246094, + "learning_rate": 4.882681251368548e-07, + "logits/chosen": -0.6733120679855347, + "logits/rejected": -0.6901057958602905, + "logps/chosen": -237.88088989257812, + "logps/ref_chosen": -235.74098205566406, + "logps/ref_rejected": -226.6428985595703, + "logps/rejected": -238.2663116455078, + "loss": 5.2111, + "margin_dpo/margin_mean": 9.483511924743652, + "margin_dpo/margin_std": 19.319496154785156, + "step": 91 + }, + { + "epoch": 0.19267015706806281, + "fcm_dpo/beta": 0.010404359549283981, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 10.150311470031738, + "fcm_dpo/q_t": 0.47393330931663513, + "grad_norm": 34.50823974609375, + "learning_rate": 4.877074915775048e-07, + "logits/chosen": -0.7354683876037598, + "logits/rejected": -0.7188453674316406, + "logps/chosen": -286.5132751464844, + "logps/ref_chosen": -283.4475402832031, + "logps/ref_rejected": -273.134033203125, + "logps/rejected": -286.35009765625, + "loss": 5.1857, + "margin_dpo/margin_mean": 10.150311470031738, + "margin_dpo/margin_std": 21.28767967224121, + "step": 92 + }, + { + "epoch": 0.19476439790575917, + "fcm_dpo/beta": 0.010404359549283981, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 9.547552108764648, + "fcm_dpo/q_t": 0.47551578283309937, + "grad_norm": 29.792530059814453, + "learning_rate": 4.871341104867864e-07, + "logits/chosen": -0.7289955019950867, + "logits/rejected": -0.7523810267448425, + "logps/chosen": -235.75485229492188, + "logps/ref_chosen": -233.33714294433594, + "logps/ref_rejected": -230.54273986816406, + "logps/rejected": -242.5079803466797, + "loss": 5.2018, + "margin_dpo/margin_mean": 9.547552108764648, + "margin_dpo/margin_std": 19.47620391845703, + "step": 93 + }, + { + "epoch": 0.1968586387434555, + "fcm_dpo/beta": 0.010404359549283981, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 9.37955093383789, + "fcm_dpo/q_t": 0.47586768865585327, + "grad_norm": 32.49482727050781, + "learning_rate": 4.865480126133871e-07, + "logits/chosen": -0.6883825659751892, + "logits/rejected": -0.7099732160568237, + "logps/chosen": -297.0543212890625, + "logps/ref_chosen": -294.6528015136719, + "logps/ref_rejected": -283.657958984375, + "logps/rejected": -295.4390563964844, + "loss": 5.2195, + "margin_dpo/margin_mean": 9.379551887512207, + "margin_dpo/margin_std": 21.819246292114258, + "step": 94 + }, + { + "epoch": 0.19895287958115182, + "fcm_dpo/beta": 0.010404359549283981, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 10.186941146850586, + "fcm_dpo/q_t": 0.4739447236061096, + "grad_norm": 34.7429313659668, + "learning_rate": 4.859492293879573e-07, + "logits/chosen": -0.7289009094238281, + "logits/rejected": -0.7504929304122925, + "logps/chosen": -314.9253845214844, + "logps/ref_chosen": -311.6697082519531, + "logps/ref_rejected": -262.7471923828125, + "logps/rejected": -276.1898193359375, + "loss": 5.1947, + "margin_dpo/margin_mean": 10.18694019317627, + "margin_dpo/margin_std": 22.561256408691406, + "step": 95 + }, + { + "epoch": 0.20104712041884817, + "fcm_dpo/beta": 0.010404359549283981, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 10.215592384338379, + "fcm_dpo/q_t": 0.4738875925540924, + "grad_norm": 36.46210479736328, + "learning_rate": 4.853377929214243e-07, + "logits/chosen": -0.7038691639900208, + "logits/rejected": -0.7164921760559082, + "logps/chosen": -287.2462158203125, + "logps/ref_chosen": -282.55596923828125, + "logps/ref_rejected": -242.71588134765625, + "logps/rejected": -257.6216735839844, + "loss": 5.1946, + "margin_dpo/margin_mean": 10.215592384338379, + "margin_dpo/margin_std": 23.375957489013672, + "step": 96 + }, + { + "epoch": 0.2031413612565445, + "fcm_dpo/beta": 0.010404359549283981, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": 12.028409957885742, + "fcm_dpo/q_t": 0.4692074954509735, + "grad_norm": 33.21619415283203, + "learning_rate": 4.847137360032699e-07, + "logits/chosen": -0.7515384554862976, + "logits/rejected": -0.7390632629394531, + "logps/chosen": -307.90765380859375, + "logps/ref_chosen": -303.57781982421875, + "logps/ref_rejected": -264.22491455078125, + "logps/rejected": -280.58319091796875, + "loss": 5.1173, + "margin_dpo/margin_mean": 12.028410911560059, + "margin_dpo/margin_std": 22.326217651367188, + "step": 97 + }, + { + "epoch": 0.20523560209424083, + "fcm_dpo/beta": 0.010807948186993599, + "fcm_dpo/delta": 0.08449017256498337, + "fcm_dpo/margin": 12.213380813598633, + "fcm_dpo/q_t": 0.4684543013572693, + "grad_norm": 37.011268615722656, + "learning_rate": 4.84077092099773e-07, + "logits/chosen": -0.7741104364395142, + "logits/rejected": -0.7865383625030518, + "logps/chosen": -291.7223815917969, + "logps/ref_chosen": -286.8303527832031, + "logps/ref_rejected": -278.08331298828125, + "logps/rejected": -295.1886901855469, + "loss": 5.1132, + "margin_dpo/margin_mean": 12.213380813598633, + "margin_dpo/margin_std": 22.774032592773438, + "step": 98 + }, + { + "epoch": 0.20732984293193718, + "fcm_dpo/beta": 0.011437967419624329, + "fcm_dpo/delta": 0.09145952761173248, + "fcm_dpo/margin": 12.564851760864258, + "fcm_dpo/q_t": 0.46524322032928467, + "grad_norm": 37.08080291748047, + "learning_rate": 4.834278953522137e-07, + "logits/chosen": -0.7426201701164246, + "logits/rejected": -0.756097137928009, + "logps/chosen": -285.139404296875, + "logps/ref_chosen": -279.92120361328125, + "logps/ref_rejected": -250.3365478515625, + "logps/rejected": -268.11956787109375, + "loss": 5.0901, + "margin_dpo/margin_mean": 12.564850807189941, + "margin_dpo/margin_std": 27.0224666595459, + "step": 99 + }, + { + "epoch": 0.2094240837696335, + "fcm_dpo/beta": 0.012557308189570904, + "fcm_dpo/delta": 0.07896663248538971, + "fcm_dpo/margin": 12.57419490814209, + "fcm_dpo/q_t": 0.46233466267585754, + "grad_norm": 43.29024887084961, + "learning_rate": 4.827661805750437e-07, + "logits/chosen": -0.7817738056182861, + "logits/rejected": -0.7950529456138611, + "logps/chosen": -304.5354309082031, + "logps/ref_chosen": -296.8276672363281, + "logps/ref_rejected": -275.56146240234375, + "logps/rejected": -295.8433837890625, + "loss": 5.04, + "margin_dpo/margin_mean": 12.574195861816406, + "margin_dpo/margin_std": 24.096710205078125, + "step": 100 + }, + { + "epoch": 0.21151832460732983, + "fcm_dpo/beta": 0.013111630454659462, + "fcm_dpo/delta": 0.07151152938604355, + "fcm_dpo/margin": 14.857452392578125, + "fcm_dpo/q_t": 0.45359134674072266, + "grad_norm": 41.36968231201172, + "learning_rate": 4.820919832540181e-07, + "logits/chosen": -0.770940363407135, + "logits/rejected": -0.7847775816917419, + "logps/chosen": -257.88330078125, + "logps/ref_chosen": -252.74203491210938, + "logps/ref_rejected": -276.4185485839844, + "logps/rejected": -296.41729736328125, + "loss": 4.9313, + "margin_dpo/margin_mean": 14.857452392578125, + "margin_dpo/margin_std": 26.29358673095703, + "step": 101 + }, + { + "epoch": 0.2136125654450262, + "fcm_dpo/beta": 0.014527034014463425, + "fcm_dpo/delta": 0.15123134851455688, + "fcm_dpo/margin": 14.755053520202637, + "fcm_dpo/q_t": 0.4487529695034027, + "grad_norm": 44.943565368652344, + "learning_rate": 4.814053395442932e-07, + "logits/chosen": -0.7487014532089233, + "logits/rejected": -0.7447975873947144, + "logps/chosen": -224.57212829589844, + "logps/ref_chosen": -219.5537109375, + "logps/ref_rejected": -231.90853881835938, + "logps/rejected": -251.68197631835938, + "loss": 4.8727, + "margin_dpo/margin_mean": 14.75505256652832, + "margin_dpo/margin_std": 24.941452026367188, + "step": 102 + }, + { + "epoch": 0.2157068062827225, + "fcm_dpo/beta": 0.016830556094646454, + "fcm_dpo/delta": 0.15375208854675293, + "fcm_dpo/margin": 13.61697769165039, + "fcm_dpo/q_t": 0.44760948419570923, + "grad_norm": 53.148414611816406, + "learning_rate": 4.807062862684873e-07, + "logits/chosen": -0.7735249996185303, + "logits/rejected": -0.770460307598114, + "logps/chosen": -264.299560546875, + "logps/ref_chosen": -259.6750793457031, + "logps/ref_rejected": -278.7400817871094, + "logps/rejected": -296.9815368652344, + "loss": 4.8764, + "margin_dpo/margin_mean": 13.61697769165039, + "margin_dpo/margin_std": 25.619842529296875, + "step": 103 + }, + { + "epoch": 0.21780104712041884, + "fcm_dpo/beta": 0.01824803464114666, + "fcm_dpo/delta": 0.12172321230173111, + "fcm_dpo/margin": 10.087403297424316, + "fcm_dpo/q_t": 0.45723575353622437, + "grad_norm": 59.135841369628906, + "learning_rate": 4.799948609147061e-07, + "logits/chosen": -0.7728451490402222, + "logits/rejected": -0.7799044251441956, + "logps/chosen": -276.86041259765625, + "logps/ref_chosen": -267.9741516113281, + "logps/ref_rejected": -230.5306396484375, + "logps/rejected": -249.5042724609375, + "loss": 5.0674, + "margin_dpo/margin_mean": 10.087403297424316, + "margin_dpo/margin_std": 26.16142463684082, + "step": 104 + }, + { + "epoch": 0.2198952879581152, + "fcm_dpo/beta": 0.019626103341579437, + "fcm_dpo/delta": 0.09441255033016205, + "fcm_dpo/margin": 20.40988540649414, + "fcm_dpo/q_t": 0.40757566690444946, + "grad_norm": 62.410152435302734, + "learning_rate": 4.792711016345321e-07, + "logits/chosen": -0.7623639106750488, + "logits/rejected": -0.7740727066993713, + "logps/chosen": -327.2814025878906, + "logps/ref_chosen": -322.25482177734375, + "logps/ref_rejected": -279.02978515625, + "logps/rejected": -304.46624755859375, + "loss": 4.3973, + "margin_dpo/margin_mean": 20.409887313842773, + "margin_dpo/margin_std": 26.728302001953125, + "step": 105 + }, + { + "epoch": 0.22198952879581152, + "fcm_dpo/beta": 0.021983552724123, + "fcm_dpo/delta": 0.10915235430002213, + "fcm_dpo/margin": 12.417057991027832, + "fcm_dpo/q_t": 0.4385029673576355, + "grad_norm": 80.47908020019531, + "learning_rate": 4.785350472409791e-07, + "logits/chosen": -0.7452399730682373, + "logits/rejected": -0.782451868057251, + "logps/chosen": -308.17291259765625, + "logps/ref_chosen": -296.15777587890625, + "logps/ref_rejected": -266.2691650390625, + "logps/rejected": -290.70135498046875, + "loss": 4.9373, + "margin_dpo/margin_mean": 12.4170560836792, + "margin_dpo/margin_std": 29.023073196411133, + "step": 106 + }, + { + "epoch": 0.22408376963350785, + "fcm_dpo/beta": 0.024012316018342972, + "fcm_dpo/delta": 0.14064227044582367, + "fcm_dpo/margin": 19.3704833984375, + "fcm_dpo/q_t": 0.3971790373325348, + "grad_norm": 77.79216766357422, + "learning_rate": 4.777867372064105e-07, + "logits/chosen": -0.78067547082901, + "logits/rejected": -0.7740224599838257, + "logps/chosen": -310.7627868652344, + "logps/ref_chosen": -306.996337890625, + "logps/ref_rejected": -296.79412841796875, + "logps/rejected": -319.9310302734375, + "loss": 4.3062, + "margin_dpo/margin_mean": 19.370481491088867, + "margin_dpo/margin_std": 27.15206527709961, + "step": 107 + }, + { + "epoch": 0.2261780104712042, + "fcm_dpo/beta": 0.025636808946728706, + "fcm_dpo/delta": 0.09028993546962738, + "fcm_dpo/margin": 17.937637329101562, + "fcm_dpo/q_t": 0.4029965102672577, + "grad_norm": 286.3813781738281, + "learning_rate": 4.770262116604223e-07, + "logits/chosen": -0.7616235017776489, + "logits/rejected": -0.7734853625297546, + "logps/chosen": -299.8006286621094, + "logps/ref_chosen": -295.1526794433594, + "logps/ref_rejected": -235.974853515625, + "logps/rejected": -258.5604553222656, + "loss": 4.4937, + "margin_dpo/margin_mean": 17.937637329101562, + "margin_dpo/margin_std": 29.53498649597168, + "step": 108 + }, + { + "epoch": 0.22827225130890053, + "fcm_dpo/beta": 0.02691740356385708, + "fcm_dpo/delta": 0.008004628121852875, + "fcm_dpo/margin": 19.709096908569336, + "fcm_dpo/q_t": 0.3899995982646942, + "grad_norm": 89.63356018066406, + "learning_rate": 4.7625351138769166e-07, + "logits/chosen": -0.7978358268737793, + "logits/rejected": -0.796513020992279, + "logps/chosen": -333.0800476074219, + "logps/ref_chosen": -325.9248046875, + "logps/ref_rejected": -279.15423583984375, + "logps/rejected": -306.0185852050781, + "loss": 4.3147, + "margin_dpo/margin_mean": 19.709096908569336, + "margin_dpo/margin_std": 30.691986083984375, + "step": 109 + }, + { + "epoch": 0.23036649214659685, + "fcm_dpo/beta": 0.028422407805919647, + "fcm_dpo/delta": 0.0776296854019165, + "fcm_dpo/margin": 18.461952209472656, + "fcm_dpo/q_t": 0.392780601978302, + "grad_norm": 86.87859344482422, + "learning_rate": 4.75468677825789e-07, + "logits/chosen": -0.7915902137756348, + "logits/rejected": -0.780044674873352, + "logps/chosen": -281.5834655761719, + "logps/ref_chosen": -274.439208984375, + "logps/ref_rejected": -260.0552062988281, + "logps/rejected": -285.6614074707031, + "loss": 4.4874, + "margin_dpo/margin_mean": 18.46194839477539, + "margin_dpo/margin_std": 32.64317321777344, + "step": 110 + }, + { + "epoch": 0.2324607329842932, + "fcm_dpo/beta": 0.029570797458291054, + "fcm_dpo/delta": 0.029479999095201492, + "fcm_dpo/margin": 19.259639739990234, + "fcm_dpo/q_t": 0.38573166728019714, + "grad_norm": 94.41033935546875, + "learning_rate": 4.7467175306295647e-07, + "logits/chosen": -0.8291243314743042, + "logits/rejected": -0.8090481162071228, + "logps/chosen": -336.6731262207031, + "logps/ref_chosen": -329.2361755371094, + "logps/ref_rejected": -287.82830810546875, + "logps/rejected": -314.52490234375, + "loss": 4.4329, + "margin_dpo/margin_mean": 19.259639739990234, + "margin_dpo/margin_std": 32.591739654541016, + "step": 111 + }, + { + "epoch": 0.23455497382198953, + "fcm_dpo/beta": 0.028911547735333443, + "fcm_dpo/delta": -0.026821672916412354, + "fcm_dpo/margin": 12.475048065185547, + "fcm_dpo/q_t": 0.4273641109466553, + "grad_norm": 117.3414077758789, + "learning_rate": 4.7386277983585053e-07, + "logits/chosen": -0.7319104671478271, + "logits/rejected": -0.7623211145401001, + "logps/chosen": -269.155517578125, + "logps/ref_chosen": -257.0593566894531, + "logps/ref_rejected": -272.9595031738281, + "logps/rejected": -297.53070068359375, + "loss": 5.1793, + "margin_dpo/margin_mean": 12.475048065185547, + "margin_dpo/margin_std": 33.321533203125, + "step": 112 + }, + { + "epoch": 0.23664921465968586, + "fcm_dpo/beta": 0.02749396488070488, + "fcm_dpo/delta": -0.030711829662322998, + "fcm_dpo/margin": 22.709571838378906, + "fcm_dpo/q_t": 0.3759039044380188, + "grad_norm": 89.33954620361328, + "learning_rate": 4.7304180152725024e-07, + "logits/chosen": -0.7936510443687439, + "logits/rejected": -0.7988536953926086, + "logps/chosen": -294.9634094238281, + "logps/ref_chosen": -286.0416564941406, + "logps/ref_rejected": -270.374267578125, + "logps/rejected": -302.00555419921875, + "loss": 4.2854, + "margin_dpo/margin_mean": 22.709571838378906, + "margin_dpo/margin_std": 36.09437942504883, + "step": 113 + }, + { + "epoch": 0.2387434554973822, + "fcm_dpo/beta": 0.029794633388519287, + "fcm_dpo/delta": 0.07771297544240952, + "fcm_dpo/margin": 12.448982238769531, + "fcm_dpo/q_t": 0.42108646035194397, + "grad_norm": 107.05913543701172, + "learning_rate": 4.7220886216373085e-07, + "logits/chosen": -0.83903968334198, + "logits/rejected": -0.841633677482605, + "logps/chosen": -271.00335693359375, + "logps/ref_chosen": -260.0084533691406, + "logps/ref_rejected": -246.67190551757812, + "logps/rejected": -270.11578369140625, + "loss": 5.0306, + "margin_dpo/margin_mean": 12.448982238769531, + "margin_dpo/margin_std": 31.104576110839844, + "step": 114 + }, + { + "epoch": 0.24083769633507854, + "fcm_dpo/beta": 0.029507935047149658, + "fcm_dpo/delta": 0.016908658668398857, + "fcm_dpo/margin": 12.143805503845215, + "fcm_dpo/q_t": 0.4331102967262268, + "grad_norm": 120.61966705322266, + "learning_rate": 4.7136400641330245e-07, + "logits/chosen": -0.8338419795036316, + "logits/rejected": -0.7967959642410278, + "logps/chosen": -310.32476806640625, + "logps/ref_chosen": -299.4229736328125, + "logps/ref_rejected": -272.1186828613281, + "logps/rejected": -295.1643371582031, + "loss": 5.1519, + "margin_dpo/margin_mean": 12.143804550170898, + "margin_dpo/margin_std": 34.1319465637207, + "step": 115 + }, + { + "epoch": 0.24293193717277486, + "fcm_dpo/beta": 0.03158475458621979, + "fcm_dpo/delta": 0.08304879814386368, + "fcm_dpo/margin": 16.471763610839844, + "fcm_dpo/q_t": 0.39313048124313354, + "grad_norm": 96.66343688964844, + "learning_rate": 4.70507279583015e-07, + "logits/chosen": -0.8481428027153015, + "logits/rejected": -0.8136316537857056, + "logps/chosen": -284.63897705078125, + "logps/ref_chosen": -279.263916015625, + "logps/ref_rejected": -253.6192169189453, + "logps/rejected": -275.46600341796875, + "loss": 4.5386, + "margin_dpo/margin_mean": 16.471763610839844, + "margin_dpo/margin_std": 29.921730041503906, + "step": 116 + }, + { + "epoch": 0.2450261780104712, + "fcm_dpo/beta": 0.03184635192155838, + "fcm_dpo/delta": -0.10544593632221222, + "fcm_dpo/margin": 17.663230895996094, + "fcm_dpo/q_t": 0.3876641094684601, + "grad_norm": 113.3866958618164, + "learning_rate": 4.6963872761652834e-07, + "logits/chosen": -0.7901442646980286, + "logits/rejected": -0.7954122424125671, + "logps/chosen": -266.06890869140625, + "logps/ref_chosen": -259.2248840332031, + "logps/ref_rejected": -229.3042755126953, + "logps/rejected": -253.81155395507812, + "loss": 4.4728, + "margin_dpo/margin_mean": 17.663230895996094, + "margin_dpo/margin_std": 28.427824020385742, + "step": 117 + }, + { + "epoch": 0.24712041884816754, + "fcm_dpo/beta": 0.029734350740909576, + "fcm_dpo/delta": -0.03558747097849846, + "fcm_dpo/margin": 19.60186004638672, + "fcm_dpo/q_t": 0.38436776399612427, + "grad_norm": 113.9665756225586, + "learning_rate": 4.687583970916486e-07, + "logits/chosen": -0.7948500514030457, + "logits/rejected": -0.7873266935348511, + "logps/chosen": -276.48236083984375, + "logps/ref_chosen": -267.0707092285156, + "logps/ref_rejected": -272.7322082519531, + "logps/rejected": -301.7456970214844, + "loss": 4.4767, + "margin_dpo/margin_mean": 19.601858139038086, + "margin_dpo/margin_std": 34.46326446533203, + "step": 118 + }, + { + "epoch": 0.24921465968586387, + "fcm_dpo/beta": 0.029399575665593147, + "fcm_dpo/delta": -0.0034819915890693665, + "fcm_dpo/margin": 15.70901870727539, + "fcm_dpo/q_t": 0.4097801744937897, + "grad_norm": 116.46439361572266, + "learning_rate": 4.6786633521783005e-07, + "logits/chosen": -0.8555842638015747, + "logits/rejected": -0.8587056398391724, + "logps/chosen": -336.5263671875, + "logps/ref_chosen": -324.6766357421875, + "logps/ref_rejected": -306.0322265625, + "logps/rejected": -333.5909423828125, + "loss": 4.8796, + "margin_dpo/margin_mean": 15.709016799926758, + "margin_dpo/margin_std": 34.084205627441406, + "step": 119 + }, + { + "epoch": 0.2513089005235602, + "fcm_dpo/beta": 0.029722902923822403, + "fcm_dpo/delta": 0.015680911019444466, + "fcm_dpo/margin": 15.187647819519043, + "fcm_dpo/q_t": 0.41209471225738525, + "grad_norm": 98.18533325195312, + "learning_rate": 4.669625898336438e-07, + "logits/chosen": -0.8075263500213623, + "logits/rejected": -0.8280918598175049, + "logps/chosen": -324.54205322265625, + "logps/ref_chosen": -315.2617492675781, + "logps/ref_rejected": -265.32501220703125, + "logps/rejected": -289.79296875, + "loss": 4.8908, + "margin_dpo/margin_mean": 15.187647819519043, + "margin_dpo/margin_std": 33.20510482788086, + "step": 120 + }, + { + "epoch": 0.2534031413612565, + "fcm_dpo/beta": 0.03124306909739971, + "fcm_dpo/delta": 0.14657826721668243, + "fcm_dpo/margin": 12.737668991088867, + "fcm_dpo/q_t": 0.42548656463623047, + "grad_norm": 110.32199096679688, + "learning_rate": 4.6604720940421207e-07, + "logits/chosen": -0.8259115815162659, + "logits/rejected": -0.8430719971656799, + "logps/chosen": -235.69189453125, + "logps/ref_chosen": -222.99609375, + "logps/ref_rejected": -226.92860412597656, + "logps/rejected": -252.36209106445312, + "loss": 5.0129, + "margin_dpo/margin_mean": 12.737669944763184, + "margin_dpo/margin_std": 32.302677154541016, + "step": 121 + }, + { + "epoch": 0.2554973821989529, + "fcm_dpo/beta": 0.032185669988393784, + "fcm_dpo/delta": -0.019368404522538185, + "fcm_dpo/margin": 15.059699058532715, + "fcm_dpo/q_t": 0.40549296140670776, + "grad_norm": 117.572509765625, + "learning_rate": 4.651202430186092e-07, + "logits/chosen": -0.8742939829826355, + "logits/rejected": -0.8380413055419922, + "logps/chosen": -288.3365783691406, + "logps/ref_chosen": -276.02630615234375, + "logps/ref_rejected": -277.97418212890625, + "logps/rejected": -305.3441162109375, + "loss": 4.9239, + "margin_dpo/margin_mean": 15.059700012207031, + "margin_dpo/margin_std": 34.5055046081543, + "step": 122 + }, + { + "epoch": 0.25759162303664923, + "fcm_dpo/beta": 0.03192441910505295, + "fcm_dpo/delta": -0.06708841025829315, + "fcm_dpo/margin": 20.746381759643555, + "fcm_dpo/q_t": 0.3659403324127197, + "grad_norm": 114.86331176757812, + "learning_rate": 4.6418174038722924e-07, + "logits/chosen": -0.7943709492683411, + "logits/rejected": -0.794708788394928, + "logps/chosen": -334.998291015625, + "logps/ref_chosen": -328.1546325683594, + "logps/ref_rejected": -280.6911315917969, + "logps/rejected": -308.28118896484375, + "loss": 4.2403, + "margin_dpo/margin_mean": 20.746379852294922, + "margin_dpo/margin_std": 32.13544464111328, + "step": 123 + }, + { + "epoch": 0.25968586387434556, + "fcm_dpo/beta": 0.030819490551948547, + "fcm_dpo/delta": 0.03179997205734253, + "fcm_dpo/margin": 16.770048141479492, + "fcm_dpo/q_t": 0.39261382818222046, + "grad_norm": 99.67831420898438, + "learning_rate": 4.6323175183912023e-07, + "logits/chosen": -0.8295610547065735, + "logits/rejected": -0.8021270036697388, + "logps/chosen": -285.3762512207031, + "logps/ref_chosen": -275.6961975097656, + "logps/ref_rejected": -225.361572265625, + "logps/rejected": -251.8116455078125, + "loss": 4.5914, + "margin_dpo/margin_mean": 16.770048141479492, + "margin_dpo/margin_std": 29.99167251586914, + "step": 124 + }, + { + "epoch": 0.2617801047120419, + "fcm_dpo/beta": 0.03114517405629158, + "fcm_dpo/delta": -0.03549438342452049, + "fcm_dpo/margin": 16.479698181152344, + "fcm_dpo/q_t": 0.40479522943496704, + "grad_norm": 124.4884033203125, + "learning_rate": 4.6227032831928483e-07, + "logits/chosen": -0.7944302558898926, + "logits/rejected": -0.7552446722984314, + "logps/chosen": -288.5038757324219, + "logps/ref_chosen": -278.06976318359375, + "logps/ref_rejected": -265.63873291015625, + "logps/rejected": -292.5525207519531, + "loss": 4.8872, + "margin_dpo/margin_mean": 16.479698181152344, + "margin_dpo/margin_std": 36.26911544799805, + "step": 125 + }, + { + "epoch": 0.2638743455497382, + "fcm_dpo/beta": 0.03014766052365303, + "fcm_dpo/delta": -0.020349113270640373, + "fcm_dpo/margin": 18.311885833740234, + "fcm_dpo/q_t": 0.38548335433006287, + "grad_norm": 109.14166259765625, + "learning_rate": 4.612975213859487e-07, + "logits/chosen": -0.8047983646392822, + "logits/rejected": -0.8239343166351318, + "logps/chosen": -329.813232421875, + "logps/ref_chosen": -321.3960876464844, + "logps/ref_rejected": -285.37664794921875, + "logps/rejected": -312.10565185546875, + "loss": 4.4441, + "margin_dpo/margin_mean": 18.311885833740234, + "margin_dpo/margin_std": 30.9145450592041, + "step": 126 + }, + { + "epoch": 0.26596858638743454, + "fcm_dpo/beta": 0.030198298394680023, + "fcm_dpo/delta": -0.08730512112379074, + "fcm_dpo/margin": 20.74812889099121, + "fcm_dpo/q_t": 0.37573808431625366, + "grad_norm": 107.66555786132812, + "learning_rate": 4.603133832077953e-07, + "logits/chosen": -0.87255859375, + "logits/rejected": -0.8262636661529541, + "logps/chosen": -313.08575439453125, + "logps/ref_chosen": -306.55877685546875, + "logps/ref_rejected": -274.8651428222656, + "logps/rejected": -302.1402587890625, + "loss": 4.3108, + "margin_dpo/margin_mean": 20.748130798339844, + "margin_dpo/margin_std": 31.776979446411133, + "step": 127 + }, + { + "epoch": 0.2680628272251309, + "fcm_dpo/beta": 0.027010329067707062, + "fcm_dpo/delta": -0.06802891194820404, + "fcm_dpo/margin": 22.768661499023438, + "fcm_dpo/q_t": 0.37576210498809814, + "grad_norm": 87.82177734375, + "learning_rate": 4.5931796656116837e-07, + "logits/chosen": -0.7685502171516418, + "logits/rejected": -0.7739553451538086, + "logps/chosen": -268.2386779785156, + "logps/ref_chosen": -265.3973693847656, + "logps/ref_rejected": -250.9737548828125, + "logps/rejected": -276.5837097167969, + "loss": 4.2397, + "margin_dpo/margin_mean": 22.768665313720703, + "margin_dpo/margin_std": 34.84334945678711, + "step": 128 + }, + { + "epoch": 0.27015706806282724, + "fcm_dpo/beta": 0.027978552505373955, + "fcm_dpo/delta": 0.057149242609739304, + "fcm_dpo/margin": 19.382301330566406, + "fcm_dpo/q_t": 0.39152759313583374, + "grad_norm": 95.93099212646484, + "learning_rate": 4.5831132482724193e-07, + "logits/chosen": -0.7922682166099548, + "logits/rejected": -0.795950710773468, + "logps/chosen": -307.2889404296875, + "logps/ref_chosen": -303.158447265625, + "logps/ref_rejected": -275.9891052246094, + "logps/rejected": -299.50189208984375, + "loss": 4.457, + "margin_dpo/margin_mean": 19.38229751586914, + "margin_dpo/margin_std": 33.825496673583984, + "step": 129 + }, + { + "epoch": 0.27225130890052357, + "fcm_dpo/beta": 0.028778987005352974, + "fcm_dpo/delta": 0.07279841601848602, + "fcm_dpo/margin": 16.71417999267578, + "fcm_dpo/q_t": 0.401623010635376, + "grad_norm": 103.26705932617188, + "learning_rate": 4.5729351198915705e-07, + "logits/chosen": -0.7664986848831177, + "logits/rejected": -0.8091428875923157, + "logps/chosen": -292.35394287109375, + "logps/ref_chosen": -286.4073486328125, + "logps/ref_rejected": -294.38665771484375, + "logps/rejected": -317.0474853515625, + "loss": 4.6181, + "margin_dpo/margin_mean": 16.714181900024414, + "margin_dpo/margin_std": 32.14228820800781, + "step": 130 + }, + { + "epoch": 0.2743455497382199, + "fcm_dpo/beta": 0.03138697147369385, + "fcm_dpo/delta": 0.12776511907577515, + "fcm_dpo/margin": 15.206416130065918, + "fcm_dpo/q_t": 0.40508803725242615, + "grad_norm": 126.76692199707031, + "learning_rate": 4.5626458262912735e-07, + "logits/chosen": -0.8393828868865967, + "logits/rejected": -0.7898960113525391, + "logps/chosen": -317.5235290527344, + "logps/ref_chosen": -311.5650634765625, + "logps/ref_rejected": -291.62432861328125, + "logps/rejected": -312.7891845703125, + "loss": 4.7479, + "margin_dpo/margin_mean": 15.206417083740234, + "margin_dpo/margin_std": 31.15882110595703, + "step": 131 + }, + { + "epoch": 0.2764397905759162, + "fcm_dpo/beta": 0.03294968605041504, + "fcm_dpo/delta": -0.09555768966674805, + "fcm_dpo/margin": 20.942249298095703, + "fcm_dpo/q_t": 0.37162911891937256, + "grad_norm": 136.78445434570312, + "learning_rate": 4.5522459192551166e-07, + "logits/chosen": -0.8077597618103027, + "logits/rejected": -0.7918823957443237, + "logps/chosen": -272.0758972167969, + "logps/ref_chosen": -270.0818176269531, + "logps/ref_rejected": -284.3084411621094, + "logps/rejected": -307.24481201171875, + "loss": 4.3352, + "margin_dpo/margin_mean": 20.942249298095703, + "margin_dpo/margin_std": 33.96846389770508, + "step": 132 + }, + { + "epoch": 0.27853403141361255, + "fcm_dpo/beta": 0.030396468937397003, + "fcm_dpo/delta": -0.021258918568491936, + "fcm_dpo/margin": 18.287437438964844, + "fcm_dpo/q_t": 0.38483449816703796, + "grad_norm": 97.30946350097656, + "learning_rate": 4.541735956498554e-07, + "logits/chosen": -0.8339589834213257, + "logits/rejected": -0.841139018535614, + "logps/chosen": -287.4236145019531, + "logps/ref_chosen": -285.6213684082031, + "logps/ref_rejected": -251.19386291503906, + "logps/rejected": -271.2835693359375, + "loss": 4.4624, + "margin_dpo/margin_mean": 18.287437438964844, + "margin_dpo/margin_std": 30.66234588623047, + "step": 133 + }, + { + "epoch": 0.2806282722513089, + "fcm_dpo/beta": 0.03080589883029461, + "fcm_dpo/delta": 0.050978198647499084, + "fcm_dpo/margin": 15.381253242492676, + "fcm_dpo/q_t": 0.40036991238594055, + "grad_norm": 106.52774810791016, + "learning_rate": 4.5311165016389914e-07, + "logits/chosen": -0.8478070497512817, + "logits/rejected": -0.8514746427536011, + "logps/chosen": -328.1652526855469, + "logps/ref_chosen": -318.92083740234375, + "logps/ref_rejected": -293.1894836425781, + "logps/rejected": -317.8151550292969, + "loss": 4.662, + "margin_dpo/margin_mean": 15.381254196166992, + "margin_dpo/margin_std": 29.16480255126953, + "step": 134 + }, + { + "epoch": 0.28272251308900526, + "fcm_dpo/beta": 0.031100064516067505, + "fcm_dpo/delta": -0.0012684855610132217, + "fcm_dpo/margin": 17.726974487304688, + "fcm_dpo/q_t": 0.38483473658561707, + "grad_norm": 133.8107147216797, + "learning_rate": 4.520388124165564e-07, + "logits/chosen": -0.7306185364723206, + "logits/rejected": -0.7757068872451782, + "logps/chosen": -296.7521667480469, + "logps/ref_chosen": -292.8217468261719, + "logps/ref_rejected": -269.2896728515625, + "logps/rejected": -290.9470520019531, + "loss": 4.365, + "margin_dpo/margin_mean": 17.726974487304688, + "margin_dpo/margin_std": 27.901412963867188, + "step": 135 + }, + { + "epoch": 0.2848167539267016, + "fcm_dpo/beta": 0.03128836303949356, + "fcm_dpo/delta": 0.02984962984919548, + "fcm_dpo/margin": 16.618209838867188, + "fcm_dpo/q_t": 0.3998725414276123, + "grad_norm": 131.5796356201172, + "learning_rate": 4.5095513994085974e-07, + "logits/chosen": -0.7901206612586975, + "logits/rejected": -0.7873492240905762, + "logps/chosen": -278.5729064941406, + "logps/ref_chosen": -272.8525390625, + "logps/ref_rejected": -252.68202209472656, + "logps/rejected": -275.0205993652344, + "loss": 4.711, + "margin_dpo/margin_mean": 16.618209838867188, + "margin_dpo/margin_std": 32.85752487182617, + "step": 136 + }, + { + "epoch": 0.2869109947643979, + "fcm_dpo/beta": 0.03243596479296684, + "fcm_dpo/delta": 0.0307313185185194, + "fcm_dpo/margin": 15.30479621887207, + "fcm_dpo/q_t": 0.4031601846218109, + "grad_norm": 127.34578704833984, + "learning_rate": 4.498606908508753e-07, + "logits/chosen": -0.8465839624404907, + "logits/rejected": -0.8333037495613098, + "logps/chosen": -308.864013671875, + "logps/ref_chosen": -300.7522277832031, + "logps/ref_rejected": -286.1935119628906, + "logps/rejected": -309.6100769042969, + "loss": 4.7322, + "margin_dpo/margin_mean": 15.30479621887207, + "margin_dpo/margin_std": 30.956771850585938, + "step": 137 + }, + { + "epoch": 0.28900523560209423, + "fcm_dpo/beta": 0.032454121857881546, + "fcm_dpo/delta": 0.005412563681602478, + "fcm_dpo/margin": 18.291305541992188, + "fcm_dpo/q_t": 0.3903680145740509, + "grad_norm": 106.79438781738281, + "learning_rate": 4.487555238385862e-07, + "logits/chosen": -0.7613782286643982, + "logits/rejected": -0.7434461712837219, + "logps/chosen": -294.6986083984375, + "logps/ref_chosen": -288.9369812011719, + "logps/ref_rejected": -263.7076416015625, + "logps/rejected": -287.7606201171875, + "loss": 4.5596, + "margin_dpo/margin_mean": 18.291303634643555, + "margin_dpo/margin_std": 34.35835266113281, + "step": 138 + }, + { + "epoch": 0.29109947643979056, + "fcm_dpo/beta": 0.03308243677020073, + "fcm_dpo/delta": 0.03304573893547058, + "fcm_dpo/margin": 13.005290985107422, + "fcm_dpo/q_t": 0.41861557960510254, + "grad_norm": 116.88390350341797, + "learning_rate": 4.476396981707453e-07, + "logits/chosen": -0.7813708782196045, + "logits/rejected": -0.8129632472991943, + "logps/chosen": -274.0767517089844, + "logps/ref_chosen": -270.0443115234375, + "logps/ref_rejected": -267.3226013183594, + "logps/rejected": -284.3603210449219, + "loss": 4.901, + "margin_dpo/margin_mean": 13.005290031433105, + "margin_dpo/margin_std": 29.456113815307617, + "step": 139 + }, + { + "epoch": 0.2931937172774869, + "fcm_dpo/beta": 0.03421860188245773, + "fcm_dpo/delta": 0.0003454945981502533, + "fcm_dpo/margin": 17.4409236907959, + "fcm_dpo/q_t": 0.3806909918785095, + "grad_norm": 129.08346557617188, + "learning_rate": 4.4651327368569684e-07, + "logits/chosen": -0.8671438694000244, + "logits/rejected": -0.841330349445343, + "logps/chosen": -287.3354187011719, + "logps/ref_chosen": -282.9555969238281, + "logps/ref_rejected": -251.17181396484375, + "logps/rejected": -272.9925537109375, + "loss": 4.2991, + "margin_dpo/margin_mean": 17.440921783447266, + "margin_dpo/margin_std": 27.346405029296875, + "step": 140 + }, + { + "epoch": 0.29528795811518327, + "fcm_dpo/beta": 0.03223487734794617, + "fcm_dpo/delta": -0.05671250820159912, + "fcm_dpo/margin": 20.16065216064453, + "fcm_dpo/q_t": 0.36774590611457825, + "grad_norm": 107.1982192993164, + "learning_rate": 4.453763107901675e-07, + "logits/chosen": -0.7901620268821716, + "logits/rejected": -0.7895568013191223, + "logps/chosen": -298.8914794921875, + "logps/ref_chosen": -296.3001708984375, + "logps/ref_rejected": -279.8486633300781, + "logps/rejected": -302.6006164550781, + "loss": 4.2532, + "margin_dpo/margin_mean": 20.160648345947266, + "margin_dpo/margin_std": 31.275304794311523, + "step": 141 + }, + { + "epoch": 0.2973821989528796, + "fcm_dpo/beta": 0.031261567026376724, + "fcm_dpo/delta": -0.0013711625942960382, + "fcm_dpo/margin": 16.695674896240234, + "fcm_dpo/q_t": 0.40020960569381714, + "grad_norm": 104.9339370727539, + "learning_rate": 4.4422887045602674e-07, + "logits/chosen": -0.8087915778160095, + "logits/rejected": -0.8152974843978882, + "logps/chosen": -304.1774597167969, + "logps/ref_chosen": -300.56585693359375, + "logps/ref_rejected": -231.43316650390625, + "logps/rejected": -251.74044799804688, + "loss": 4.7019, + "margin_dpo/margin_mean": 16.695674896240234, + "margin_dpo/margin_std": 33.27724075317383, + "step": 142 + }, + { + "epoch": 0.2994764397905759, + "fcm_dpo/beta": 0.03194243088364601, + "fcm_dpo/delta": -0.005147319287061691, + "fcm_dpo/margin": 18.84752655029297, + "fcm_dpo/q_t": 0.37769022583961487, + "grad_norm": 109.19286346435547, + "learning_rate": 4.4307101421701755e-07, + "logits/chosen": -0.7999674677848816, + "logits/rejected": -0.7869732975959778, + "logps/chosen": -300.0097961425781, + "logps/ref_chosen": -296.73236083984375, + "logps/ref_rejected": -266.45257568359375, + "logps/rejected": -288.5776062011719, + "loss": 4.3005, + "margin_dpo/margin_mean": 18.847524642944336, + "margin_dpo/margin_std": 29.313934326171875, + "step": 143 + }, + { + "epoch": 0.30157068062827225, + "fcm_dpo/beta": 0.03147399052977562, + "fcm_dpo/delta": 0.05607675388455391, + "fcm_dpo/margin": 16.06841278076172, + "fcm_dpo/q_t": 0.4018367528915405, + "grad_norm": 109.13096618652344, + "learning_rate": 4.419028041654559e-07, + "logits/chosen": -0.8504543304443359, + "logits/rejected": -0.8398086428642273, + "logps/chosen": -302.9305419921875, + "logps/ref_chosen": -298.843994140625, + "logps/ref_rejected": -266.120849609375, + "logps/rejected": -286.2757873535156, + "loss": 4.6688, + "margin_dpo/margin_mean": 16.06841278076172, + "margin_dpo/margin_std": 32.15247344970703, + "step": 144 + }, + { + "epoch": 0.3036649214659686, + "fcm_dpo/beta": 0.03136536106467247, + "fcm_dpo/delta": -0.10698030889034271, + "fcm_dpo/margin": 20.321487426757812, + "fcm_dpo/q_t": 0.36860162019729614, + "grad_norm": 104.23075103759766, + "learning_rate": 4.4072430294890166e-07, + "logits/chosen": -0.8474912047386169, + "logits/rejected": -0.8559509515762329, + "logps/chosen": -278.58154296875, + "logps/ref_chosen": -275.7528381347656, + "logps/ref_rejected": -214.74807739257812, + "logps/rejected": -237.8982391357422, + "loss": 4.1589, + "margin_dpo/margin_mean": 20.321487426757812, + "margin_dpo/margin_std": 28.55498504638672, + "step": 145 + }, + { + "epoch": 0.3057591623036649, + "fcm_dpo/beta": 0.030392833054065704, + "fcm_dpo/delta": 0.009405029937624931, + "fcm_dpo/margin": 19.343936920166016, + "fcm_dpo/q_t": 0.38136640191078186, + "grad_norm": 100.81139373779297, + "learning_rate": 4.395355737667985e-07, + "logits/chosen": -0.817609429359436, + "logits/rejected": -0.8185821771621704, + "logps/chosen": -284.92779541015625, + "logps/ref_chosen": -277.09820556640625, + "logps/ref_rejected": -265.41046142578125, + "logps/rejected": -292.58392333984375, + "loss": 4.2804, + "margin_dpo/margin_mean": 19.343936920166016, + "margin_dpo/margin_std": 29.182607650756836, + "step": 146 + }, + { + "epoch": 0.3078534031413613, + "fcm_dpo/beta": 0.03225337713956833, + "fcm_dpo/delta": 0.03318355232477188, + "fcm_dpo/margin": 15.443235397338867, + "fcm_dpo/q_t": 0.40001511573791504, + "grad_norm": 107.14227294921875, + "learning_rate": 4.3833668036708483e-07, + "logits/chosen": -0.8150308132171631, + "logits/rejected": -0.8176466822624207, + "logps/chosen": -299.32708740234375, + "logps/ref_chosen": -291.4185791015625, + "logps/ref_rejected": -253.43051147460938, + "logps/rejected": -276.7822265625, + "loss": 4.7888, + "margin_dpo/margin_mean": 15.443236351013184, + "margin_dpo/margin_std": 31.842870712280273, + "step": 147 + }, + { + "epoch": 0.3099476439790576, + "fcm_dpo/beta": 0.03284765034914017, + "fcm_dpo/delta": 0.06887248158454895, + "fcm_dpo/margin": 15.128622055053711, + "fcm_dpo/q_t": 0.4055444300174713, + "grad_norm": 105.72512817382812, + "learning_rate": 4.3712768704277524e-07, + "logits/chosen": -0.8757432699203491, + "logits/rejected": -0.8821508288383484, + "logps/chosen": -244.3941650390625, + "logps/ref_chosen": -236.74850463867188, + "logps/ref_rejected": -231.4674072265625, + "logps/rejected": -254.24166870117188, + "loss": 4.7548, + "margin_dpo/margin_mean": 15.128622055053711, + "margin_dpo/margin_std": 31.366443634033203, + "step": 148 + }, + { + "epoch": 0.31204188481675393, + "fcm_dpo/beta": 0.032370131462812424, + "fcm_dpo/delta": -0.04777521640062332, + "fcm_dpo/margin": 19.857650756835938, + "fcm_dpo/q_t": 0.3684397339820862, + "grad_norm": 107.77214813232422, + "learning_rate": 4.3590865862851263e-07, + "logits/chosen": -0.8252199292182922, + "logits/rejected": -0.8139665126800537, + "logps/chosen": -326.12774658203125, + "logps/ref_chosen": -319.9284973144531, + "logps/ref_rejected": -308.20233154296875, + "logps/rejected": -334.2592468261719, + "loss": 4.0675, + "margin_dpo/margin_mean": 19.857654571533203, + "margin_dpo/margin_std": 27.37247085571289, + "step": 149 + }, + { + "epoch": 0.31413612565445026, + "fcm_dpo/beta": 0.032013505697250366, + "fcm_dpo/delta": 0.011951310560107231, + "fcm_dpo/margin": 18.362552642822266, + "fcm_dpo/q_t": 0.38145214319229126, + "grad_norm": 108.20628356933594, + "learning_rate": 4.346796604970912e-07, + "logits/chosen": -0.8032433390617371, + "logits/rejected": -0.7947119474411011, + "logps/chosen": -286.0317077636719, + "logps/ref_chosen": -276.3182373046875, + "logps/ref_rejected": -273.02215576171875, + "logps/rejected": -301.0981750488281, + "loss": 4.3501, + "margin_dpo/margin_mean": 18.362550735473633, + "margin_dpo/margin_std": 29.839893341064453, + "step": 150 + }, + { + "epoch": 0.3162303664921466, + "fcm_dpo/beta": 0.029401123523712158, + "fcm_dpo/delta": -0.19104339182376862, + "fcm_dpo/margin": 26.312572479248047, + "fcm_dpo/q_t": 0.34187808632850647, + "grad_norm": 90.58390045166016, + "learning_rate": 4.3344075855595097e-07, + "logits/chosen": -0.8197271823883057, + "logits/rejected": -0.8265554308891296, + "logps/chosen": -304.98236083984375, + "logps/ref_chosen": -297.31280517578125, + "logps/ref_rejected": -266.1003723144531, + "logps/rejected": -300.0824890136719, + "loss": 3.7524, + "margin_dpo/margin_mean": 26.312572479248047, + "margin_dpo/margin_std": 30.86597442626953, + "step": 151 + }, + { + "epoch": 0.3183246073298429, + "fcm_dpo/beta": 0.027028188109397888, + "fcm_dpo/delta": -0.029568390920758247, + "fcm_dpo/margin": 20.28197479248047, + "fcm_dpo/q_t": 0.38840028643608093, + "grad_norm": 95.14047241210938, + "learning_rate": 4.3219201924364323e-07, + "logits/chosen": -0.8347331285476685, + "logits/rejected": -0.8374426364898682, + "logps/chosen": -276.0089416503906, + "logps/ref_chosen": -270.2470397949219, + "logps/ref_rejected": -269.7749328613281, + "logps/rejected": -295.8188781738281, + "loss": 4.3761, + "margin_dpo/margin_mean": 20.28197479248047, + "margin_dpo/margin_std": 31.723121643066406, + "step": 152 + }, + { + "epoch": 0.3204188481675393, + "fcm_dpo/beta": 0.0251263827085495, + "fcm_dpo/delta": -0.1380881667137146, + "fcm_dpo/margin": 28.840253829956055, + "fcm_dpo/q_t": 0.34343641996383667, + "grad_norm": 84.2206039428711, + "learning_rate": 4.309335095262675e-07, + "logits/chosen": -0.8285923004150391, + "logits/rejected": -0.8218899369239807, + "logps/chosen": -283.19036865234375, + "logps/ref_chosen": -273.779052734375, + "logps/ref_rejected": -280.9530944824219, + "logps/rejected": -319.20465087890625, + "loss": 3.6856, + "margin_dpo/margin_mean": 28.840253829956055, + "margin_dpo/margin_std": 30.283130645751953, + "step": 153 + }, + { + "epoch": 0.3225130890052356, + "fcm_dpo/beta": 0.024389155209064484, + "fcm_dpo/delta": -0.017212260514497757, + "fcm_dpo/margin": 20.448200225830078, + "fcm_dpo/q_t": 0.3947216272354126, + "grad_norm": 89.34386444091797, + "learning_rate": 4.2966529689388064e-07, + "logits/chosen": -0.8547238707542419, + "logits/rejected": -0.841791033744812, + "logps/chosen": -301.56524658203125, + "logps/ref_chosen": -289.9031982421875, + "logps/ref_rejected": -261.5166320800781, + "logps/rejected": -293.62689208984375, + "loss": 4.4855, + "margin_dpo/margin_mean": 20.448200225830078, + "margin_dpo/margin_std": 34.4425048828125, + "step": 154 + }, + { + "epoch": 0.32460732984293195, + "fcm_dpo/beta": 0.0245128832757473, + "fcm_dpo/delta": 0.04135804995894432, + "fcm_dpo/margin": 20.464195251464844, + "fcm_dpo/q_t": 0.3978845477104187, + "grad_norm": 100.74219512939453, + "learning_rate": 4.2838744935687716e-07, + "logits/chosen": -0.7908228635787964, + "logits/rejected": -0.7928870916366577, + "logps/chosen": -299.3333435058594, + "logps/ref_chosen": -285.8612060546875, + "logps/ref_rejected": -300.1272888183594, + "logps/rejected": -334.0636291503906, + "loss": 4.4532, + "margin_dpo/margin_mean": 20.464195251464844, + "margin_dpo/margin_std": 34.932029724121094, + "step": 155 + }, + { + "epoch": 0.3267015706806283, + "fcm_dpo/beta": 0.024317309260368347, + "fcm_dpo/delta": -0.13548636436462402, + "fcm_dpo/margin": 29.958675384521484, + "fcm_dpo/q_t": 0.3527216911315918, + "grad_norm": 76.70926666259766, + "learning_rate": 4.271000354423425e-07, + "logits/chosen": -0.8232815265655518, + "logits/rejected": -0.8277627825737, + "logps/chosen": -291.4639587402344, + "logps/ref_chosen": -279.0354919433594, + "logps/ref_rejected": -244.2198486328125, + "logps/rejected": -286.6070251464844, + "loss": 3.9783, + "margin_dpo/margin_mean": 29.95867919921875, + "margin_dpo/margin_std": 39.6899299621582, + "step": 156 + }, + { + "epoch": 0.3287958115183246, + "fcm_dpo/beta": 0.0227323267608881, + "fcm_dpo/delta": 0.0647030621767044, + "fcm_dpo/margin": 21.048620223999023, + "fcm_dpo/q_t": 0.39951539039611816, + "grad_norm": 85.27225494384766, + "learning_rate": 4.258031241903777e-07, + "logits/chosen": -0.8875189423561096, + "logits/rejected": -0.8885977864265442, + "logps/chosen": -287.203125, + "logps/ref_chosen": -270.830322265625, + "logps/ref_rejected": -259.08319091796875, + "logps/rejected": -296.504638671875, + "loss": 4.4747, + "margin_dpo/margin_mean": 21.048620223999023, + "margin_dpo/margin_std": 34.90514373779297, + "step": 157 + }, + { + "epoch": 0.3308900523560209, + "fcm_dpo/beta": 0.02372920699417591, + "fcm_dpo/delta": -0.022162986919283867, + "fcm_dpo/margin": 23.346778869628906, + "fcm_dpo/q_t": 0.38233768939971924, + "grad_norm": 88.78839874267578, + "learning_rate": 4.2449678515039743e-07, + "logits/chosen": -0.8333015441894531, + "logits/rejected": -0.822943389415741, + "logps/chosen": -306.6914367675781, + "logps/ref_chosen": -289.9663391113281, + "logps/ref_rejected": -271.335693359375, + "logps/rejected": -311.4075927734375, + "loss": 4.2801, + "margin_dpo/margin_mean": 23.346778869628906, + "margin_dpo/margin_std": 34.549774169921875, + "step": 158 + }, + { + "epoch": 0.33298429319371725, + "fcm_dpo/beta": 0.023554343730211258, + "fcm_dpo/delta": 0.04829606041312218, + "fcm_dpo/margin": 18.543855667114258, + "fcm_dpo/q_t": 0.4130256772041321, + "grad_norm": 94.4974136352539, + "learning_rate": 4.2318108837739986e-07, + "logits/chosen": -0.9109346270561218, + "logits/rejected": -0.8718158602714539, + "logps/chosen": -340.6222839355469, + "logps/ref_chosen": -321.37835693359375, + "logps/ref_rejected": -250.45652770996094, + "logps/rejected": -288.24432373046875, + "loss": 4.8111, + "margin_dpo/margin_mean": 18.54385757446289, + "margin_dpo/margin_std": 39.477230072021484, + "step": 159 + }, + { + "epoch": 0.33507853403141363, + "fcm_dpo/beta": 0.023043226450681686, + "fcm_dpo/delta": -0.07298657298088074, + "fcm_dpo/margin": 28.90218734741211, + "fcm_dpo/q_t": 0.3582006096839905, + "grad_norm": 87.2422866821289, + "learning_rate": 4.218561044282098e-07, + "logits/chosen": -0.8484607338905334, + "logits/rejected": -0.8577648401260376, + "logps/chosen": -291.74420166015625, + "logps/ref_chosen": -276.28350830078125, + "logps/ref_rejected": -262.7477722167969, + "logps/rejected": -307.1106262207031, + "loss": 3.88, + "margin_dpo/margin_mean": 28.90218734741211, + "margin_dpo/margin_std": 33.74877166748047, + "step": 160 + }, + { + "epoch": 0.33717277486910996, + "fcm_dpo/beta": 0.02312248945236206, + "fcm_dpo/delta": -0.00832156278192997, + "fcm_dpo/margin": 26.23219108581543, + "fcm_dpo/q_t": 0.3745940327644348, + "grad_norm": 87.6370620727539, + "learning_rate": 4.2052190435769554e-07, + "logits/chosen": -0.8637784719467163, + "logits/rejected": -0.8568350076675415, + "logps/chosen": -329.4377746582031, + "logps/ref_chosen": -310.4927978515625, + "logps/ref_rejected": -250.25347900390625, + "logps/rejected": -295.4306335449219, + "loss": 4.238, + "margin_dpo/margin_mean": 26.23219108581543, + "margin_dpo/margin_std": 39.27847671508789, + "step": 161 + }, + { + "epoch": 0.3392670157068063, + "fcm_dpo/beta": 0.02204562909901142, + "fcm_dpo/delta": -0.00742918998003006, + "fcm_dpo/margin": 22.998626708984375, + "fcm_dpo/q_t": 0.3925955891609192, + "grad_norm": 84.41416931152344, + "learning_rate": 4.1917855971495763e-07, + "logits/chosen": -0.845470130443573, + "logits/rejected": -0.8392305374145508, + "logps/chosen": -313.5657653808594, + "logps/ref_chosen": -296.1105041503906, + "logps/ref_rejected": -253.4247589111328, + "logps/rejected": -293.8786926269531, + "loss": 4.4313, + "margin_dpo/margin_mean": 22.998626708984375, + "margin_dpo/margin_std": 37.05625534057617, + "step": 162 + }, + { + "epoch": 0.3413612565445026, + "fcm_dpo/beta": 0.023259364068508148, + "fcm_dpo/delta": 0.029428036883473396, + "fcm_dpo/margin": 24.4699764251709, + "fcm_dpo/q_t": 0.37805965542793274, + "grad_norm": 105.51744842529297, + "learning_rate": 4.1782614253949255e-07, + "logits/chosen": -0.8878765106201172, + "logits/rejected": -0.8931166529655457, + "logps/chosen": -313.3696594238281, + "logps/ref_chosen": -293.4999084472656, + "logps/ref_rejected": -266.7116394042969, + "logps/rejected": -311.0514221191406, + "loss": 4.2088, + "margin_dpo/margin_mean": 24.4699764251709, + "margin_dpo/margin_std": 34.383941650390625, + "step": 163 + }, + { + "epoch": 0.34345549738219894, + "fcm_dpo/beta": 0.02358204685151577, + "fcm_dpo/delta": -0.005520589649677277, + "fcm_dpo/margin": 25.577049255371094, + "fcm_dpo/q_t": 0.3782970905303955, + "grad_norm": 93.9336929321289, + "learning_rate": 4.164647253573289e-07, + "logits/chosen": -0.8413535356521606, + "logits/rejected": -0.8617441654205322, + "logps/chosen": -291.18902587890625, + "logps/ref_chosen": -267.04949951171875, + "logps/ref_rejected": -215.9768829345703, + "logps/rejected": -265.6934509277344, + "loss": 4.2881, + "margin_dpo/margin_mean": 25.577049255371094, + "margin_dpo/margin_std": 39.729583740234375, + "step": 164 + }, + { + "epoch": 0.34554973821989526, + "fcm_dpo/beta": 0.023120472207665443, + "fcm_dpo/delta": 0.014161716215312481, + "fcm_dpo/margin": 20.357099533081055, + "fcm_dpo/q_t": 0.40322345495224, + "grad_norm": 96.0807113647461, + "learning_rate": 4.1509438117713863e-07, + "logits/chosen": -0.8848339319229126, + "logits/rejected": -0.8598626255989075, + "logps/chosen": -296.1021728515625, + "logps/ref_chosen": -278.06146240234375, + "logps/ref_rejected": -260.4288635253906, + "logps/rejected": -298.82666015625, + "loss": 4.5232, + "margin_dpo/margin_mean": 20.357099533081055, + "margin_dpo/margin_std": 35.15179443359375, + "step": 165 + }, + { + "epoch": 0.34764397905759165, + "fcm_dpo/beta": 0.02365921624004841, + "fcm_dpo/delta": 0.07884444296360016, + "fcm_dpo/margin": 22.211210250854492, + "fcm_dpo/q_t": 0.3956853151321411, + "grad_norm": 100.91581726074219, + "learning_rate": 4.137151834863213e-07, + "logits/chosen": -0.8429009914398193, + "logits/rejected": -0.8110395669937134, + "logps/chosen": -292.6982727050781, + "logps/ref_chosen": -275.9490661621094, + "logps/ref_rejected": -232.13473510742188, + "logps/rejected": -271.09515380859375, + "loss": 4.4726, + "margin_dpo/margin_mean": 22.211214065551758, + "margin_dpo/margin_std": 38.937843322753906, + "step": 166 + }, + { + "epoch": 0.34973821989528797, + "fcm_dpo/beta": 0.025131061673164368, + "fcm_dpo/delta": 0.01401679590344429, + "fcm_dpo/margin": 23.275146484375, + "fcm_dpo/q_t": 0.382481187582016, + "grad_norm": 97.39994049072266, + "learning_rate": 4.123272062470633e-07, + "logits/chosen": -0.8488789796829224, + "logits/rejected": -0.8377172946929932, + "logps/chosen": -299.45098876953125, + "logps/ref_chosen": -280.5514221191406, + "logps/ref_rejected": -255.2896728515625, + "logps/rejected": -297.46441650390625, + "loss": 4.4208, + "margin_dpo/margin_mean": 23.275146484375, + "margin_dpo/margin_std": 39.44821548461914, + "step": 167 + }, + { + "epoch": 0.3518324607329843, + "fcm_dpo/beta": 0.023984873667359352, + "fcm_dpo/delta": -0.06481810659170151, + "fcm_dpo/margin": 25.343101501464844, + "fcm_dpo/q_t": 0.3719956874847412, + "grad_norm": 296.59173583984375, + "learning_rate": 4.1093052389237174e-07, + "logits/chosen": -0.8263663649559021, + "logits/rejected": -0.8027467727661133, + "logps/chosen": -334.6053771972656, + "logps/ref_chosen": -315.7982177734375, + "logps/ref_rejected": -291.48406982421875, + "logps/rejected": -335.63433837890625, + "loss": 4.4031, + "margin_dpo/margin_mean": 25.34310531616211, + "margin_dpo/margin_std": 42.009727478027344, + "step": 168 + }, + { + "epoch": 0.3539267015706806, + "fcm_dpo/beta": 0.022055521607398987, + "fcm_dpo/delta": -0.17693692445755005, + "fcm_dpo/margin": 34.57185363769531, + "fcm_dpo/q_t": 0.34355735778808594, + "grad_norm": 79.44908905029297, + "learning_rate": 4.0952521132208267e-07, + "logits/chosen": -0.8222439885139465, + "logits/rejected": -0.8391299843788147, + "logps/chosen": -275.7101745605469, + "logps/ref_chosen": -261.06427001953125, + "logps/ref_rejected": -235.40663146972656, + "logps/rejected": -284.6243896484375, + "loss": 3.6944, + "margin_dpo/margin_mean": 34.57185363769531, + "margin_dpo/margin_std": 37.923160552978516, + "step": 169 + }, + { + "epoch": 0.35602094240837695, + "fcm_dpo/beta": 0.02063800022006035, + "fcm_dpo/delta": 0.05438760668039322, + "fcm_dpo/margin": 26.59862518310547, + "fcm_dpo/q_t": 0.3886667490005493, + "grad_norm": 96.43052673339844, + "learning_rate": 4.081113438988443e-07, + "logits/chosen": -0.7964289784431458, + "logits/rejected": -0.7981937527656555, + "logps/chosen": -324.3586120605469, + "logps/ref_chosen": -308.96722412109375, + "logps/ref_rejected": -263.8466796875, + "logps/rejected": -305.836669921875, + "loss": 4.3947, + "margin_dpo/margin_mean": 26.598623275756836, + "margin_dpo/margin_std": 45.38837432861328, + "step": 170 + }, + { + "epoch": 0.3581151832460733, + "fcm_dpo/beta": 0.020479857921600342, + "fcm_dpo/delta": -0.09359031170606613, + "fcm_dpo/margin": 30.383586883544922, + "fcm_dpo/q_t": 0.36607781052589417, + "grad_norm": 93.717529296875, + "learning_rate": 4.0668899744407567e-07, + "logits/chosen": -0.8218968510627747, + "logits/rejected": -0.8354977369308472, + "logps/chosen": -269.9103698730469, + "logps/ref_chosen": -258.8890380859375, + "logps/ref_rejected": -262.19140625, + "logps/rejected": -303.5963439941406, + "loss": 3.9434, + "margin_dpo/margin_mean": 30.383586883544922, + "margin_dpo/margin_std": 35.17938995361328, + "step": 171 + }, + { + "epoch": 0.36020942408376966, + "fcm_dpo/beta": 0.021176544949412346, + "fcm_dpo/delta": 0.16231057047843933, + "fcm_dpo/margin": 16.054502487182617, + "fcm_dpo/q_t": 0.42630359530448914, + "grad_norm": 96.79520416259766, + "learning_rate": 4.0525824823390043e-07, + "logits/chosen": -0.8345335125923157, + "logits/rejected": -0.853988766670227, + "logps/chosen": -352.2255554199219, + "logps/ref_chosen": -339.0223388671875, + "logps/ref_rejected": -295.78759765625, + "logps/rejected": -325.0453186035156, + "loss": 4.8841, + "margin_dpo/margin_mean": 16.054502487182617, + "margin_dpo/margin_std": 36.764705657958984, + "step": 172 + }, + { + "epoch": 0.362303664921466, + "fcm_dpo/beta": 0.023571645841002464, + "fcm_dpo/delta": 0.06804777681827545, + "fcm_dpo/margin": 22.532241821289062, + "fcm_dpo/q_t": 0.3934495151042938, + "grad_norm": 84.94215393066406, + "learning_rate": 4.0381917299505686e-07, + "logits/chosen": -0.8429185748100281, + "logits/rejected": -0.8447529077529907, + "logps/chosen": -313.66534423828125, + "logps/ref_chosen": -300.1114501953125, + "logps/ref_rejected": -273.78460693359375, + "logps/rejected": -309.87078857421875, + "loss": 4.4559, + "margin_dpo/margin_mean": 22.532241821289062, + "margin_dpo/margin_std": 38.33403015136719, + "step": 173 + }, + { + "epoch": 0.3643979057591623, + "fcm_dpo/beta": 0.023526517674326897, + "fcm_dpo/delta": -0.038947440683841705, + "fcm_dpo/margin": 27.008258819580078, + "fcm_dpo/q_t": 0.3678101897239685, + "grad_norm": 109.56539154052734, + "learning_rate": 4.0237184890078243e-07, + "logits/chosen": -0.8134390711784363, + "logits/rejected": -0.8019281625747681, + "logps/chosen": -348.16650390625, + "logps/ref_chosen": -335.0538635253906, + "logps/ref_rejected": -257.4646911621094, + "logps/rejected": -297.5855407714844, + "loss": 4.0733, + "margin_dpo/margin_mean": 27.008256912231445, + "margin_dpo/margin_std": 36.92762756347656, + "step": 174 + }, + { + "epoch": 0.36649214659685864, + "fcm_dpo/beta": 0.023315949365496635, + "fcm_dpo/delta": -0.02021496742963791, + "fcm_dpo/margin": 26.327842712402344, + "fcm_dpo/q_t": 0.3810538947582245, + "grad_norm": 105.11174011230469, + "learning_rate": 4.00916353566676e-07, + "logits/chosen": -0.8290956616401672, + "logits/rejected": -0.8322280645370483, + "logps/chosen": -303.4194030761719, + "logps/ref_chosen": -284.39556884765625, + "logps/ref_rejected": -283.3876647949219, + "logps/rejected": -328.7392883300781, + "loss": 4.3747, + "margin_dpo/margin_mean": 26.327844619750977, + "margin_dpo/margin_std": 42.5020637512207, + "step": 175 + }, + { + "epoch": 0.36858638743455496, + "fcm_dpo/beta": 0.023749521002173424, + "fcm_dpo/delta": 0.04947128891944885, + "fcm_dpo/margin": 20.36212158203125, + "fcm_dpo/q_t": 0.40329134464263916, + "grad_norm": 95.4178695678711, + "learning_rate": 3.994527650465352e-07, + "logits/chosen": -0.7997909784317017, + "logits/rejected": -0.8140876293182373, + "logps/chosen": -271.2232360839844, + "logps/ref_chosen": -251.81280517578125, + "logps/ref_rejected": -242.05328369140625, + "logps/rejected": -281.8258361816406, + "loss": 4.8359, + "margin_dpo/margin_mean": 20.36212158203125, + "margin_dpo/margin_std": 43.5911750793457, + "step": 176 + }, + { + "epoch": 0.3706806282722513, + "fcm_dpo/beta": 0.023227877914905548, + "fcm_dpo/delta": -0.04320107400417328, + "fcm_dpo/margin": 20.517908096313477, + "fcm_dpo/q_t": 0.40150418877601624, + "grad_norm": 95.16880798339844, + "learning_rate": 3.979811618281705e-07, + "logits/chosen": -0.8828033804893494, + "logits/rejected": -0.8596282005310059, + "logps/chosen": -318.2162780761719, + "logps/ref_chosen": -298.6463928222656, + "logps/ref_rejected": -295.66534423828125, + "logps/rejected": -335.75311279296875, + "loss": 4.7767, + "margin_dpo/margin_mean": 20.517908096313477, + "margin_dpo/margin_std": 41.196895599365234, + "step": 177 + }, + { + "epoch": 0.37277486910994767, + "fcm_dpo/beta": 0.02242261730134487, + "fcm_dpo/delta": -0.029734821990132332, + "fcm_dpo/margin": 27.962230682373047, + "fcm_dpo/q_t": 0.3739369809627533, + "grad_norm": 87.00016021728516, + "learning_rate": 3.9650162282919654e-07, + "logits/chosen": -0.7981923222541809, + "logits/rejected": -0.7972285747528076, + "logps/chosen": -301.7319641113281, + "logps/ref_chosen": -286.2576599121094, + "logps/ref_rejected": -243.97491455078125, + "logps/rejected": -287.41143798828125, + "loss": 4.1371, + "margin_dpo/margin_mean": 27.962230682373047, + "margin_dpo/margin_std": 40.20293426513672, + "step": 178 + }, + { + "epoch": 0.374869109947644, + "fcm_dpo/beta": 0.021783435717225075, + "fcm_dpo/delta": -0.04039537161588669, + "fcm_dpo/margin": 23.8038272857666, + "fcm_dpo/q_t": 0.392859548330307, + "grad_norm": 93.1056137084961, + "learning_rate": 3.9501422739279953e-07, + "logits/chosen": -0.7682486772537231, + "logits/rejected": -0.7733548283576965, + "logps/chosen": -276.9896240234375, + "logps/ref_chosen": -259.737060546875, + "logps/ref_rejected": -277.8813171386719, + "logps/rejected": -318.9377136230469, + "loss": 4.5235, + "margin_dpo/margin_mean": 23.80382537841797, + "margin_dpo/margin_std": 41.399452209472656, + "step": 179 + }, + { + "epoch": 0.3769633507853403, + "fcm_dpo/beta": 0.02132536470890045, + "fcm_dpo/delta": -0.050105344504117966, + "fcm_dpo/margin": 28.025800704956055, + "fcm_dpo/q_t": 0.378864049911499, + "grad_norm": 80.81954956054688, + "learning_rate": 3.935190552834828e-07, + "logits/chosen": -0.8179333209991455, + "logits/rejected": -0.8522875905036926, + "logps/chosen": -284.9200744628906, + "logps/ref_chosen": -267.30889892578125, + "logps/ref_rejected": -230.4376983642578, + "logps/rejected": -276.0746765136719, + "loss": 4.1849, + "margin_dpo/margin_mean": 28.025800704956055, + "margin_dpo/margin_std": 40.71231460571289, + "step": 180 + }, + { + "epoch": 0.37905759162303665, + "fcm_dpo/beta": 0.021433616057038307, + "fcm_dpo/delta": 0.09068157523870468, + "fcm_dpo/margin": 23.975404739379883, + "fcm_dpo/q_t": 0.39596718549728394, + "grad_norm": 105.24143981933594, + "learning_rate": 3.920161866827889e-07, + "logits/chosen": -0.8095259666442871, + "logits/rejected": -0.8213891386985779, + "logps/chosen": -321.52716064453125, + "logps/ref_chosen": -300.49139404296875, + "logps/ref_rejected": -278.98284912109375, + "logps/rejected": -323.9939880371094, + "loss": 4.5794, + "margin_dpo/margin_mean": 23.97540283203125, + "margin_dpo/margin_std": 44.497955322265625, + "step": 181 + }, + { + "epoch": 0.381151832460733, + "fcm_dpo/beta": 0.02187720127403736, + "fcm_dpo/delta": -0.12704817950725555, + "fcm_dpo/margin": 32.94600296020508, + "fcm_dpo/q_t": 0.350864440202713, + "grad_norm": 94.21673583984375, + "learning_rate": 3.90505702185e-07, + "logits/chosen": -0.7871803045272827, + "logits/rejected": -0.8218678832054138, + "logps/chosen": -297.6783142089844, + "logps/ref_chosen": -279.4981689453125, + "logps/ref_rejected": -263.6926574707031, + "logps/rejected": -314.8188171386719, + "loss": 3.8389, + "margin_dpo/margin_mean": 32.94600296020508, + "margin_dpo/margin_std": 39.00600051879883, + "step": 182 + }, + { + "epoch": 0.3832460732984293, + "fcm_dpo/beta": 0.020077742636203766, + "fcm_dpo/delta": 0.016617465764284134, + "fcm_dpo/margin": 29.090024948120117, + "fcm_dpo/q_t": 0.38179779052734375, + "grad_norm": 83.8680191040039, + "learning_rate": 3.889876827928156e-07, + "logits/chosen": -0.842463493347168, + "logits/rejected": -0.8533914685249329, + "logps/chosen": -289.95166015625, + "logps/ref_chosen": -271.2057189941406, + "logps/ref_rejected": -243.91549682617188, + "logps/rejected": -291.75146484375, + "loss": 4.2765, + "margin_dpo/margin_mean": 29.090024948120117, + "margin_dpo/margin_std": 45.345638275146484, + "step": 183 + }, + { + "epoch": 0.38534031413612563, + "fcm_dpo/beta": 0.018667876720428467, + "fcm_dpo/delta": -0.12012484669685364, + "fcm_dpo/margin": 37.77571487426758, + "fcm_dpo/q_t": 0.35291537642478943, + "grad_norm": 92.6821060180664, + "learning_rate": 3.874622099130087e-07, + "logits/chosen": -0.8658108711242676, + "logits/rejected": -0.8556749820709229, + "logps/chosen": -331.8802185058594, + "logps/ref_chosen": -318.4457702636719, + "logps/ref_rejected": -266.640869140625, + "logps/rejected": -317.8509826660156, + "loss": 3.8875, + "margin_dpo/margin_mean": 37.77571487426758, + "margin_dpo/margin_std": 46.467491149902344, + "step": 184 + }, + { + "epoch": 0.387434554973822, + "fcm_dpo/beta": 0.018280260264873505, + "fcm_dpo/delta": -0.017129220068454742, + "fcm_dpo/margin": 31.00004768371582, + "fcm_dpo/q_t": 0.382385790348053, + "grad_norm": 80.60724639892578, + "learning_rate": 3.859293653520604e-07, + "logits/chosen": -0.854312539100647, + "logits/rejected": -0.8550869226455688, + "logps/chosen": -296.79412841796875, + "logps/ref_chosen": -274.308837890625, + "logps/ref_rejected": -260.7274169921875, + "logps/rejected": -314.2127685546875, + "loss": 4.2308, + "margin_dpo/margin_mean": 31.000051498413086, + "margin_dpo/margin_std": 45.35227584838867, + "step": 185 + }, + { + "epoch": 0.38952879581151834, + "fcm_dpo/beta": 0.018405750393867493, + "fcm_dpo/delta": 0.0071517787873744965, + "fcm_dpo/margin": 29.293418884277344, + "fcm_dpo/q_t": 0.3844657838344574, + "grad_norm": 82.40447998046875, + "learning_rate": 3.8438923131177237e-07, + "logits/chosen": -0.8595123291015625, + "logits/rejected": -0.8700802326202393, + "logps/chosen": -321.4861145019531, + "logps/ref_chosen": -299.00537109375, + "logps/ref_rejected": -274.4014587402344, + "logps/rejected": -326.1756286621094, + "loss": 4.2669, + "margin_dpo/margin_mean": 29.293418884277344, + "margin_dpo/margin_std": 41.822120666503906, + "step": 186 + }, + { + "epoch": 0.39162303664921466, + "fcm_dpo/beta": 0.01949167065322399, + "fcm_dpo/delta": 0.11492104828357697, + "fcm_dpo/margin": 25.117904663085938, + "fcm_dpo/q_t": 0.39704573154449463, + "grad_norm": 108.40086364746094, + "learning_rate": 3.828418903848593e-07, + "logits/chosen": -0.8057087659835815, + "logits/rejected": -0.800156831741333, + "logps/chosen": -356.62225341796875, + "logps/ref_chosen": -329.8253173828125, + "logps/ref_rejected": -263.73175048828125, + "logps/rejected": -315.6466064453125, + "loss": 4.6723, + "margin_dpo/margin_mean": 25.117904663085938, + "margin_dpo/margin_std": 48.73664093017578, + "step": 187 + }, + { + "epoch": 0.393717277486911, + "fcm_dpo/beta": 0.01967058703303337, + "fcm_dpo/delta": -0.03304888680577278, + "fcm_dpo/margin": 29.974576950073242, + "fcm_dpo/q_t": 0.38121888041496277, + "grad_norm": 85.1061019897461, + "learning_rate": 3.812874255505191e-07, + "logits/chosen": -0.8419395089149475, + "logits/rejected": -0.8400317430496216, + "logps/chosen": -289.6829528808594, + "logps/ref_chosen": -263.005615234375, + "logps/ref_rejected": -247.08668518066406, + "logps/rejected": -303.7385559082031, + "loss": 4.4389, + "margin_dpo/margin_mean": 29.974576950073242, + "margin_dpo/margin_std": 50.47289276123047, + "step": 188 + }, + { + "epoch": 0.3958115183246073, + "fcm_dpo/beta": 0.018633361905813217, + "fcm_dpo/delta": -0.060549549758434296, + "fcm_dpo/margin": 35.01060104370117, + "fcm_dpo/q_t": 0.3630969822406769, + "grad_norm": 82.80532836914062, + "learning_rate": 3.797259201699833e-07, + "logits/chosen": -0.859175443649292, + "logits/rejected": -0.8690008521080017, + "logps/chosen": -291.63153076171875, + "logps/ref_chosen": -272.96038818359375, + "logps/ref_rejected": -275.13238525390625, + "logps/rejected": -328.81414794921875, + "loss": 3.9104, + "margin_dpo/margin_mean": 35.01060104370117, + "margin_dpo/margin_std": 41.501155853271484, + "step": 189 + }, + { + "epoch": 0.39790575916230364, + "fcm_dpo/beta": 0.018668456003069878, + "fcm_dpo/delta": 0.0034092608839273453, + "fcm_dpo/margin": 31.924047470092773, + "fcm_dpo/q_t": 0.3739194869995117, + "grad_norm": 86.3962173461914, + "learning_rate": 3.781574579820464e-07, + "logits/chosen": -0.8613168597221375, + "logits/rejected": -0.8277738094329834, + "logps/chosen": -275.919677734375, + "logps/ref_chosen": -257.79754638671875, + "logps/ref_rejected": -225.2164306640625, + "logps/rejected": -275.2625732421875, + "loss": 4.0864, + "margin_dpo/margin_mean": 31.924047470092773, + "margin_dpo/margin_std": 42.496273040771484, + "step": 190 + }, + { + "epoch": 0.4, + "fcm_dpo/beta": 0.018610456958413124, + "fcm_dpo/delta": -0.014111967757344246, + "fcm_dpo/margin": 31.048202514648438, + "fcm_dpo/q_t": 0.3805280923843384, + "grad_norm": 87.75660705566406, + "learning_rate": 3.765821230985757e-07, + "logits/chosen": -0.8736047148704529, + "logits/rejected": -0.8768740296363831, + "logps/chosen": -260.75518798828125, + "logps/ref_chosen": -243.8585205078125, + "logps/ref_rejected": -245.12136840820312, + "logps/rejected": -293.0662536621094, + "loss": 4.2548, + "margin_dpo/margin_mean": 31.048202514648438, + "margin_dpo/margin_std": 46.76060104370117, + "step": 191 + }, + { + "epoch": 0.40209424083769635, + "fcm_dpo/beta": 0.018671073019504547, + "fcm_dpo/delta": 0.009804993867874146, + "fcm_dpo/margin": 25.756927490234375, + "fcm_dpo/q_t": 0.39886969327926636, + "grad_norm": 83.8148193359375, + "learning_rate": 3.75e-07, + "logits/chosen": -0.825681746006012, + "logits/rejected": -0.8136826157569885, + "logps/chosen": -289.8357238769531, + "logps/ref_chosen": -266.9799499511719, + "logps/ref_rejected": -260.1697082519531, + "logps/rejected": -308.78240966796875, + "loss": 4.5612, + "margin_dpo/margin_mean": 25.756927490234375, + "margin_dpo/margin_std": 45.346221923828125, + "step": 192 + }, + { + "epoch": 0.4041884816753927, + "fcm_dpo/beta": 0.018258847296237946, + "fcm_dpo/delta": -0.021077796816825867, + "fcm_dpo/margin": 30.8725643157959, + "fcm_dpo/q_t": 0.38373884558677673, + "grad_norm": 91.01241302490234, + "learning_rate": 3.734111735307796e-07, + "logits/chosen": -0.8784509897232056, + "logits/rejected": -0.8553139567375183, + "logps/chosen": -308.2591247558594, + "logps/ref_chosen": -280.25323486328125, + "logps/ref_rejected": -291.0348815917969, + "logps/rejected": -349.9133605957031, + "loss": 4.3134, + "margin_dpo/margin_mean": 30.872562408447266, + "margin_dpo/margin_std": 47.93418884277344, + "step": 193 + }, + { + "epoch": 0.406282722513089, + "fcm_dpo/beta": 0.019326101988554, + "fcm_dpo/delta": 0.08527359366416931, + "fcm_dpo/margin": 23.81899070739746, + "fcm_dpo/q_t": 0.4056922197341919, + "grad_norm": 106.6082992553711, + "learning_rate": 3.7181572889485623e-07, + "logits/chosen": -0.8528724908828735, + "logits/rejected": -0.8473402261734009, + "logps/chosen": -318.2233581542969, + "logps/ref_chosen": -288.13946533203125, + "logps/ref_rejected": -251.31529235839844, + "logps/rejected": -305.2181701660156, + "loss": 4.5547, + "margin_dpo/margin_mean": 23.818988800048828, + "margin_dpo/margin_std": 42.86112594604492, + "step": 194 + }, + { + "epoch": 0.4083769633507853, + "fcm_dpo/beta": 0.020995743572711945, + "fcm_dpo/delta": 0.09806863218545914, + "fcm_dpo/margin": 21.186416625976562, + "fcm_dpo/q_t": 0.41219669580459595, + "grad_norm": 105.31787872314453, + "learning_rate": 3.7021375165108377e-07, + "logits/chosen": -0.8652254343032837, + "logits/rejected": -0.8719401359558105, + "logps/chosen": -305.6102600097656, + "logps/ref_chosen": -274.0006408691406, + "logps/ref_rejected": -280.22723388671875, + "logps/rejected": -333.0232849121094, + "loss": 4.659, + "margin_dpo/margin_mean": 21.186416625976562, + "margin_dpo/margin_std": 41.24464797973633, + "step": 195 + }, + { + "epoch": 0.41047120418848165, + "fcm_dpo/beta": 0.021107617765665054, + "fcm_dpo/delta": -0.02604127675294876, + "fcm_dpo/margin": 29.49114227294922, + "fcm_dpo/q_t": 0.37593233585357666, + "grad_norm": 109.97003173828125, + "learning_rate": 3.6860532770864005e-07, + "logits/chosen": -0.8447614908218384, + "logits/rejected": -0.8570613861083984, + "logps/chosen": -298.1605529785156, + "logps/ref_chosen": -274.90069580078125, + "logps/ref_rejected": -248.7281951904297, + "logps/rejected": -301.47918701171875, + "loss": 4.2574, + "margin_dpo/margin_mean": 29.49114227294922, + "margin_dpo/margin_std": 46.1149787902832, + "step": 196 + }, + { + "epoch": 0.41256544502617803, + "fcm_dpo/beta": 0.02037704363465309, + "fcm_dpo/delta": -0.11450602114200592, + "fcm_dpo/margin": 34.678550720214844, + "fcm_dpo/q_t": 0.35536617040634155, + "grad_norm": 116.41548156738281, + "learning_rate": 3.6699054332241985e-07, + "logits/chosen": -0.8692039847373962, + "logits/rejected": -0.8588843941688538, + "logps/chosen": -335.359375, + "logps/ref_chosen": -309.5348205566406, + "logps/ref_rejected": -264.3179931640625, + "logps/rejected": -324.8210754394531, + "loss": 3.9332, + "margin_dpo/margin_mean": 34.678550720214844, + "margin_dpo/margin_std": 43.28546142578125, + "step": 197 + }, + { + "epoch": 0.41465968586387436, + "fcm_dpo/beta": 0.0187942273914814, + "fcm_dpo/delta": -0.014170356094837189, + "fcm_dpo/margin": 32.54724884033203, + "fcm_dpo/q_t": 0.3783041536808014, + "grad_norm": 99.18403625488281, + "learning_rate": 3.653694850884091e-07, + "logits/chosen": -0.8634573221206665, + "logits/rejected": -0.841856062412262, + "logps/chosen": -326.5914306640625, + "logps/ref_chosen": -301.0134582519531, + "logps/ref_rejected": -292.84185791015625, + "logps/rejected": -350.9670715332031, + "loss": 4.31, + "margin_dpo/margin_mean": 32.5472526550293, + "margin_dpo/margin_std": 51.99414825439453, + "step": 198 + }, + { + "epoch": 0.4167539267015707, + "fcm_dpo/beta": 0.01868726871907711, + "fcm_dpo/delta": -0.040653832256793976, + "fcm_dpo/margin": 31.534244537353516, + "fcm_dpo/q_t": 0.3784925043582916, + "grad_norm": 91.59637451171875, + "learning_rate": 3.6374223993904124e-07, + "logits/chosen": -0.8504621982574463, + "logits/rejected": -0.8154540061950684, + "logps/chosen": -290.4877014160156, + "logps/ref_chosen": -264.6058654785156, + "logps/ref_rejected": -214.9014892578125, + "logps/rejected": -272.31756591796875, + "loss": 4.1757, + "margin_dpo/margin_mean": 31.534244537353516, + "margin_dpo/margin_std": 45.6278190612793, + "step": 199 + }, + { + "epoch": 0.418848167539267, + "fcm_dpo/beta": 0.018357042223215103, + "fcm_dpo/delta": 0.04033544659614563, + "fcm_dpo/margin": 28.317873001098633, + "fcm_dpo/q_t": 0.39727315306663513, + "grad_norm": 104.42108917236328, + "learning_rate": 3.621088951385353e-07, + "logits/chosen": -0.8921913504600525, + "logits/rejected": -0.8735958337783813, + "logps/chosen": -352.2391662597656, + "logps/ref_chosen": -324.1588134765625, + "logps/ref_rejected": -277.80218505859375, + "logps/rejected": -334.200439453125, + "loss": 4.6087, + "margin_dpo/margin_mean": 28.31787872314453, + "margin_dpo/margin_std": 53.46382522583008, + "step": 200 + }, + { + "epoch": 0.418848167539267, + "eval_fcm_dpo/beta": 0.018857382237911224, + "eval_logits/chosen": -0.8679316639900208, + "eval_logits/rejected": -0.8609716296195984, + "eval_logps/chosen": -320.89276123046875, + "eval_logps/ref_chosen": -287.8267517089844, + "eval_logps/ref_rejected": -266.9313659667969, + "eval_logps/rejected": -329.564697265625, + "eval_loss": 0.5497193336486816, + "eval_margin_dpo/margin_mean": 29.56734848022461, + "eval_margin_dpo/margin_std": 48.380184173583984, + "eval_runtime": 81.4797, + "eval_samples_per_second": 24.546, + "eval_steps_per_second": 1.534, + "step": 200 + }, + { + "epoch": 0.42094240837696334, + "fcm_dpo/beta": 0.019249822944402695, + "fcm_dpo/delta": -0.007784634828567505, + "fcm_dpo/margin": 31.416568756103516, + "fcm_dpo/q_t": 0.3744759261608124, + "grad_norm": 98.83305358886719, + "learning_rate": 3.604695382782159e-07, + "logits/chosen": -0.8689364194869995, + "logits/rejected": -0.8637883067131042, + "logps/chosen": -304.6473388671875, + "logps/ref_chosen": -271.49566650390625, + "logps/ref_rejected": -245.71414184570312, + "logps/rejected": -310.2823791503906, + "loss": 4.2192, + "margin_dpo/margin_mean": 31.416568756103516, + "margin_dpo/margin_std": 46.150325775146484, + "step": 201 + }, + { + "epoch": 0.42303664921465967, + "fcm_dpo/beta": 0.018503909930586815, + "fcm_dpo/delta": -0.041275542229413986, + "fcm_dpo/margin": 31.27553939819336, + "fcm_dpo/q_t": 0.3803809881210327, + "grad_norm": 98.9993667602539, + "learning_rate": 3.588242572718162e-07, + "logits/chosen": -0.8732012510299683, + "logits/rejected": -0.8661995530128479, + "logps/chosen": -304.1346435546875, + "logps/ref_chosen": -272.0979309082031, + "logps/ref_rejected": -235.94805908203125, + "logps/rejected": -299.2603454589844, + "loss": 4.3415, + "margin_dpo/margin_mean": 31.27553939819336, + "margin_dpo/margin_std": 48.76369094848633, + "step": 202 + }, + { + "epoch": 0.42513089005235605, + "fcm_dpo/beta": 0.01837236061692238, + "fcm_dpo/delta": 0.05643375590443611, + "fcm_dpo/margin": 24.213150024414062, + "fcm_dpo/q_t": 0.40584272146224976, + "grad_norm": 100.67874145507812, + "learning_rate": 3.571731403507635e-07, + "logits/chosen": -0.850642204284668, + "logits/rejected": -0.8625622391700745, + "logps/chosen": -318.47943115234375, + "logps/ref_chosen": -280.2221374511719, + "logps/ref_rejected": -251.79798889160156, + "logps/rejected": -314.2684326171875, + "loss": 4.5736, + "margin_dpo/margin_mean": 24.213150024414062, + "margin_dpo/margin_std": 43.66739273071289, + "step": 203 + }, + { + "epoch": 0.4272251308900524, + "fcm_dpo/beta": 0.018126487731933594, + "fcm_dpo/delta": -0.06936343759298325, + "fcm_dpo/margin": 36.58906173706055, + "fcm_dpo/q_t": 0.3620806634426117, + "grad_norm": 95.67644500732422, + "learning_rate": 3.5551627605944746e-07, + "logits/chosen": -0.8942813277244568, + "logits/rejected": -0.8736305236816406, + "logps/chosen": -348.34130859375, + "logps/ref_chosen": -318.7960510253906, + "logps/ref_rejected": -269.69921875, + "logps/rejected": -335.83355712890625, + "loss": 3.9389, + "margin_dpo/margin_mean": 36.58906173706055, + "margin_dpo/margin_std": 46.73650360107422, + "step": 204 + }, + { + "epoch": 0.4293193717277487, + "fcm_dpo/beta": 0.01768399402499199, + "fcm_dpo/delta": -0.04656511917710304, + "fcm_dpo/margin": 36.3321418762207, + "fcm_dpo/q_t": 0.36954307556152344, + "grad_norm": 89.59551239013672, + "learning_rate": 3.5385375325047163e-07, + "logits/chosen": -0.8388100862503052, + "logits/rejected": -0.8084380626678467, + "logps/chosen": -316.36187744140625, + "logps/ref_chosen": -283.7620544433594, + "logps/ref_rejected": -297.69439697265625, + "logps/rejected": -366.6263732910156, + "loss": 4.04, + "margin_dpo/margin_mean": 36.3321418762207, + "margin_dpo/margin_std": 49.33777618408203, + "step": 205 + }, + { + "epoch": 0.431413612565445, + "fcm_dpo/beta": 0.017556358128786087, + "fcm_dpo/delta": 0.019014529883861542, + "fcm_dpo/margin": 30.180171966552734, + "fcm_dpo/q_t": 0.3905620276927948, + "grad_norm": 98.87091827392578, + "learning_rate": 3.5218566107988867e-07, + "logits/chosen": -0.8614488840103149, + "logits/rejected": -0.8843433260917664, + "logps/chosen": -330.2857971191406, + "logps/ref_chosen": -293.66387939453125, + "logps/ref_rejected": -291.3056640625, + "logps/rejected": -358.1078186035156, + "loss": 4.453, + "margin_dpo/margin_mean": 30.18017578125, + "margin_dpo/margin_std": 50.2305908203125, + "step": 206 + }, + { + "epoch": 0.43350785340314135, + "fcm_dpo/beta": 0.017863312736153603, + "fcm_dpo/delta": 0.02159544639289379, + "fcm_dpo/margin": 29.29644775390625, + "fcm_dpo/q_t": 0.39205509424209595, + "grad_norm": 100.55647277832031, + "learning_rate": 3.505120890024195e-07, + "logits/chosen": -0.8120275735855103, + "logits/rejected": -0.8208277821540833, + "logps/chosen": -303.7646484375, + "logps/ref_chosen": -270.5350646972656, + "logps/ref_rejected": -278.7747497558594, + "logps/rejected": -341.30072021484375, + "loss": 4.5997, + "margin_dpo/margin_mean": 29.296445846557617, + "margin_dpo/margin_std": 55.331058502197266, + "step": 207 + }, + { + "epoch": 0.4356020942408377, + "fcm_dpo/beta": 0.01773180440068245, + "fcm_dpo/delta": -0.038446761667728424, + "fcm_dpo/margin": 35.76897430419922, + "fcm_dpo/q_t": 0.37097251415252686, + "grad_norm": 86.00871276855469, + "learning_rate": 3.4883312676665534e-07, + "logits/chosen": -0.8688828945159912, + "logits/rejected": -0.8223684430122375, + "logps/chosen": -317.2559509277344, + "logps/ref_chosen": -279.582763671875, + "logps/ref_rejected": -290.041015625, + "logps/rejected": -363.483154296875, + "loss": 4.108, + "margin_dpo/margin_mean": 35.76897430419922, + "margin_dpo/margin_std": 50.78927993774414, + "step": 208 + }, + { + "epoch": 0.437696335078534, + "fcm_dpo/beta": 0.018073974177241325, + "fcm_dpo/delta": 0.07754447311162949, + "fcm_dpo/margin": 26.4322566986084, + "fcm_dpo/q_t": 0.40094897150993347, + "grad_norm": 106.75226593017578, + "learning_rate": 3.4714886441024573e-07, + "logits/chosen": -0.7833099365234375, + "logits/rejected": -0.7856354713439941, + "logps/chosen": -360.97906494140625, + "logps/ref_chosen": -318.8725280761719, + "logps/ref_rejected": -270.64324951171875, + "logps/rejected": -339.18206787109375, + "loss": 4.7071, + "margin_dpo/margin_mean": 26.4322566986084, + "margin_dpo/margin_std": 52.98542785644531, + "step": 209 + }, + { + "epoch": 0.4397905759162304, + "fcm_dpo/beta": 0.018200790509581566, + "fcm_dpo/delta": 0.0208455678075552, + "fcm_dpo/margin": 31.805618286132812, + "fcm_dpo/q_t": 0.3811089098453522, + "grad_norm": 105.60123443603516, + "learning_rate": 3.454593922550693e-07, + "logits/chosen": -0.8259727358818054, + "logits/rejected": -0.8135036826133728, + "logps/chosen": -320.53704833984375, + "logps/ref_chosen": -283.14031982421875, + "logps/ref_rejected": -287.2986755371094, + "logps/rejected": -356.50103759765625, + "loss": 4.3226, + "margin_dpo/margin_mean": 31.805618286132812, + "margin_dpo/margin_std": 50.25780487060547, + "step": 210 + }, + { + "epoch": 0.4418848167539267, + "fcm_dpo/beta": 0.01751658506691456, + "fcm_dpo/delta": -0.1294037252664566, + "fcm_dpo/margin": 40.80807876586914, + "fcm_dpo/q_t": 0.34755995869636536, + "grad_norm": 86.59803771972656, + "learning_rate": 3.4376480090239047e-07, + "logits/chosen": -0.8490579128265381, + "logits/rejected": -0.829590916633606, + "logps/chosen": -310.4613342285156, + "logps/ref_chosen": -276.4228515625, + "logps/ref_rejected": -252.40603637695312, + "logps/rejected": -327.25262451171875, + "loss": 3.7428, + "margin_dpo/margin_mean": 40.80807876586914, + "margin_dpo/margin_std": 43.20057678222656, + "step": 211 + }, + { + "epoch": 0.44397905759162304, + "fcm_dpo/beta": 0.01712076924741268, + "fcm_dpo/delta": 0.03554587438702583, + "fcm_dpo/margin": 28.673137664794922, + "fcm_dpo/q_t": 0.39428529143333435, + "grad_norm": 94.82775115966797, + "learning_rate": 3.4206518122800055e-07, + "logits/chosen": -0.8299760818481445, + "logits/rejected": -0.8329156041145325, + "logps/chosen": -309.0224914550781, + "logps/ref_chosen": -271.7055358886719, + "logps/ref_rejected": -241.18511962890625, + "logps/rejected": -307.17529296875, + "loss": 4.4766, + "margin_dpo/margin_mean": 28.673141479492188, + "margin_dpo/margin_std": 47.071434020996094, + "step": 212 + }, + { + "epoch": 0.44607329842931936, + "fcm_dpo/beta": 0.017721228301525116, + "fcm_dpo/delta": 0.03447887301445007, + "fcm_dpo/margin": 29.419530868530273, + "fcm_dpo/q_t": 0.3976650834083557, + "grad_norm": 103.67435455322266, + "learning_rate": 3.403606243773448e-07, + "logits/chosen": -0.824676513671875, + "logits/rejected": -0.8418750762939453, + "logps/chosen": -341.2528076171875, + "logps/ref_chosen": -302.2976379394531, + "logps/ref_rejected": -303.6202087402344, + "logps/rejected": -371.9948425292969, + "loss": 4.5048, + "margin_dpo/margin_mean": 29.41952896118164, + "margin_dpo/margin_std": 53.222564697265625, + "step": 213 + }, + { + "epoch": 0.4481675392670157, + "fcm_dpo/beta": 0.017746904864907265, + "fcm_dpo/delta": -0.004792161285877228, + "fcm_dpo/margin": 33.86843490600586, + "fcm_dpo/q_t": 0.3744812309741974, + "grad_norm": 106.49107360839844, + "learning_rate": 3.3865122176063385e-07, + "logits/chosen": -0.830028772354126, + "logits/rejected": -0.8322975635528564, + "logps/chosen": -319.8704528808594, + "logps/ref_chosen": -272.13262939453125, + "logps/ref_rejected": -294.82354736328125, + "logps/rejected": -376.4298095703125, + "loss": 4.0953, + "margin_dpo/margin_mean": 33.86843490600586, + "margin_dpo/margin_std": 43.68943405151367, + "step": 214 + }, + { + "epoch": 0.450261780104712, + "fcm_dpo/beta": 0.017707258462905884, + "fcm_dpo/delta": 0.005800800397992134, + "fcm_dpo/margin": 26.85211944580078, + "fcm_dpo/q_t": 0.40805721282958984, + "grad_norm": 100.26293182373047, + "learning_rate": 3.3693706504794243e-07, + "logits/chosen": -0.8703227043151855, + "logits/rejected": -0.8574371933937073, + "logps/chosen": -335.0187072753906, + "logps/ref_chosen": -291.3782958984375, + "logps/ref_rejected": -261.05792236328125, + "logps/rejected": -331.5504150390625, + "loss": 4.6754, + "margin_dpo/margin_mean": 26.85211944580078, + "margin_dpo/margin_std": 53.00439453125, + "step": 215 + }, + { + "epoch": 0.4523560209424084, + "fcm_dpo/beta": 0.017213810235261917, + "fcm_dpo/delta": -0.00978805497288704, + "fcm_dpo/margin": 35.283409118652344, + "fcm_dpo/q_t": 0.3760201334953308, + "grad_norm": 95.87169647216797, + "learning_rate": 3.3521824616429284e-07, + "logits/chosen": -0.8963602185249329, + "logits/rejected": -0.8926108479499817, + "logps/chosen": -375.2848815917969, + "logps/ref_chosen": -338.50543212890625, + "logps/ref_rejected": -305.76104736328125, + "logps/rejected": -377.8238830566406, + "loss": 4.2837, + "margin_dpo/margin_mean": 35.283409118652344, + "margin_dpo/margin_std": 54.5643196105957, + "step": 216 + }, + { + "epoch": 0.4544502617801047, + "fcm_dpo/beta": 0.01671535335481167, + "fcm_dpo/delta": -0.13403168320655823, + "fcm_dpo/margin": 43.277740478515625, + "fcm_dpo/q_t": 0.3528442978858948, + "grad_norm": 85.40447235107422, + "learning_rate": 3.334948572847253e-07, + "logits/chosen": -0.7879663109779358, + "logits/rejected": -0.7589735388755798, + "logps/chosen": -332.1008605957031, + "logps/ref_chosen": -293.5498046875, + "logps/ref_rejected": -256.7830810546875, + "logps/rejected": -338.6118469238281, + "loss": 3.9244, + "margin_dpo/margin_mean": 43.277740478515625, + "margin_dpo/margin_std": 55.66615295410156, + "step": 217 + }, + { + "epoch": 0.45654450261780105, + "fcm_dpo/beta": 0.015897490084171295, + "fcm_dpo/delta": 0.003095601685345173, + "fcm_dpo/margin": 37.482078552246094, + "fcm_dpo/q_t": 0.3739127516746521, + "grad_norm": 89.20011901855469, + "learning_rate": 3.317669908293554e-07, + "logits/chosen": -0.8181397914886475, + "logits/rejected": -0.8422555923461914, + "logps/chosen": -357.48828125, + "logps/ref_chosen": -320.579345703125, + "logps/ref_rejected": -294.0381164550781, + "logps/rejected": -368.42913818359375, + "loss": 4.0961, + "margin_dpo/margin_mean": 37.482078552246094, + "margin_dpo/margin_std": 50.897701263427734, + "step": 218 + }, + { + "epoch": 0.4586387434554974, + "fcm_dpo/beta": 0.015867143869400024, + "fcm_dpo/delta": -0.031162606552243233, + "fcm_dpo/margin": 39.53302001953125, + "fcm_dpo/q_t": 0.36894065141677856, + "grad_norm": 85.81663513183594, + "learning_rate": 3.300347394584172e-07, + "logits/chosen": -0.8200687170028687, + "logits/rejected": -0.846379280090332, + "logps/chosen": -301.2198486328125, + "logps/ref_chosen": -268.4186096191406, + "logps/ref_rejected": -265.7808837890625, + "logps/rejected": -338.1151428222656, + "loss": 4.1022, + "margin_dpo/margin_mean": 39.53302001953125, + "margin_dpo/margin_std": 54.08649826049805, + "step": 219 + }, + { + "epoch": 0.4607329842931937, + "fcm_dpo/beta": 0.015527862124145031, + "fcm_dpo/delta": 0.009382149204611778, + "fcm_dpo/margin": 38.0103759765625, + "fcm_dpo/q_t": 0.3744858205318451, + "grad_norm": 86.28771209716797, + "learning_rate": 3.2829819606729477e-07, + "logits/chosen": -0.8505481481552124, + "logits/rejected": -0.8325619697570801, + "logps/chosen": -347.1203918457031, + "logps/ref_chosen": -312.8864440917969, + "logps/ref_rejected": -259.5191955566406, + "logps/rejected": -331.7634582519531, + "loss": 4.1899, + "margin_dpo/margin_mean": 38.0103759765625, + "margin_dpo/margin_std": 54.48101043701172, + "step": 220 + }, + { + "epoch": 0.46282722513089003, + "fcm_dpo/beta": 0.016174497082829475, + "fcm_dpo/delta": 0.0048094987869262695, + "fcm_dpo/margin": 30.234722137451172, + "fcm_dpo/q_t": 0.4027414321899414, + "grad_norm": 90.07968139648438, + "learning_rate": 3.265574537815398e-07, + "logits/chosen": -0.7801198363304138, + "logits/rejected": -0.79371577501297, + "logps/chosen": -337.3284606933594, + "logps/ref_chosen": -300.32586669921875, + "logps/ref_rejected": -286.312255859375, + "logps/rejected": -353.549560546875, + "loss": 4.5684, + "margin_dpo/margin_mean": 30.234722137451172, + "margin_dpo/margin_std": 54.877281188964844, + "step": 221 + }, + { + "epoch": 0.4649214659685864, + "fcm_dpo/beta": 0.015347619540989399, + "fcm_dpo/delta": -0.009789157658815384, + "fcm_dpo/margin": 36.647762298583984, + "fcm_dpo/q_t": 0.3810715973377228, + "grad_norm": 95.45844268798828, + "learning_rate": 3.248126059518784e-07, + "logits/chosen": -0.8610984086990356, + "logits/rejected": -0.8496800661087036, + "logps/chosen": -329.9424743652344, + "logps/ref_chosen": -297.1113586425781, + "logps/ref_rejected": -235.53146362304688, + "logps/rejected": -305.0103454589844, + "loss": 4.2022, + "margin_dpo/margin_mean": 36.64776611328125, + "margin_dpo/margin_std": 50.83029556274414, + "step": 222 + }, + { + "epoch": 0.46701570680628274, + "fcm_dpo/beta": 0.015580544248223305, + "fcm_dpo/delta": -0.005719708278775215, + "fcm_dpo/margin": 38.75231170654297, + "fcm_dpo/q_t": 0.37368282675743103, + "grad_norm": 83.94607543945312, + "learning_rate": 3.230637461492043e-07, + "logits/chosen": -0.8233493566513062, + "logits/rejected": -0.7984543442726135, + "logps/chosen": -322.42913818359375, + "logps/ref_chosen": -286.41510009765625, + "logps/ref_rejected": -241.1181640625, + "logps/rejected": -315.884521484375, + "loss": 4.139, + "margin_dpo/margin_mean": 38.75231170654297, + "margin_dpo/margin_std": 53.93544006347656, + "step": 223 + }, + { + "epoch": 0.46910994764397906, + "fcm_dpo/beta": 0.015308534726500511, + "fcm_dpo/delta": -0.07908003032207489, + "fcm_dpo/margin": 41.015872955322266, + "fcm_dpo/q_t": 0.36839425563812256, + "grad_norm": 83.50463104248047, + "learning_rate": 3.213109681595612e-07, + "logits/chosen": -0.7854145765304565, + "logits/rejected": -0.8054001927375793, + "logps/chosen": -282.39862060546875, + "logps/ref_chosen": -249.49234008789062, + "logps/ref_rejected": -233.10752868652344, + "logps/rejected": -307.02972412109375, + "loss": 3.9926, + "margin_dpo/margin_mean": 41.015872955322266, + "margin_dpo/margin_std": 51.277225494384766, + "step": 224 + }, + { + "epoch": 0.4712041884816754, + "fcm_dpo/beta": 0.01455092616379261, + "fcm_dpo/delta": 0.04796172305941582, + "fcm_dpo/margin": 38.115787506103516, + "fcm_dpo/q_t": 0.3868432939052582, + "grad_norm": 94.90240478515625, + "learning_rate": 3.1955436597911315e-07, + "logits/chosen": -0.8136327266693115, + "logits/rejected": -0.7935799360275269, + "logps/chosen": -353.4432067871094, + "logps/ref_chosen": -311.8583679199219, + "logps/ref_rejected": -336.8523864746094, + "logps/rejected": -416.5530090332031, + "loss": 4.3046, + "margin_dpo/margin_mean": 38.11579132080078, + "margin_dpo/margin_std": 58.1151237487793, + "step": 225 + }, + { + "epoch": 0.4732984293193717, + "fcm_dpo/beta": 0.015586531721055508, + "fcm_dpo/delta": 0.07917778939008713, + "fcm_dpo/margin": 33.66019821166992, + "fcm_dpo/q_t": 0.3920612037181854, + "grad_norm": 80.37389373779297, + "learning_rate": 3.1779403380910425e-07, + "logits/chosen": -0.8555701971054077, + "logits/rejected": -0.8487062454223633, + "logps/chosen": -290.4698486328125, + "logps/ref_chosen": -252.20123291015625, + "logps/ref_rejected": -254.41162109375, + "logps/rejected": -326.3404541015625, + "loss": 4.3932, + "margin_dpo/margin_mean": 33.66019821166992, + "margin_dpo/margin_std": 55.53483581542969, + "step": 226 + }, + { + "epoch": 0.47539267015706804, + "fcm_dpo/beta": 0.01585298217833042, + "fcm_dpo/delta": -0.0467713437974453, + "fcm_dpo/margin": 40.5180549621582, + "fcm_dpo/q_t": 0.36852991580963135, + "grad_norm": 112.51945495605469, + "learning_rate": 3.160300660508064e-07, + "logits/chosen": -0.8035961985588074, + "logits/rejected": -0.8008553385734558, + "logps/chosen": -324.879150390625, + "logps/ref_chosen": -285.25946044921875, + "logps/ref_rejected": -261.3220520019531, + "logps/rejected": -341.4598083496094, + "loss": 4.2243, + "margin_dpo/margin_mean": 40.5180549621582, + "margin_dpo/margin_std": 60.73136901855469, + "step": 227 + }, + { + "epoch": 0.4774869109947644, + "fcm_dpo/beta": 0.015510935336351395, + "fcm_dpo/delta": -0.051342956721782684, + "fcm_dpo/margin": 41.696563720703125, + "fcm_dpo/q_t": 0.3670775890350342, + "grad_norm": 85.83709716796875, + "learning_rate": 3.1426255730045695e-07, + "logits/chosen": -0.8358519077301025, + "logits/rejected": -0.8068508505821228, + "logps/chosen": -348.1343078613281, + "logps/ref_chosen": -313.81878662109375, + "logps/ref_rejected": -258.07061767578125, + "logps/rejected": -334.08270263671875, + "loss": 4.0336, + "margin_dpo/margin_mean": 41.696563720703125, + "margin_dpo/margin_std": 54.898597717285156, + "step": 228 + }, + { + "epoch": 0.47958115183246075, + "fcm_dpo/beta": 0.014525864273309708, + "fcm_dpo/delta": -0.08014161139726639, + "fcm_dpo/margin": 46.30763244628906, + "fcm_dpo/q_t": 0.3581668734550476, + "grad_norm": 171.63238525390625, + "learning_rate": 3.1249160234418644e-07, + "logits/chosen": -0.8062803149223328, + "logits/rejected": -0.8233762979507446, + "logps/chosen": -334.2206726074219, + "logps/ref_chosen": -291.9707946777344, + "logps/ref_rejected": -263.42059326171875, + "logps/rejected": -351.9781494140625, + "loss": 3.9764, + "margin_dpo/margin_mean": 46.30763244628906, + "margin_dpo/margin_std": 58.0003662109375, + "step": 229 + }, + { + "epoch": 0.4816753926701571, + "fcm_dpo/beta": 0.013805052265524864, + "fcm_dpo/delta": -0.005476825870573521, + "fcm_dpo/margin": 43.70093536376953, + "fcm_dpo/q_t": 0.37367361783981323, + "grad_norm": 79.83263397216797, + "learning_rate": 3.1071729615293424e-07, + "logits/chosen": -0.8613869547843933, + "logits/rejected": -0.8628825545310974, + "logps/chosen": -272.9879150390625, + "logps/ref_chosen": -233.2601318359375, + "logps/ref_rejected": -238.922119140625, + "logps/rejected": -322.3508605957031, + "loss": 4.134, + "margin_dpo/margin_mean": 43.70093536376953, + "margin_dpo/margin_std": 60.809654235839844, + "step": 230 + }, + { + "epoch": 0.4837696335078534, + "fcm_dpo/beta": 0.014280532486736774, + "fcm_dpo/delta": 0.054186657071113586, + "fcm_dpo/margin": 34.104496002197266, + "fcm_dpo/q_t": 0.39545977115631104, + "grad_norm": 89.10991668701172, + "learning_rate": 3.0893973387735683e-07, + "logits/chosen": -0.8317367434501648, + "logits/rejected": -0.8229210376739502, + "logps/chosen": -370.91632080078125, + "logps/ref_chosen": -322.1551818847656, + "logps/ref_rejected": -280.97613525390625, + "logps/rejected": -363.8418273925781, + "loss": 4.4358, + "margin_dpo/margin_mean": 34.10449981689453, + "margin_dpo/margin_std": 54.3597412109375, + "step": 231 + }, + { + "epoch": 0.48586387434554973, + "fcm_dpo/beta": 0.01439041830599308, + "fcm_dpo/delta": -0.028247211128473282, + "fcm_dpo/margin": 37.74383544921875, + "fcm_dpo/q_t": 0.38723382353782654, + "grad_norm": 111.32173919677734, + "learning_rate": 3.071590108427243e-07, + "logits/chosen": -0.8064876198768616, + "logits/rejected": -0.7893252372741699, + "logps/chosen": -321.2291564941406, + "logps/ref_chosen": -271.7437744140625, + "logps/ref_rejected": -249.94981384277344, + "logps/rejected": -337.1790466308594, + "loss": 4.4283, + "margin_dpo/margin_mean": 37.74383544921875, + "margin_dpo/margin_std": 60.81903839111328, + "step": 232 + }, + { + "epoch": 0.48795811518324606, + "fcm_dpo/beta": 0.013932683505117893, + "fcm_dpo/delta": -0.07837289571762085, + "fcm_dpo/margin": 41.53171157836914, + "fcm_dpo/q_t": 0.37816399335861206, + "grad_norm": 93.736328125, + "learning_rate": 3.05375222543809e-07, + "logits/chosen": -0.8585054278373718, + "logits/rejected": -0.8508076071739197, + "logps/chosen": -335.4866943359375, + "logps/ref_chosen": -285.3423156738281, + "logps/ref_rejected": -266.34320068359375, + "logps/rejected": -358.0192565917969, + "loss": 4.2142, + "margin_dpo/margin_mean": 41.53171157836914, + "margin_dpo/margin_std": 59.24362564086914, + "step": 233 + }, + { + "epoch": 0.4900523560209424, + "fcm_dpo/beta": 0.01374006737023592, + "fcm_dpo/delta": 0.036976464092731476, + "fcm_dpo/margin": 41.03116226196289, + "fcm_dpo/q_t": 0.3843136429786682, + "grad_norm": 78.69235229492188, + "learning_rate": 3.035884646397637e-07, + "logits/chosen": -0.829176664352417, + "logits/rejected": -0.812563419342041, + "logps/chosen": -345.6146545410156, + "logps/ref_chosen": -294.9057312011719, + "logps/ref_rejected": -299.37054443359375, + "logps/rejected": -391.11065673828125, + "loss": 4.4275, + "margin_dpo/margin_mean": 41.03116226196289, + "margin_dpo/margin_std": 68.48192596435547, + "step": 234 + }, + { + "epoch": 0.49214659685863876, + "fcm_dpo/beta": 0.01411922462284565, + "fcm_dpo/delta": 0.003345828503370285, + "fcm_dpo/margin": 42.21276092529297, + "fcm_dpo/q_t": 0.37557002902030945, + "grad_norm": 109.72699737548828, + "learning_rate": 3.017988329489923e-07, + "logits/chosen": -0.8408417701721191, + "logits/rejected": -0.8409253358840942, + "logps/chosen": -343.94256591796875, + "logps/ref_chosen": -289.49755859375, + "logps/ref_rejected": -247.55076599121094, + "logps/rejected": -344.20849609375, + "loss": 4.2826, + "margin_dpo/margin_mean": 42.2127571105957, + "margin_dpo/margin_std": 65.22442626953125, + "step": 235 + }, + { + "epoch": 0.4942408376963351, + "fcm_dpo/beta": 0.013934805057942867, + "fcm_dpo/delta": -0.03408358246088028, + "fcm_dpo/margin": 42.292049407958984, + "fcm_dpo/q_t": 0.3777884840965271, + "grad_norm": 81.88858032226562, + "learning_rate": 3.000064234440111e-07, + "logits/chosen": -0.8615151643753052, + "logits/rejected": -0.8628526926040649, + "logps/chosen": -339.2417297363281, + "logps/ref_chosen": -288.8846435546875, + "logps/ref_rejected": -242.0452880859375, + "logps/rejected": -334.6944274902344, + "loss": 4.2538, + "margin_dpo/margin_mean": 42.292049407958984, + "margin_dpo/margin_std": 62.85895538330078, + "step": 236 + }, + { + "epoch": 0.4963350785340314, + "fcm_dpo/beta": 0.013490064069628716, + "fcm_dpo/delta": -0.031569261103868484, + "fcm_dpo/margin": 42.74472427368164, + "fcm_dpo/q_t": 0.3792114853858948, + "grad_norm": 85.20064544677734, + "learning_rate": 2.9821133224630223e-07, + "logits/chosen": -0.8437389731407166, + "logits/rejected": -0.8258963227272034, + "logps/chosen": -320.6917419433594, + "logps/ref_chosen": -265.47869873046875, + "logps/ref_rejected": -267.9891357421875, + "logps/rejected": -365.94683837890625, + "loss": 4.2127, + "margin_dpo/margin_mean": 42.74472427368164, + "margin_dpo/margin_std": 61.919334411621094, + "step": 237 + }, + { + "epoch": 0.49842931937172774, + "fcm_dpo/beta": 0.013200972229242325, + "fcm_dpo/delta": 0.01033791620284319, + "fcm_dpo/margin": 40.789093017578125, + "fcm_dpo/q_t": 0.38993343710899353, + "grad_norm": 93.29105377197266, + "learning_rate": 2.964136556211588e-07, + "logits/chosen": -0.8295376300811768, + "logits/rejected": -0.8033552169799805, + "logps/chosen": -369.406982421875, + "logps/ref_chosen": -312.0026550292969, + "logps/ref_rejected": -270.0257263183594, + "logps/rejected": -368.21917724609375, + "loss": 4.327, + "margin_dpo/margin_mean": 40.789093017578125, + "margin_dpo/margin_std": 64.44735717773438, + "step": 238 + }, + { + "epoch": 0.5005235602094241, + "fcm_dpo/beta": 0.013887631706893444, + "fcm_dpo/delta": 0.09677696973085403, + "fcm_dpo/margin": 36.4874382019043, + "fcm_dpo/q_t": 0.3990153670310974, + "grad_norm": 100.5346908569336, + "learning_rate": 2.946134899725226e-07, + "logits/chosen": -0.8349162936210632, + "logits/rejected": -0.8748633861541748, + "logps/chosen": -320.4220275878906, + "logps/ref_chosen": -267.167236328125, + "logps/ref_rejected": -275.99468994140625, + "logps/rejected": -365.7369384765625, + "loss": 4.6325, + "margin_dpo/margin_mean": 36.4874382019043, + "margin_dpo/margin_std": 70.55658721923828, + "step": 239 + }, + { + "epoch": 0.5026178010471204, + "fcm_dpo/beta": 0.013791955076158047, + "fcm_dpo/delta": -0.048899125307798386, + "fcm_dpo/margin": 46.676414489746094, + "fcm_dpo/q_t": 0.3687818944454193, + "grad_norm": 117.41996765136719, + "learning_rate": 2.9281093183781403e-07, + "logits/chosen": -0.8881155848503113, + "logits/rejected": -0.8836052417755127, + "logps/chosen": -337.365478515625, + "logps/ref_chosen": -285.9796142578125, + "logps/ref_rejected": -256.8258056640625, + "logps/rejected": -354.8880615234375, + "loss": 4.0805, + "margin_dpo/margin_mean": 46.67641830444336, + "margin_dpo/margin_std": 65.10855102539062, + "step": 240 + }, + { + "epoch": 0.5047120418848168, + "fcm_dpo/beta": 0.013698762282729149, + "fcm_dpo/delta": 0.03592575713992119, + "fcm_dpo/margin": 37.19944381713867, + "fcm_dpo/q_t": 0.3960975408554077, + "grad_norm": 95.49946594238281, + "learning_rate": 2.910060778827554e-07, + "logits/chosen": -0.7951388359069824, + "logits/rejected": -0.7752350568771362, + "logps/chosen": -321.134033203125, + "logps/ref_chosen": -261.516845703125, + "logps/ref_rejected": -250.2250518798828, + "logps/rejected": -347.0416564941406, + "loss": 4.529, + "margin_dpo/margin_mean": 37.19944381713867, + "margin_dpo/margin_std": 65.24166107177734, + "step": 241 + }, + { + "epoch": 0.506806282722513, + "fcm_dpo/beta": 0.014109227806329727, + "fcm_dpo/delta": -0.023093625903129578, + "fcm_dpo/margin": 43.914390563964844, + "fcm_dpo/q_t": 0.3734211027622223, + "grad_norm": 97.55506134033203, + "learning_rate": 2.891990248961871e-07, + "logits/chosen": -0.8705978393554688, + "logits/rejected": -0.8577161431312561, + "logps/chosen": -322.5037536621094, + "logps/ref_chosen": -270.51397705078125, + "logps/ref_rejected": -244.8560791015625, + "logps/rejected": -340.76025390625, + "loss": 4.1074, + "margin_dpo/margin_mean": 43.91438674926758, + "margin_dpo/margin_std": 60.707244873046875, + "step": 242 + }, + { + "epoch": 0.5089005235602094, + "fcm_dpo/beta": 0.013829024508595467, + "fcm_dpo/delta": -0.07154600322246552, + "fcm_dpo/margin": 48.14585876464844, + "fcm_dpo/q_t": 0.36528927087783813, + "grad_norm": 109.1782455444336, + "learning_rate": 2.873898697848762e-07, + "logits/chosen": -0.8485463857650757, + "logits/rejected": -0.8369187116622925, + "logps/chosen": -370.865234375, + "logps/ref_chosen": -324.68206787109375, + "logps/ref_rejected": -307.1111755371094, + "logps/rejected": -401.440185546875, + "loss": 4.0443, + "margin_dpo/margin_mean": 48.14585876464844, + "margin_dpo/margin_std": 65.65919494628906, + "step": 243 + }, + { + "epoch": 0.5109947643979058, + "fcm_dpo/beta": 0.012862252071499825, + "fcm_dpo/delta": -0.007831787690520287, + "fcm_dpo/margin": 47.06397247314453, + "fcm_dpo/q_t": 0.3703567385673523, + "grad_norm": 87.85368347167969, + "learning_rate": 2.8557870956832133e-07, + "logits/chosen": -0.8476990461349487, + "logits/rejected": -0.8005751967430115, + "logps/chosen": -365.91729736328125, + "logps/ref_chosen": -318.979248046875, + "logps/ref_rejected": -269.67572021484375, + "logps/rejected": -363.677734375, + "loss": 4.0768, + "margin_dpo/margin_mean": 47.06397247314453, + "margin_dpo/margin_std": 60.87822723388672, + "step": 244 + }, + { + "epoch": 0.5130890052356021, + "fcm_dpo/beta": 0.012774711474776268, + "fcm_dpo/delta": -0.07881193608045578, + "fcm_dpo/margin": 47.673194885253906, + "fcm_dpo/q_t": 0.3689280152320862, + "grad_norm": 81.23341369628906, + "learning_rate": 2.837656413735479e-07, + "logits/chosen": -0.8486171960830688, + "logits/rejected": -0.8539371490478516, + "logps/chosen": -338.697265625, + "logps/ref_chosen": -294.8980712890625, + "logps/ref_rejected": -239.8111114501953, + "logps/rejected": -331.2834777832031, + "loss": 4.0503, + "margin_dpo/margin_mean": 47.67319869995117, + "margin_dpo/margin_std": 59.50359344482422, + "step": 245 + }, + { + "epoch": 0.5151832460732985, + "fcm_dpo/beta": 0.012836070731282234, + "fcm_dpo/delta": 0.08966440707445145, + "fcm_dpo/margin": 35.876922607421875, + "fcm_dpo/q_t": 0.4043683707714081, + "grad_norm": 97.06179809570312, + "learning_rate": 2.8195076242990116e-07, + "logits/chosen": -0.823259711265564, + "logits/rejected": -0.8320043087005615, + "logps/chosen": -336.7489318847656, + "logps/ref_chosen": -280.6854248046875, + "logps/ref_rejected": -253.65382385253906, + "logps/rejected": -345.59423828125, + "loss": 4.5646, + "margin_dpo/margin_mean": 35.876922607421875, + "margin_dpo/margin_std": 64.8729248046875, + "step": 246 + }, + { + "epoch": 0.5172774869109947, + "fcm_dpo/beta": 0.01340182963758707, + "fcm_dpo/delta": 0.010741522535681725, + "fcm_dpo/margin": 40.165985107421875, + "fcm_dpo/q_t": 0.38809463381767273, + "grad_norm": 82.3198013305664, + "learning_rate": 2.801341700638307e-07, + "logits/chosen": -0.8334712386131287, + "logits/rejected": -0.8363280296325684, + "logps/chosen": -332.05615234375, + "logps/ref_chosen": -281.1091003417969, + "logps/ref_rejected": -260.3700866699219, + "logps/rejected": -351.4831237792969, + "loss": 4.295, + "margin_dpo/margin_mean": 40.165985107421875, + "margin_dpo/margin_std": 59.34774398803711, + "step": 247 + }, + { + "epoch": 0.5193717277486911, + "fcm_dpo/beta": 0.013335911557078362, + "fcm_dpo/delta": 0.03803172707557678, + "fcm_dpo/margin": 37.84646224975586, + "fcm_dpo/q_t": 0.39170122146606445, + "grad_norm": 96.70375061035156, + "learning_rate": 2.7831596169367227e-07, + "logits/chosen": -0.795592188835144, + "logits/rejected": -0.8106747269630432, + "logps/chosen": -320.57391357421875, + "logps/ref_chosen": -270.318359375, + "logps/ref_rejected": -233.46778869628906, + "logps/rejected": -321.56982421875, + "loss": 4.3839, + "margin_dpo/margin_mean": 37.84646224975586, + "margin_dpo/margin_std": 58.59114074707031, + "step": 248 + }, + { + "epoch": 0.5214659685863874, + "fcm_dpo/beta": 0.013947556726634502, + "fcm_dpo/delta": 0.03363037109375, + "fcm_dpo/margin": 36.132991790771484, + "fcm_dpo/q_t": 0.3958445191383362, + "grad_norm": 102.5847396850586, + "learning_rate": 2.7649623482442274e-07, + "logits/chosen": -0.8209048509597778, + "logits/rejected": -0.8001272082328796, + "logps/chosen": -337.85406494140625, + "logps/ref_chosen": -275.8088684082031, + "logps/ref_rejected": -243.45138549804688, + "logps/rejected": -341.6295471191406, + "loss": 4.566, + "margin_dpo/margin_mean": 36.132991790771484, + "margin_dpo/margin_std": 66.05538940429688, + "step": 249 + }, + { + "epoch": 0.5235602094240838, + "fcm_dpo/beta": 0.013355924747884274, + "fcm_dpo/delta": -0.04608849063515663, + "fcm_dpo/margin": 47.88051223754883, + "fcm_dpo/q_t": 0.36725619435310364, + "grad_norm": 95.6384048461914, + "learning_rate": 2.7467508704251135e-07, + "logits/chosen": -0.829230010509491, + "logits/rejected": -0.8328065872192383, + "logps/chosen": -355.0364990234375, + "logps/ref_chosen": -292.4945373535156, + "logps/ref_rejected": -284.2869567871094, + "logps/rejected": -394.7093811035156, + "loss": 4.1352, + "margin_dpo/margin_mean": 47.88051223754883, + "margin_dpo/margin_std": 67.44532012939453, + "step": 250 + }, + { + "epoch": 0.5256544502617801, + "fcm_dpo/beta": 0.013813665136694908, + "fcm_dpo/delta": -0.007170406170189381, + "fcm_dpo/margin": 43.663360595703125, + "fcm_dpo/q_t": 0.3814099431037903, + "grad_norm": 100.86103820800781, + "learning_rate": 2.7285261601056697e-07, + "logits/chosen": -0.8296136856079102, + "logits/rejected": -0.8152703046798706, + "logps/chosen": -336.88873291015625, + "logps/ref_chosen": -281.736572265625, + "logps/ref_rejected": -255.9419708251953, + "logps/rejected": -354.75750732421875, + "loss": 4.1787, + "margin_dpo/margin_mean": 43.66335678100586, + "margin_dpo/margin_std": 63.43466567993164, + "step": 251 + }, + { + "epoch": 0.5277486910994764, + "fcm_dpo/beta": 0.013664179481565952, + "fcm_dpo/delta": 0.0335024930536747, + "fcm_dpo/margin": 41.402076721191406, + "fcm_dpo/q_t": 0.3806874752044678, + "grad_norm": 102.68427276611328, + "learning_rate": 2.7102891946217994e-07, + "logits/chosen": -0.8773578405380249, + "logits/rejected": -0.854051411151886, + "logps/chosen": -360.0166931152344, + "logps/ref_chosen": -295.9674072265625, + "logps/ref_rejected": -280.111572265625, + "logps/rejected": -385.56292724609375, + "loss": 4.3841, + "margin_dpo/margin_mean": 41.40208053588867, + "margin_dpo/margin_std": 66.1944580078125, + "step": 252 + }, + { + "epoch": 0.5298429319371728, + "fcm_dpo/beta": 0.013652501627802849, + "fcm_dpo/delta": -0.021744156256318092, + "fcm_dpo/margin": 41.56562805175781, + "fcm_dpo/q_t": 0.38615942001342773, + "grad_norm": 96.21172332763672, + "learning_rate": 2.692040951966617e-07, + "logits/chosen": -0.8553462624549866, + "logits/rejected": -0.848787248134613, + "logps/chosen": -346.29815673828125, + "logps/ref_chosen": -277.072265625, + "logps/ref_rejected": -247.31643676757812, + "logps/rejected": -358.10791015625, + "loss": 4.3891, + "margin_dpo/margin_mean": 41.56563186645508, + "margin_dpo/margin_std": 68.40611267089844, + "step": 253 + }, + { + "epoch": 0.5319371727748691, + "fcm_dpo/beta": 0.01416382659226656, + "fcm_dpo/delta": -0.016133006662130356, + "fcm_dpo/margin": 43.26961898803711, + "fcm_dpo/q_t": 0.37535524368286133, + "grad_norm": 99.73017120361328, + "learning_rate": 2.6737824107379947e-07, + "logits/chosen": -0.7875509858131409, + "logits/rejected": -0.7763053774833679, + "logps/chosen": -334.57989501953125, + "logps/ref_chosen": -269.9478454589844, + "logps/ref_rejected": -249.45005798339844, + "logps/rejected": -357.3516845703125, + "loss": 4.18, + "margin_dpo/margin_mean": 43.26961898803711, + "margin_dpo/margin_std": 61.28417205810547, + "step": 254 + }, + { + "epoch": 0.5340314136125655, + "fcm_dpo/beta": 0.013342966325581074, + "fcm_dpo/delta": -0.06489241868257523, + "fcm_dpo/margin": 49.52783966064453, + "fcm_dpo/q_t": 0.36613547801971436, + "grad_norm": 90.38292694091797, + "learning_rate": 2.655514550086086e-07, + "logits/chosen": -0.8106395602226257, + "logits/rejected": -0.7797207832336426, + "logps/chosen": -370.4023742675781, + "logps/ref_chosen": -306.6552734375, + "logps/ref_rejected": -254.47528076171875, + "logps/rejected": -367.7502136230469, + "loss": 4.1532, + "margin_dpo/margin_mean": 49.52783966064453, + "margin_dpo/margin_std": 72.60646057128906, + "step": 255 + }, + { + "epoch": 0.5361256544502618, + "fcm_dpo/beta": 0.012888522818684578, + "fcm_dpo/delta": -0.017480649054050446, + "fcm_dpo/margin": 47.27513122558594, + "fcm_dpo/q_t": 0.3648688495159149, + "grad_norm": 255.97872924804688, + "learning_rate": 2.6372383496608186e-07, + "logits/chosen": -0.8314058184623718, + "logits/rejected": -0.827141523361206, + "logps/chosen": -388.43408203125, + "logps/ref_chosen": -323.7181701660156, + "logps/ref_rejected": -254.1871337890625, + "logps/rejected": -366.1781311035156, + "loss": 4.5363, + "margin_dpo/margin_mean": 47.2751350402832, + "margin_dpo/margin_std": 78.98124694824219, + "step": 256 + }, + { + "epoch": 0.5382198952879581, + "fcm_dpo/beta": 0.012398256920278072, + "fcm_dpo/delta": -0.015751376748085022, + "fcm_dpo/margin": 49.54781723022461, + "fcm_dpo/q_t": 0.3713992238044739, + "grad_norm": 97.32785034179688, + "learning_rate": 2.618954789559356e-07, + "logits/chosen": -0.8290724158287048, + "logits/rejected": -0.8196491003036499, + "logps/chosen": -331.4079895019531, + "logps/ref_chosen": -267.21209716796875, + "logps/ref_rejected": -249.12579345703125, + "logps/rejected": -362.8694763183594, + "loss": 4.039, + "margin_dpo/margin_mean": 49.54781723022461, + "margin_dpo/margin_std": 66.0081558227539, + "step": 257 + }, + { + "epoch": 0.5403141361256545, + "fcm_dpo/beta": 0.011941884644329548, + "fcm_dpo/delta": -0.021545007824897766, + "fcm_dpo/margin": 51.69853210449219, + "fcm_dpo/q_t": 0.36628904938697815, + "grad_norm": 81.3831787109375, + "learning_rate": 2.600664850273538e-07, + "logits/chosen": -0.8486968278884888, + "logits/rejected": -0.8191419243812561, + "logps/chosen": -345.801025390625, + "logps/ref_chosen": -277.6827392578125, + "logps/ref_rejected": -250.73385620117188, + "logps/rejected": -370.5506591796875, + "loss": 3.9976, + "margin_dpo/margin_mean": 51.69853591918945, + "margin_dpo/margin_std": 62.97686004638672, + "step": 258 + }, + { + "epoch": 0.5424083769633508, + "fcm_dpo/beta": 0.01243941206485033, + "fcm_dpo/delta": 0.02084418572485447, + "fcm_dpo/margin": 46.594276428222656, + "fcm_dpo/q_t": 0.3780772387981415, + "grad_norm": 86.16590118408203, + "learning_rate": 2.582369512637302e-07, + "logits/chosen": -0.8632500171661377, + "logits/rejected": -0.8614512085914612, + "logps/chosen": -354.69976806640625, + "logps/ref_chosen": -294.6099853515625, + "logps/ref_rejected": -272.2725830078125, + "logps/rejected": -378.9566345214844, + "loss": 4.1194, + "margin_dpo/margin_mean": 46.594268798828125, + "margin_dpo/margin_std": 63.69516372680664, + "step": 259 + }, + { + "epoch": 0.5445026178010471, + "fcm_dpo/beta": 0.013677787035703659, + "fcm_dpo/delta": 0.19107326865196228, + "fcm_dpo/margin": 22.353225708007812, + "fcm_dpo/q_t": 0.43973931670188904, + "grad_norm": 113.30580139160156, + "learning_rate": 2.5640697577740815e-07, + "logits/chosen": -0.8496757745742798, + "logits/rejected": -0.8480501174926758, + "logps/chosen": -357.86456298828125, + "logps/ref_chosen": -290.85711669921875, + "logps/ref_rejected": -277.5970153808594, + "logps/rejected": -366.95770263671875, + "loss": 5.1521, + "margin_dpo/margin_mean": 22.353225708007812, + "margin_dpo/margin_std": 64.10260772705078, + "step": 260 + }, + { + "epoch": 0.5465968586387434, + "fcm_dpo/beta": 0.014235386624932289, + "fcm_dpo/delta": -0.05504711717367172, + "fcm_dpo/margin": 37.57318115234375, + "fcm_dpo/q_t": 0.39546385407447815, + "grad_norm": 130.93417358398438, + "learning_rate": 2.5457665670441937e-07, + "logits/chosen": -0.733401358127594, + "logits/rejected": -0.7483704090118408, + "logps/chosen": -322.2603454589844, + "logps/ref_chosen": -251.13223266601562, + "logps/ref_rejected": -244.76016235351562, + "logps/rejected": -353.4614562988281, + "loss": 4.6495, + "margin_dpo/margin_mean": 37.57318115234375, + "margin_dpo/margin_std": 71.4426040649414, + "step": 261 + }, + { + "epoch": 0.5486910994764398, + "fcm_dpo/beta": 0.013635975308716297, + "fcm_dpo/delta": -0.06950134038925171, + "fcm_dpo/margin": 48.76420211791992, + "fcm_dpo/q_t": 0.365522176027298, + "grad_norm": 102.4128646850586, + "learning_rate": 2.527460921992209e-07, + "logits/chosen": -0.7756036520004272, + "logits/rejected": -0.7701444625854492, + "logps/chosen": -363.00665283203125, + "logps/ref_chosen": -299.7217712402344, + "logps/ref_rejected": -277.0969543457031, + "logps/rejected": -389.14605712890625, + "loss": 4.0242, + "margin_dpo/margin_mean": 48.76420211791992, + "margin_dpo/margin_std": 65.38024139404297, + "step": 262 + }, + { + "epoch": 0.5507853403141362, + "fcm_dpo/beta": 0.013076528906822205, + "fcm_dpo/delta": -0.0265921950340271, + "fcm_dpo/margin": 40.98373794555664, + "fcm_dpo/q_t": 0.38731229305267334, + "grad_norm": 84.20980072021484, + "learning_rate": 2.509153804294318e-07, + "logits/chosen": -0.7757068276405334, + "logits/rejected": -0.7594835758209229, + "logps/chosen": -350.37353515625, + "logps/ref_chosen": -279.95257568359375, + "logps/ref_rejected": -256.5327453613281, + "logps/rejected": -367.9373779296875, + "loss": 4.4423, + "margin_dpo/margin_mean": 40.983741760253906, + "margin_dpo/margin_std": 67.5083236694336, + "step": 263 + }, + { + "epoch": 0.5528795811518324, + "fcm_dpo/beta": 0.012376993894577026, + "fcm_dpo/delta": -0.06797336786985397, + "fcm_dpo/margin": 49.21453857421875, + "fcm_dpo/q_t": 0.37006676197052, + "grad_norm": 106.1910400390625, + "learning_rate": 2.4908461957056825e-07, + "logits/chosen": -0.7897322177886963, + "logits/rejected": -0.7906150817871094, + "logps/chosen": -323.62689208984375, + "logps/ref_chosen": -260.53509521484375, + "logps/ref_rejected": -255.53799438476562, + "logps/rejected": -367.8443298339844, + "loss": 4.0584, + "margin_dpo/margin_mean": 49.21453857421875, + "margin_dpo/margin_std": 64.37642669677734, + "step": 264 + }, + { + "epoch": 0.5549738219895288, + "fcm_dpo/beta": 0.011801987886428833, + "fcm_dpo/delta": -0.024343391880393028, + "fcm_dpo/margin": 52.564517974853516, + "fcm_dpo/q_t": 0.36950555443763733, + "grad_norm": 83.8652114868164, + "learning_rate": 2.4725390780077905e-07, + "logits/chosen": -0.8621577024459839, + "logits/rejected": -0.8714127540588379, + "logps/chosen": -347.90740966796875, + "logps/ref_chosen": -283.7130432128906, + "logps/ref_rejected": -270.3209533691406, + "logps/rejected": -387.079833984375, + "loss": 4.1157, + "margin_dpo/margin_mean": 52.564517974853516, + "margin_dpo/margin_std": 71.61198425292969, + "step": 265 + }, + { + "epoch": 0.5570680628272251, + "fcm_dpo/beta": 0.01190432533621788, + "fcm_dpo/delta": -0.02119002863764763, + "fcm_dpo/margin": 51.92703628540039, + "fcm_dpo/q_t": 0.36642715334892273, + "grad_norm": 75.40443420410156, + "learning_rate": 2.454233432955807e-07, + "logits/chosen": -0.8703705072402954, + "logits/rejected": -0.8404238224029541, + "logps/chosen": -333.9207458496094, + "logps/ref_chosen": -278.09930419921875, + "logps/ref_rejected": -260.6734619140625, + "logps/rejected": -368.4219665527344, + "loss": 3.9296, + "margin_dpo/margin_mean": 51.92703628540039, + "margin_dpo/margin_std": 59.450191497802734, + "step": 266 + }, + { + "epoch": 0.5591623036649215, + "fcm_dpo/beta": 0.011871559545397758, + "fcm_dpo/delta": 0.046678848564624786, + "fcm_dpo/margin": 42.63209533691406, + "fcm_dpo/q_t": 0.39243483543395996, + "grad_norm": 101.08575439453125, + "learning_rate": 2.435930242225919e-07, + "logits/chosen": -0.8200643658638, + "logits/rejected": -0.834830105304718, + "logps/chosen": -349.9026794433594, + "logps/ref_chosen": -280.33319091796875, + "logps/ref_rejected": -247.78099060058594, + "logps/rejected": -359.9825744628906, + "loss": 4.3181, + "margin_dpo/margin_mean": 42.63209533691406, + "margin_dpo/margin_std": 63.018951416015625, + "step": 267 + }, + { + "epoch": 0.5612565445026177, + "fcm_dpo/beta": 0.012035196647047997, + "fcm_dpo/delta": -0.05291684344410896, + "fcm_dpo/margin": 53.979488372802734, + "fcm_dpo/q_t": 0.36334070563316345, + "grad_norm": 90.82762145996094, + "learning_rate": 2.4176304873626984e-07, + "logits/chosen": -0.7756884098052979, + "logits/rejected": -0.7557308673858643, + "logps/chosen": -370.0815734863281, + "logps/ref_chosen": -304.1787109375, + "logps/ref_rejected": -272.80316162109375, + "logps/rejected": -392.685546875, + "loss": 3.968, + "margin_dpo/margin_mean": 53.979488372802734, + "margin_dpo/margin_std": 67.94827270507812, + "step": 268 + }, + { + "epoch": 0.5633507853403141, + "fcm_dpo/beta": 0.012536915019154549, + "fcm_dpo/delta": 0.11777209490537643, + "fcm_dpo/margin": 38.674407958984375, + "fcm_dpo/q_t": 0.39720258116722107, + "grad_norm": 126.63288116455078, + "learning_rate": 2.399335149726463e-07, + "logits/chosen": -0.8296777606010437, + "logits/rejected": -0.8268716931343079, + "logps/chosen": -321.98870849609375, + "logps/ref_chosen": -249.84512329101562, + "logps/ref_rejected": -223.37356567382812, + "logps/rejected": -334.19158935546875, + "loss": 4.5857, + "margin_dpo/margin_mean": 38.67441177368164, + "margin_dpo/margin_std": 72.29447174072266, + "step": 269 + }, + { + "epoch": 0.5654450261780105, + "fcm_dpo/beta": 0.012732122093439102, + "fcm_dpo/delta": 0.004775438457727432, + "fcm_dpo/margin": 46.69505310058594, + "fcm_dpo/q_t": 0.3786012828350067, + "grad_norm": 100.78535461425781, + "learning_rate": 2.381045210440644e-07, + "logits/chosen": -0.8925029635429382, + "logits/rejected": -0.9094992876052856, + "logps/chosen": -395.2410888671875, + "logps/ref_chosen": -318.5623779296875, + "logps/ref_rejected": -281.1880798339844, + "logps/rejected": -404.5618591308594, + "loss": 4.3373, + "margin_dpo/margin_mean": 46.69505310058594, + "margin_dpo/margin_std": 75.69879150390625, + "step": 270 + }, + { + "epoch": 0.5675392670157068, + "fcm_dpo/beta": 0.013183288276195526, + "fcm_dpo/delta": -0.0032483600080013275, + "fcm_dpo/margin": 45.56004333496094, + "fcm_dpo/q_t": 0.3815266489982605, + "grad_norm": 102.04464721679688, + "learning_rate": 2.3627616503391812e-07, + "logits/chosen": -0.7499503493309021, + "logits/rejected": -0.7473767995834351, + "logps/chosen": -358.6291198730469, + "logps/ref_chosen": -284.104736328125, + "logps/ref_rejected": -253.9580535888672, + "logps/rejected": -374.0425720214844, + "loss": 4.2719, + "margin_dpo/margin_mean": 45.5600471496582, + "margin_dpo/margin_std": 70.25496673583984, + "step": 271 + }, + { + "epoch": 0.5696335078534032, + "fcm_dpo/beta": 0.01266053318977356, + "fcm_dpo/delta": -0.029928136616945267, + "fcm_dpo/margin": 49.448184967041016, + "fcm_dpo/q_t": 0.37268373370170593, + "grad_norm": 87.62028503417969, + "learning_rate": 2.344485449913914e-07, + "logits/chosen": -0.862612247467041, + "logits/rejected": -0.8508659601211548, + "logps/chosen": -367.183349609375, + "logps/ref_chosen": -297.3590087890625, + "logps/ref_rejected": -279.20196533203125, + "logps/rejected": -398.4744873046875, + "loss": 4.303, + "margin_dpo/margin_mean": 49.448184967041016, + "margin_dpo/margin_std": 78.49717712402344, + "step": 272 + }, + { + "epoch": 0.5717277486910994, + "fcm_dpo/beta": 0.012191718444228172, + "fcm_dpo/delta": -0.02252171002328396, + "fcm_dpo/margin": 50.77750778198242, + "fcm_dpo/q_t": 0.3730708956718445, + "grad_norm": 96.84104919433594, + "learning_rate": 2.3262175892620062e-07, + "logits/chosen": -0.829898476600647, + "logits/rejected": -0.8430629968643188, + "logps/chosen": -365.34466552734375, + "logps/ref_chosen": -293.20574951171875, + "logps/ref_rejected": -274.7646789550781, + "logps/rejected": -397.68109130859375, + "loss": 4.2016, + "margin_dpo/margin_mean": 50.77750778198242, + "margin_dpo/margin_std": 74.58787536621094, + "step": 273 + }, + { + "epoch": 0.5738219895287958, + "fcm_dpo/beta": 0.011691069230437279, + "fcm_dpo/delta": -0.11797457188367844, + "fcm_dpo/margin": 60.72999954223633, + "fcm_dpo/q_t": 0.3517453372478485, + "grad_norm": 88.58201599121094, + "learning_rate": 2.3079590480333827e-07, + "logits/chosen": -0.7908748388290405, + "logits/rejected": -0.7618493437767029, + "logps/chosen": -342.7866516113281, + "logps/ref_chosen": -270.55865478515625, + "logps/ref_rejected": -239.47048950195312, + "logps/rejected": -372.428466796875, + "loss": 3.8295, + "margin_dpo/margin_mean": 60.72999572753906, + "margin_dpo/margin_std": 72.17181396484375, + "step": 274 + }, + { + "epoch": 0.5759162303664922, + "fcm_dpo/beta": 0.010889939963817596, + "fcm_dpo/delta": -0.05324774980545044, + "fcm_dpo/margin": 59.565277099609375, + "fcm_dpo/q_t": 0.3633711040019989, + "grad_norm": 70.90768432617188, + "learning_rate": 2.2897108053782e-07, + "logits/chosen": -0.8442721962928772, + "logits/rejected": -0.8297668099403381, + "logps/chosen": -315.0128173828125, + "logps/ref_chosen": -250.31922912597656, + "logps/ref_rejected": -249.3187255859375, + "logps/rejected": -373.57757568359375, + "loss": 3.905, + "margin_dpo/margin_mean": 59.56527328491211, + "margin_dpo/margin_std": 71.25727844238281, + "step": 275 + }, + { + "epoch": 0.5780104712041885, + "fcm_dpo/beta": 0.010673362761735916, + "fcm_dpo/delta": 0.0510733537375927, + "fcm_dpo/margin": 51.6572265625, + "fcm_dpo/q_t": 0.38332486152648926, + "grad_norm": 80.28546905517578, + "learning_rate": 2.2714738398943308e-07, + "logits/chosen": -0.910760223865509, + "logits/rejected": -0.8880026340484619, + "logps/chosen": -372.0062561035156, + "logps/ref_chosen": -297.6310729980469, + "logps/ref_rejected": -295.225830078125, + "logps/rejected": -421.25823974609375, + "loss": 4.2562, + "margin_dpo/margin_mean": 51.6572265625, + "margin_dpo/margin_std": 74.79243469238281, + "step": 276 + }, + { + "epoch": 0.5801047120418849, + "fcm_dpo/beta": 0.01158787589520216, + "fcm_dpo/delta": 0.07226106524467468, + "fcm_dpo/margin": 45.672664642333984, + "fcm_dpo/q_t": 0.38873162865638733, + "grad_norm": 104.22013092041016, + "learning_rate": 2.2532491295748865e-07, + "logits/chosen": -0.8400160074234009, + "logits/rejected": -0.8426806330680847, + "logps/chosen": -344.77178955078125, + "logps/ref_chosen": -266.3604736328125, + "logps/ref_rejected": -253.36767578125, + "logps/rejected": -377.45166015625, + "loss": 4.4179, + "margin_dpo/margin_mean": 45.672664642333984, + "margin_dpo/margin_std": 74.9579849243164, + "step": 277 + }, + { + "epoch": 0.5821989528795811, + "fcm_dpo/beta": 0.0121334008872509, + "fcm_dpo/delta": 0.04825280234217644, + "fcm_dpo/margin": 33.917503356933594, + "fcm_dpo/q_t": 0.420282781124115, + "grad_norm": 115.2516860961914, + "learning_rate": 2.2350376517557726e-07, + "logits/chosen": -0.8667393326759338, + "logits/rejected": -0.8342878222465515, + "logps/chosen": -357.53857421875, + "logps/ref_chosen": -267.40728759765625, + "logps/ref_rejected": -229.5758514404297, + "logps/rejected": -353.6246337890625, + "loss": 4.9917, + "margin_dpo/margin_mean": 33.917503356933594, + "margin_dpo/margin_std": 81.22914123535156, + "step": 278 + }, + { + "epoch": 0.5842931937172775, + "fcm_dpo/beta": 0.011737332679331303, + "fcm_dpo/delta": -0.12058336287736893, + "fcm_dpo/margin": 55.69242858886719, + "fcm_dpo/q_t": 0.3652134835720062, + "grad_norm": 112.99444580078125, + "learning_rate": 2.2168403830632769e-07, + "logits/chosen": -0.781296968460083, + "logits/rejected": -0.7669795751571655, + "logps/chosen": -393.08892822265625, + "logps/ref_chosen": -313.3677978515625, + "logps/ref_rejected": -299.1744384765625, + "logps/rejected": -434.5880126953125, + "loss": 4.1187, + "margin_dpo/margin_mean": 55.69242477416992, + "margin_dpo/margin_std": 78.3192138671875, + "step": 279 + }, + { + "epoch": 0.5863874345549738, + "fcm_dpo/beta": 0.011380909010767937, + "fcm_dpo/delta": 0.04981427267193794, + "fcm_dpo/margin": 48.567710876464844, + "fcm_dpo/q_t": 0.3854876160621643, + "grad_norm": 81.46392059326172, + "learning_rate": 2.1986582993616925e-07, + "logits/chosen": -0.8543354272842407, + "logits/rejected": -0.8661242127418518, + "logps/chosen": -334.360595703125, + "logps/ref_chosen": -265.5558166503906, + "logps/ref_rejected": -247.1573944091797, + "logps/rejected": -364.5298767089844, + "loss": 4.3441, + "margin_dpo/margin_mean": 48.567710876464844, + "margin_dpo/margin_std": 78.15949249267578, + "step": 280 + }, + { + "epoch": 0.5884816753926702, + "fcm_dpo/beta": 0.011563065461814404, + "fcm_dpo/delta": 0.030172260478138924, + "fcm_dpo/margin": 49.29734802246094, + "fcm_dpo/q_t": 0.3839731514453888, + "grad_norm": 101.9212875366211, + "learning_rate": 2.1804923757009882e-07, + "logits/chosen": -0.8250092267990112, + "logits/rejected": -0.8347154855728149, + "logps/chosen": -380.861328125, + "logps/ref_chosen": -295.2995910644531, + "logps/ref_rejected": -293.80877685546875, + "logps/rejected": -428.6678161621094, + "loss": 4.2825, + "margin_dpo/margin_mean": 49.29734420776367, + "margin_dpo/margin_std": 74.99603271484375, + "step": 281 + }, + { + "epoch": 0.5905759162303665, + "fcm_dpo/beta": 0.011664286255836487, + "fcm_dpo/delta": -0.011024218052625656, + "fcm_dpo/margin": 52.18028259277344, + "fcm_dpo/q_t": 0.3756002187728882, + "grad_norm": 89.68161010742188, + "learning_rate": 2.1623435862645205e-07, + "logits/chosen": -0.8206506967544556, + "logits/rejected": -0.8235145211219788, + "logps/chosen": -391.75213623046875, + "logps/ref_chosen": -318.63714599609375, + "logps/ref_rejected": -273.5943603515625, + "logps/rejected": -398.88958740234375, + "loss": 4.2229, + "margin_dpo/margin_mean": 52.18027877807617, + "margin_dpo/margin_std": 77.2578353881836, + "step": 282 + }, + { + "epoch": 0.5926701570680628, + "fcm_dpo/beta": 0.012338871136307716, + "fcm_dpo/delta": 0.04455633834004402, + "fcm_dpo/margin": 44.96173858642578, + "fcm_dpo/q_t": 0.3896506428718567, + "grad_norm": 90.43144989013672, + "learning_rate": 2.1442129043167873e-07, + "logits/chosen": -0.8331937193870544, + "logits/rejected": -0.8291042447090149, + "logps/chosen": -333.6814270019531, + "logps/ref_chosen": -254.66053771972656, + "logps/ref_rejected": -236.8627166748047, + "logps/rejected": -360.8453369140625, + "loss": 4.3718, + "margin_dpo/margin_mean": 44.96173858642578, + "margin_dpo/margin_std": 73.44696044921875, + "step": 283 + }, + { + "epoch": 0.5947643979057592, + "fcm_dpo/beta": 0.011935784481465816, + "fcm_dpo/delta": -0.03917480632662773, + "fcm_dpo/margin": 53.04762649536133, + "fcm_dpo/q_t": 0.37296316027641296, + "grad_norm": 118.94564819335938, + "learning_rate": 2.1261013021512378e-07, + "logits/chosen": -0.8013940453529358, + "logits/rejected": -0.7824323773384094, + "logps/chosen": -353.3193359375, + "logps/ref_chosen": -273.355224609375, + "logps/ref_rejected": -259.84759521484375, + "logps/rejected": -392.8592834472656, + "loss": 4.2865, + "margin_dpo/margin_mean": 53.047630310058594, + "margin_dpo/margin_std": 81.07416534423828, + "step": 284 + }, + { + "epoch": 0.5968586387434555, + "fcm_dpo/beta": 0.012555155903100967, + "fcm_dpo/delta": 0.06368312239646912, + "fcm_dpo/margin": 36.739646911621094, + "fcm_dpo/q_t": 0.40852105617523193, + "grad_norm": 148.76206970214844, + "learning_rate": 2.1080097510381294e-07, + "logits/chosen": -0.8108698725700378, + "logits/rejected": -0.8106898069381714, + "logps/chosen": -394.7806701660156, + "logps/ref_chosen": -309.8022155761719, + "logps/ref_rejected": -279.11846923828125, + "logps/rejected": -400.8365478515625, + "loss": 4.7712, + "margin_dpo/margin_mean": 36.739646911621094, + "margin_dpo/margin_std": 75.5753402709961, + "step": 285 + }, + { + "epoch": 0.5989528795811518, + "fcm_dpo/beta": 0.012427356094121933, + "fcm_dpo/delta": 0.030025284737348557, + "fcm_dpo/margin": 45.97602844238281, + "fcm_dpo/q_t": 0.38827937841415405, + "grad_norm": 124.07766723632812, + "learning_rate": 2.089939221172446e-07, + "logits/chosen": -0.8039661049842834, + "logits/rejected": -0.7928801774978638, + "logps/chosen": -349.1395568847656, + "logps/ref_chosen": -271.4655456542969, + "logps/ref_rejected": -279.531494140625, + "logps/rejected": -403.1815185546875, + "loss": 4.4449, + "margin_dpo/margin_mean": 45.97602844238281, + "margin_dpo/margin_std": 79.96269989013672, + "step": 286 + }, + { + "epoch": 0.6010471204188481, + "fcm_dpo/beta": 0.012432662770152092, + "fcm_dpo/delta": -0.020902253687381744, + "fcm_dpo/margin": 49.71929931640625, + "fcm_dpo/q_t": 0.37481507658958435, + "grad_norm": 98.54369354248047, + "learning_rate": 2.0718906816218595e-07, + "logits/chosen": -0.8174068331718445, + "logits/rejected": -0.8055183291435242, + "logps/chosen": -350.4097595214844, + "logps/ref_chosen": -277.0932312011719, + "logps/ref_rejected": -233.55599975585938, + "logps/rejected": -356.591796875, + "loss": 4.3139, + "margin_dpo/margin_mean": 49.71929931640625, + "margin_dpo/margin_std": 79.30457305908203, + "step": 287 + }, + { + "epoch": 0.6031413612565445, + "fcm_dpo/beta": 0.01297105010598898, + "fcm_dpo/delta": -0.004447203129529953, + "fcm_dpo/margin": 46.434669494628906, + "fcm_dpo/q_t": 0.3777006268501282, + "grad_norm": 120.47964477539062, + "learning_rate": 2.053865100274774e-07, + "logits/chosen": -0.8263804316520691, + "logits/rejected": -0.8423773050308228, + "logps/chosen": -362.74114990234375, + "logps/ref_chosen": -293.1681823730469, + "logps/ref_rejected": -263.4059143066406, + "logps/rejected": -379.41351318359375, + "loss": 4.2667, + "margin_dpo/margin_mean": 46.43466567993164, + "margin_dpo/margin_std": 71.86286926269531, + "step": 288 + }, + { + "epoch": 0.6052356020942409, + "fcm_dpo/beta": 0.013167420402169228, + "fcm_dpo/delta": 0.1251918226480484, + "fcm_dpo/margin": 32.636474609375, + "fcm_dpo/q_t": 0.41369497776031494, + "grad_norm": 108.58908081054688, + "learning_rate": 2.035863443788411e-07, + "logits/chosen": -0.8092857599258423, + "logits/rejected": -0.7957339882850647, + "logps/chosen": -412.3819580078125, + "logps/ref_chosen": -329.9574279785156, + "logps/ref_rejected": -276.7565002441406, + "logps/rejected": -391.8175048828125, + "loss": 4.8066, + "margin_dpo/margin_mean": 32.636474609375, + "margin_dpo/margin_std": 70.96094512939453, + "step": 289 + }, + { + "epoch": 0.6073298429319371, + "fcm_dpo/beta": 0.012977060861885548, + "fcm_dpo/delta": -0.08185821771621704, + "fcm_dpo/margin": 44.748985290527344, + "fcm_dpo/q_t": 0.38607901334762573, + "grad_norm": 140.78160095214844, + "learning_rate": 2.0178866775369774e-07, + "logits/chosen": -0.8182957172393799, + "logits/rejected": -0.7599232196807861, + "logps/chosen": -399.31103515625, + "logps/ref_chosen": -324.6690673828125, + "logps/ref_rejected": -311.8439636230469, + "logps/rejected": -431.23492431640625, + "loss": 4.4597, + "margin_dpo/margin_mean": 44.74897766113281, + "margin_dpo/margin_std": 74.92218780517578, + "step": 290 + }, + { + "epoch": 0.6094240837696335, + "fcm_dpo/beta": 0.012201309204101562, + "fcm_dpo/delta": -0.08980172872543335, + "fcm_dpo/margin": 55.73931121826172, + "fcm_dpo/q_t": 0.3614313304424286, + "grad_norm": 100.59260559082031, + "learning_rate": 1.9999357655598891e-07, + "logits/chosen": -0.7959886193275452, + "logits/rejected": -0.789124608039856, + "logps/chosen": -342.9815673828125, + "logps/ref_chosen": -274.1440734863281, + "logps/ref_rejected": -278.07208251953125, + "logps/rejected": -402.6488342285156, + "loss": 3.9778, + "margin_dpo/margin_mean": 55.73931121826172, + "margin_dpo/margin_std": 71.53327941894531, + "step": 291 + }, + { + "epoch": 0.6115183246073298, + "fcm_dpo/beta": 0.012481886893510818, + "fcm_dpo/delta": 0.098934106528759, + "fcm_dpo/margin": 40.45347213745117, + "fcm_dpo/q_t": 0.39631906151771545, + "grad_norm": 105.07350158691406, + "learning_rate": 1.9820116705100775e-07, + "logits/chosen": -0.7960292100906372, + "logits/rejected": -0.7907694578170776, + "logps/chosen": -324.7724914550781, + "logps/ref_chosen": -259.3636779785156, + "logps/ref_rejected": -279.30218505859375, + "logps/rejected": -385.16448974609375, + "loss": 4.5424, + "margin_dpo/margin_mean": 40.45347213745117, + "margin_dpo/margin_std": 71.65106964111328, + "step": 292 + }, + { + "epoch": 0.6136125654450262, + "fcm_dpo/beta": 0.012850621715188026, + "fcm_dpo/delta": -0.047699183225631714, + "fcm_dpo/margin": 50.01988983154297, + "fcm_dpo/q_t": 0.3677240014076233, + "grad_norm": 105.40121459960938, + "learning_rate": 1.9641153536023642e-07, + "logits/chosen": -0.8889198899269104, + "logits/rejected": -0.8521823287010193, + "logps/chosen": -376.5350646972656, + "logps/ref_chosen": -303.77081298828125, + "logps/ref_rejected": -270.07513427734375, + "logps/rejected": -392.8592224121094, + "loss": 4.0238, + "margin_dpo/margin_mean": 50.01988220214844, + "margin_dpo/margin_std": 65.39283752441406, + "step": 293 + }, + { + "epoch": 0.6157068062827226, + "fcm_dpo/beta": 0.012599381618201733, + "fcm_dpo/delta": -0.005292973015457392, + "fcm_dpo/margin": 47.930747985839844, + "fcm_dpo/q_t": 0.3788926601409912, + "grad_norm": 105.63341522216797, + "learning_rate": 1.9462477745619106e-07, + "logits/chosen": -0.795003354549408, + "logits/rejected": -0.8052266240119934, + "logps/chosen": -302.889892578125, + "logps/ref_chosen": -240.23831176757812, + "logps/ref_rejected": -229.187744140625, + "logps/rejected": -339.77008056640625, + "loss": 4.1926, + "margin_dpo/margin_mean": 47.93075180053711, + "margin_dpo/margin_std": 71.22593688964844, + "step": 294 + }, + { + "epoch": 0.6178010471204188, + "fcm_dpo/beta": 0.012672440148890018, + "fcm_dpo/delta": 0.043098170310258865, + "fcm_dpo/margin": 44.029075622558594, + "fcm_dpo/q_t": 0.38533294200897217, + "grad_norm": 89.81253814697266, + "learning_rate": 1.928409891572757e-07, + "logits/chosen": -0.7766979932785034, + "logits/rejected": -0.7932155728340149, + "logps/chosen": -319.8603210449219, + "logps/ref_chosen": -251.00970458984375, + "logps/ref_rejected": -244.15142822265625, + "logps/rejected": -357.0310974121094, + "loss": 4.3008, + "margin_dpo/margin_mean": 44.029075622558594, + "margin_dpo/margin_std": 67.06430053710938, + "step": 295 + }, + { + "epoch": 0.6198952879581152, + "fcm_dpo/beta": 0.012121832929551601, + "fcm_dpo/delta": -0.1331343948841095, + "fcm_dpo/margin": 59.48346710205078, + "fcm_dpo/q_t": 0.3515579402446747, + "grad_norm": 86.63916015625, + "learning_rate": 1.9106026612264315e-07, + "logits/chosen": -0.7734822034835815, + "logits/rejected": -0.7490954995155334, + "logps/chosen": -363.75494384765625, + "logps/ref_chosen": -293.880615234375, + "logps/ref_rejected": -283.4175720214844, + "logps/rejected": -412.775390625, + "loss": 3.9594, + "margin_dpo/margin_mean": 59.48346710205078, + "margin_dpo/margin_std": 77.31208801269531, + "step": 296 + }, + { + "epoch": 0.6219895287958115, + "fcm_dpo/beta": 0.011517000384628773, + "fcm_dpo/delta": 0.023262428119778633, + "fcm_dpo/margin": 41.557411193847656, + "fcm_dpo/q_t": 0.39879322052001953, + "grad_norm": 90.58515930175781, + "learning_rate": 1.8928270384706582e-07, + "logits/chosen": -0.8670139312744141, + "logits/rejected": -0.8624626994132996, + "logps/chosen": -358.82000732421875, + "logps/ref_chosen": -289.4600830078125, + "logps/ref_rejected": -283.69110107421875, + "logps/rejected": -394.60845947265625, + "loss": 4.4825, + "margin_dpo/margin_mean": 41.557411193847656, + "margin_dpo/margin_std": 69.22006225585938, + "step": 297 + }, + { + "epoch": 0.6240837696335079, + "fcm_dpo/beta": 0.01156252808868885, + "fcm_dpo/delta": -0.06845314055681229, + "fcm_dpo/margin": 48.89856719970703, + "fcm_dpo/q_t": 0.38479888439178467, + "grad_norm": 105.2696533203125, + "learning_rate": 1.875083976558136e-07, + "logits/chosen": -0.7988805770874023, + "logits/rejected": -0.7908245921134949, + "logps/chosen": -369.48431396484375, + "logps/ref_chosen": -306.5150146484375, + "logps/ref_rejected": -280.6969909667969, + "logps/rejected": -392.5648193359375, + "loss": 4.3527, + "margin_dpo/margin_mean": 48.89856719970703, + "margin_dpo/margin_std": 77.35851287841797, + "step": 298 + }, + { + "epoch": 0.6261780104712041, + "fcm_dpo/beta": 0.01131986640393734, + "fcm_dpo/delta": 0.04895632341504097, + "fcm_dpo/margin": 43.01826477050781, + "fcm_dpo/q_t": 0.39540231227874756, + "grad_norm": 94.2397232055664, + "learning_rate": 1.8573744269954297e-07, + "logits/chosen": -0.7741419076919556, + "logits/rejected": -0.7654407024383545, + "logps/chosen": -358.94085693359375, + "logps/ref_chosen": -281.36376953125, + "logps/ref_rejected": -270.39508056640625, + "logps/rejected": -390.9903869628906, + "loss": 4.399, + "margin_dpo/margin_mean": 43.01826477050781, + "margin_dpo/margin_std": 66.27005767822266, + "step": 299 + }, + { + "epoch": 0.6282722513089005, + "fcm_dpo/beta": 0.012323617935180664, + "fcm_dpo/delta": 0.09153569489717484, + "fcm_dpo/margin": 41.455352783203125, + "fcm_dpo/q_t": 0.39375266432762146, + "grad_norm": 146.0800323486328, + "learning_rate": 1.839699339491937e-07, + "logits/chosen": -0.81211256980896, + "logits/rejected": -0.788737952709198, + "logps/chosen": -392.5552978515625, + "logps/ref_chosen": -314.83575439453125, + "logps/ref_rejected": -269.1154479980469, + "logps/rejected": -388.29034423828125, + "loss": 4.4804, + "margin_dpo/margin_mean": 41.45535659790039, + "margin_dpo/margin_std": 71.92596435546875, + "step": 300 + }, + { + "epoch": 0.6303664921465969, + "fcm_dpo/beta": 0.0128701226785779, + "fcm_dpo/delta": 0.04166974872350693, + "fcm_dpo/margin": 43.445735931396484, + "fcm_dpo/q_t": 0.3868556618690491, + "grad_norm": 91.15668487548828, + "learning_rate": 1.8220596619089573e-07, + "logits/chosen": -0.814331591129303, + "logits/rejected": -0.8293969035148621, + "logps/chosen": -353.07177734375, + "logps/ref_chosen": -279.89453125, + "logps/ref_rejected": -271.6694641113281, + "logps/rejected": -388.2925109863281, + "loss": 4.3454, + "margin_dpo/margin_mean": 43.44573974609375, + "margin_dpo/margin_std": 68.72089385986328, + "step": 301 + }, + { + "epoch": 0.6324607329842932, + "fcm_dpo/beta": 0.012415561825037003, + "fcm_dpo/delta": -0.08762803673744202, + "fcm_dpo/margin": 54.83326721191406, + "fcm_dpo/q_t": 0.3612514138221741, + "grad_norm": 117.01398468017578, + "learning_rate": 1.8044563402088682e-07, + "logits/chosen": -0.7894245386123657, + "logits/rejected": -0.7746908068656921, + "logps/chosen": -341.82904052734375, + "logps/ref_chosen": -271.3318176269531, + "logps/ref_rejected": -256.5587158203125, + "logps/rejected": -381.88922119140625, + "loss": 4.0128, + "margin_dpo/margin_mean": 54.83326721191406, + "margin_dpo/margin_std": 74.75352478027344, + "step": 302 + }, + { + "epoch": 0.6345549738219896, + "fcm_dpo/beta": 0.012025467120110989, + "fcm_dpo/delta": -0.04249938949942589, + "fcm_dpo/margin": 48.98454666137695, + "fcm_dpo/q_t": 0.3781017065048218, + "grad_norm": 116.30992126464844, + "learning_rate": 1.7868903184043885e-07, + "logits/chosen": -0.7718071937561035, + "logits/rejected": -0.7559300661087036, + "logps/chosen": -381.57781982421875, + "logps/ref_chosen": -304.88104248046875, + "logps/ref_rejected": -269.063720703125, + "logps/rejected": -394.7451171875, + "loss": 4.2912, + "margin_dpo/margin_mean": 48.98455047607422, + "margin_dpo/margin_std": 75.46881103515625, + "step": 303 + }, + { + "epoch": 0.6366492146596858, + "fcm_dpo/beta": 0.011567480862140656, + "fcm_dpo/delta": -0.02077743411064148, + "fcm_dpo/margin": 53.529052734375, + "fcm_dpo/q_t": 0.37393832206726074, + "grad_norm": 108.21533966064453, + "learning_rate": 1.7693625385079574e-07, + "logits/chosen": -0.7794772982597351, + "logits/rejected": -0.7964142560958862, + "logps/chosen": -375.22418212890625, + "logps/ref_chosen": -290.7109680175781, + "logps/ref_rejected": -237.6885986328125, + "logps/rejected": -375.7308654785156, + "loss": 4.1337, + "margin_dpo/margin_mean": 53.529056549072266, + "margin_dpo/margin_std": 77.61477661132812, + "step": 304 + }, + { + "epoch": 0.6387434554973822, + "fcm_dpo/beta": 0.010576148517429829, + "fcm_dpo/delta": -0.1733783483505249, + "fcm_dpo/margin": 71.53874206542969, + "fcm_dpo/q_t": 0.3409091830253601, + "grad_norm": 89.68358612060547, + "learning_rate": 1.7518739404812155e-07, + "logits/chosen": -0.8426798582077026, + "logits/rejected": -0.8134666085243225, + "logps/chosen": -331.08544921875, + "logps/ref_chosen": -256.4839782714844, + "logps/ref_rejected": -266.4063415527344, + "logps/rejected": -412.5465087890625, + "loss": 3.7185, + "margin_dpo/margin_mean": 71.53873443603516, + "margin_dpo/margin_std": 78.6550064086914, + "step": 305 + }, + { + "epoch": 0.6408376963350786, + "fcm_dpo/beta": 0.010183380916714668, + "fcm_dpo/delta": 0.026483479887247086, + "fcm_dpo/margin": 45.928916931152344, + "fcm_dpo/q_t": 0.3991745114326477, + "grad_norm": 85.03260803222656, + "learning_rate": 1.7344254621846017e-07, + "logits/chosen": -0.8300163745880127, + "logits/rejected": -0.8189243078231812, + "logps/chosen": -402.5002746582031, + "logps/ref_chosen": -320.6492004394531, + "logps/ref_rejected": -273.36773681640625, + "logps/rejected": -401.1476745605469, + "loss": 4.3841, + "margin_dpo/margin_mean": 45.92892074584961, + "margin_dpo/margin_std": 69.21966552734375, + "step": 306 + }, + { + "epoch": 0.6429319371727749, + "fcm_dpo/beta": 0.010290293022990227, + "fcm_dpo/delta": -0.004134609363973141, + "fcm_dpo/margin": 51.01483154296875, + "fcm_dpo/q_t": 0.38504621386528015, + "grad_norm": 133.7060546875, + "learning_rate": 1.717018039327053e-07, + "logits/chosen": -0.7672021389007568, + "logits/rejected": -0.8132136464118958, + "logps/chosen": -379.48583984375, + "logps/ref_chosen": -279.4541931152344, + "logps/ref_rejected": -240.3796844482422, + "logps/rejected": -391.4261474609375, + "loss": 4.2074, + "margin_dpo/margin_mean": 51.01482391357422, + "margin_dpo/margin_std": 66.68528747558594, + "step": 307 + }, + { + "epoch": 0.6450261780104712, + "fcm_dpo/beta": 0.010651452466845512, + "fcm_dpo/delta": 0.11725203692913055, + "fcm_dpo/margin": 41.120540618896484, + "fcm_dpo/q_t": 0.4069848656654358, + "grad_norm": 93.3102035522461, + "learning_rate": 1.699652605415828e-07, + "logits/chosen": -0.8160425424575806, + "logits/rejected": -0.8356633186340332, + "logps/chosen": -400.3565673828125, + "logps/ref_chosen": -296.598388671875, + "logps/ref_rejected": -258.6953430175781, + "logps/rejected": -403.57403564453125, + "loss": 4.584, + "margin_dpo/margin_mean": 41.120540618896484, + "margin_dpo/margin_std": 74.20211791992188, + "step": 308 + }, + { + "epoch": 0.6471204188481675, + "fcm_dpo/beta": 0.011160111054778099, + "fcm_dpo/delta": -0.028207721188664436, + "fcm_dpo/margin": 56.09125518798828, + "fcm_dpo/q_t": 0.3682219386100769, + "grad_norm": 90.17739868164062, + "learning_rate": 1.6823300917064458e-07, + "logits/chosen": -0.8190463781356812, + "logits/rejected": -0.832842230796814, + "logps/chosen": -382.3316955566406, + "logps/ref_chosen": -281.3881530761719, + "logps/ref_rejected": -262.458740234375, + "logps/rejected": -419.4935302734375, + "loss": 4.0162, + "margin_dpo/margin_mean": 56.09125518798828, + "margin_dpo/margin_std": 73.06241607666016, + "step": 309 + }, + { + "epoch": 0.6492146596858639, + "fcm_dpo/beta": 0.011178172193467617, + "fcm_dpo/delta": 0.008381815627217293, + "fcm_dpo/margin": 52.827457427978516, + "fcm_dpo/q_t": 0.3756198287010193, + "grad_norm": 120.2673568725586, + "learning_rate": 1.6650514271527465e-07, + "logits/chosen": -0.8169420957565308, + "logits/rejected": -0.7943635582923889, + "logps/chosen": -377.6767578125, + "logps/ref_chosen": -279.1872863769531, + "logps/ref_rejected": -261.8279724121094, + "logps/rejected": -413.1448974609375, + "loss": 4.1525, + "margin_dpo/margin_mean": 52.82746124267578, + "margin_dpo/margin_std": 73.31507873535156, + "step": 310 + }, + { + "epoch": 0.6513089005235602, + "fcm_dpo/beta": 0.011097338050603867, + "fcm_dpo/delta": 0.006144438870251179, + "fcm_dpo/margin": 53.27901840209961, + "fcm_dpo/q_t": 0.37421154975891113, + "grad_norm": 124.52708435058594, + "learning_rate": 1.647817538357072e-07, + "logits/chosen": -0.8149024844169617, + "logits/rejected": -0.7987397909164429, + "logps/chosen": -371.7431335449219, + "logps/ref_chosen": -271.39813232421875, + "logps/ref_rejected": -266.12701416015625, + "logps/rejected": -419.75103759765625, + "loss": 4.2385, + "margin_dpo/margin_mean": 53.279022216796875, + "margin_dpo/margin_std": 77.10747528076172, + "step": 311 + }, + { + "epoch": 0.6534031413612565, + "fcm_dpo/beta": 0.011262207292020321, + "fcm_dpo/delta": 0.04970509931445122, + "fcm_dpo/margin": 48.75176239013672, + "fcm_dpo/q_t": 0.3897601068019867, + "grad_norm": 105.6718521118164, + "learning_rate": 1.6306293495205755e-07, + "logits/chosen": -0.8199286460876465, + "logits/rejected": -0.8051372766494751, + "logps/chosen": -381.46502685546875, + "logps/ref_chosen": -282.3850402832031, + "logps/ref_rejected": -246.35389709472656, + "logps/rejected": -394.1856689453125, + "loss": 4.5143, + "margin_dpo/margin_mean": 48.75176239013672, + "margin_dpo/margin_std": 85.32847595214844, + "step": 312 + }, + { + "epoch": 0.6554973821989529, + "fcm_dpo/beta": 0.011512380093336105, + "fcm_dpo/delta": -0.049063071608543396, + "fcm_dpo/margin": 51.31576156616211, + "fcm_dpo/q_t": 0.3808843493461609, + "grad_norm": 92.33170318603516, + "learning_rate": 1.6134877823936607e-07, + "logits/chosen": -0.8606759309768677, + "logits/rejected": -0.8551607131958008, + "logps/chosen": -401.7897033691406, + "logps/ref_chosen": -303.630859375, + "logps/ref_rejected": -273.1156921386719, + "logps/rejected": -422.5903015136719, + "loss": 4.3579, + "margin_dpo/margin_mean": 51.31576156616211, + "margin_dpo/margin_std": 80.45419311523438, + "step": 313 + }, + { + "epoch": 0.6575916230366492, + "fcm_dpo/beta": 0.011472068727016449, + "fcm_dpo/delta": 0.017066676169633865, + "fcm_dpo/margin": 50.73048782348633, + "fcm_dpo/q_t": 0.3782009482383728, + "grad_norm": 96.01194763183594, + "learning_rate": 1.5963937562265522e-07, + "logits/chosen": -0.8795362114906311, + "logits/rejected": -0.8655129671096802, + "logps/chosen": -394.734619140625, + "logps/ref_chosen": -302.3042907714844, + "logps/ref_rejected": -273.6416015625, + "logps/rejected": -416.8023681640625, + "loss": 4.2074, + "margin_dpo/margin_mean": 50.73048782348633, + "margin_dpo/margin_std": 72.94244384765625, + "step": 314 + }, + { + "epoch": 0.6596858638743456, + "fcm_dpo/beta": 0.011077978648245335, + "fcm_dpo/delta": -0.06065046787261963, + "fcm_dpo/margin": 59.16315841674805, + "fcm_dpo/q_t": 0.3630554676055908, + "grad_norm": 93.33431243896484, + "learning_rate": 1.5793481877199943e-07, + "logits/chosen": -0.8478763103485107, + "logits/rejected": -0.834101140499115, + "logps/chosen": -394.13946533203125, + "logps/ref_chosen": -302.729248046875, + "logps/ref_rejected": -270.26910400390625, + "logps/rejected": -420.8424377441406, + "loss": 4.0009, + "margin_dpo/margin_mean": 59.16315841674805, + "margin_dpo/margin_std": 75.37049102783203, + "step": 315 + }, + { + "epoch": 0.6617801047120419, + "fcm_dpo/beta": 0.010612869635224342, + "fcm_dpo/delta": -0.011615972965955734, + "fcm_dpo/margin": 57.42588806152344, + "fcm_dpo/q_t": 0.37472862005233765, + "grad_norm": 80.97152709960938, + "learning_rate": 1.562351990976095e-07, + "logits/chosen": -0.8666278123855591, + "logits/rejected": -0.8581745624542236, + "logps/chosen": -398.5106506347656, + "logps/ref_chosen": -310.5706481933594, + "logps/ref_rejected": -272.9354553222656, + "logps/rejected": -418.3013610839844, + "loss": 4.1489, + "margin_dpo/margin_mean": 57.42588806152344, + "margin_dpo/margin_std": 81.77729797363281, + "step": 316 + }, + { + "epoch": 0.6638743455497382, + "fcm_dpo/beta": 0.010676562786102295, + "fcm_dpo/delta": 0.02444746345281601, + "fcm_dpo/margin": 54.00886535644531, + "fcm_dpo/q_t": 0.3743758201599121, + "grad_norm": 83.4330825805664, + "learning_rate": 1.5454060774493065e-07, + "logits/chosen": -0.8651271462440491, + "logits/rejected": -0.8354383111000061, + "logps/chosen": -327.0922546386719, + "logps/ref_chosen": -253.90036010742188, + "logps/ref_rejected": -218.74078369140625, + "logps/rejected": -345.9415283203125, + "loss": 4.0552, + "margin_dpo/margin_mean": 54.00886535644531, + "margin_dpo/margin_std": 67.14466094970703, + "step": 317 + }, + { + "epoch": 0.6659685863874345, + "fcm_dpo/beta": 0.010473274625837803, + "fcm_dpo/delta": -0.02130315639078617, + "fcm_dpo/margin": 58.93336486816406, + "fcm_dpo/q_t": 0.36683323979377747, + "grad_norm": 77.39559173583984, + "learning_rate": 1.5285113558975427e-07, + "logits/chosen": -0.883613646030426, + "logits/rejected": -0.8504911065101624, + "logps/chosen": -352.9744873046875, + "logps/ref_chosen": -270.8228759765625, + "logps/ref_rejected": -255.30972290039062, + "logps/rejected": -396.39471435546875, + "loss": 3.9828, + "margin_dpo/margin_mean": 58.93336486816406, + "margin_dpo/margin_std": 70.93016052246094, + "step": 318 + }, + { + "epoch": 0.6680628272251309, + "fcm_dpo/beta": 0.010347644798457623, + "fcm_dpo/delta": 0.004265286028385162, + "fcm_dpo/margin": 57.502281188964844, + "fcm_dpo/q_t": 0.3711587190628052, + "grad_norm": 106.93011474609375, + "learning_rate": 1.5116687323334464e-07, + "logits/chosen": -0.8568066358566284, + "logits/rejected": -0.8343677520751953, + "logps/chosen": -389.6893310546875, + "logps/ref_chosen": -301.0028076171875, + "logps/ref_rejected": -242.39002990722656, + "logps/rejected": -388.5788269042969, + "loss": 4.0047, + "margin_dpo/margin_mean": 57.50227737426758, + "margin_dpo/margin_std": 70.3616714477539, + "step": 319 + }, + { + "epoch": 0.6701570680628273, + "fcm_dpo/beta": 0.010691437870264053, + "fcm_dpo/delta": 0.03203843906521797, + "fcm_dpo/margin": 53.148414611816406, + "fcm_dpo/q_t": 0.38301903009414673, + "grad_norm": 128.77078247070312, + "learning_rate": 1.4948791099758052e-07, + "logits/chosen": -0.823917806148529, + "logits/rejected": -0.8286012411117554, + "logps/chosen": -385.59344482421875, + "logps/ref_chosen": -303.6225891113281, + "logps/ref_rejected": -280.85174560546875, + "logps/rejected": -415.97100830078125, + "loss": 4.3482, + "margin_dpo/margin_mean": 53.148414611816406, + "margin_dpo/margin_std": 85.3245849609375, + "step": 320 + }, + { + "epoch": 0.6722513089005235, + "fcm_dpo/beta": 0.011223748326301575, + "fcm_dpo/delta": 0.034036025404930115, + "fcm_dpo/margin": 40.71882629394531, + "fcm_dpo/q_t": 0.4071481227874756, + "grad_norm": 100.50800323486328, + "learning_rate": 1.478143389201113e-07, + "logits/chosen": -0.8584508299827576, + "logits/rejected": -0.8295794725418091, + "logps/chosen": -380.6787109375, + "logps/ref_chosen": -288.98583984375, + "logps/ref_rejected": -241.1822052001953, + "logps/rejected": -373.59393310546875, + "loss": 4.6134, + "margin_dpo/margin_mean": 40.71883010864258, + "margin_dpo/margin_std": 77.2356948852539, + "step": 321 + }, + { + "epoch": 0.6743455497382199, + "fcm_dpo/beta": 0.011451047845184803, + "fcm_dpo/delta": -0.009086892008781433, + "fcm_dpo/margin": 52.995948791503906, + "fcm_dpo/q_t": 0.37670472264289856, + "grad_norm": 84.11673736572266, + "learning_rate": 1.461462467495284e-07, + "logits/chosen": -0.9030950665473938, + "logits/rejected": -0.8643764853477478, + "logps/chosen": -400.96453857421875, + "logps/ref_chosen": -308.54345703125, + "logps/ref_rejected": -269.7995910644531, + "logps/rejected": -415.2165832519531, + "loss": 4.1867, + "margin_dpo/margin_mean": 52.99595260620117, + "margin_dpo/margin_std": 78.33987426757812, + "step": 322 + }, + { + "epoch": 0.6764397905759162, + "fcm_dpo/beta": 0.011756744235754013, + "fcm_dpo/delta": 0.1289975494146347, + "fcm_dpo/margin": 36.21906661987305, + "fcm_dpo/q_t": 0.41570037603378296, + "grad_norm": 103.57855987548828, + "learning_rate": 1.4448372394055246e-07, + "logits/chosen": -0.8654804229736328, + "logits/rejected": -0.8599724173545837, + "logps/chosen": -372.8677978515625, + "logps/ref_chosen": -282.49365234375, + "logps/ref_rejected": -227.7105255126953, + "logps/rejected": -354.30377197265625, + "loss": 4.8564, + "margin_dpo/margin_mean": 36.21906280517578, + "margin_dpo/margin_std": 81.94871520996094, + "step": 323 + }, + { + "epoch": 0.6785340314136126, + "fcm_dpo/beta": 0.011317353695631027, + "fcm_dpo/delta": -0.13536657392978668, + "fcm_dpo/margin": 63.58089065551758, + "fcm_dpo/q_t": 0.34825509786605835, + "grad_norm": 99.381103515625, + "learning_rate": 1.428268596492364e-07, + "logits/chosen": -0.8112601637840271, + "logits/rejected": -0.8100103139877319, + "logps/chosen": -317.1282958984375, + "logps/ref_chosen": -239.33836364746094, + "logps/ref_rejected": -230.53775024414062, + "logps/rejected": -371.9085388183594, + "loss": 3.756, + "margin_dpo/margin_mean": 63.58089065551758, + "margin_dpo/margin_std": 70.64297485351562, + "step": 324 + }, + { + "epoch": 0.680628272251309, + "fcm_dpo/beta": 0.011040986515581608, + "fcm_dpo/delta": -0.02803659997880459, + "fcm_dpo/margin": 52.0232048034668, + "fcm_dpo/q_t": 0.38475099205970764, + "grad_norm": 113.279296875, + "learning_rate": 1.4117574272818386e-07, + "logits/chosen": -0.8139001131057739, + "logits/rejected": -0.7986257076263428, + "logps/chosen": -370.62933349609375, + "logps/ref_chosen": -280.62896728515625, + "logps/ref_rejected": -270.5085754394531, + "logps/rejected": -412.5321350097656, + "loss": 4.3798, + "margin_dpo/margin_mean": 52.0232048034668, + "margin_dpo/margin_std": 84.57878112792969, + "step": 325 + }, + { + "epoch": 0.6827225130890052, + "fcm_dpo/beta": 0.011147797107696533, + "fcm_dpo/delta": 0.03236812353134155, + "fcm_dpo/margin": 51.00239562988281, + "fcm_dpo/q_t": 0.38107889890670776, + "grad_norm": 111.60675048828125, + "learning_rate": 1.3953046172178413e-07, + "logits/chosen": -0.9249294996261597, + "logits/rejected": -0.9140468239784241, + "logps/chosen": -322.252685546875, + "logps/ref_chosen": -240.9871368408203, + "logps/ref_rejected": -261.0238342285156, + "logps/rejected": -393.291748046875, + "loss": 4.2602, + "margin_dpo/margin_mean": 51.00239562988281, + "margin_dpo/margin_std": 76.8713607788086, + "step": 326 + }, + { + "epoch": 0.6848167539267016, + "fcm_dpo/beta": 0.010913331992924213, + "fcm_dpo/delta": -0.048158351331949234, + "fcm_dpo/margin": 58.91179275512695, + "fcm_dpo/q_t": 0.36473149061203003, + "grad_norm": 80.92235565185547, + "learning_rate": 1.3789110486146468e-07, + "logits/chosen": -0.8732993006706238, + "logits/rejected": -0.8531113862991333, + "logps/chosen": -352.11798095703125, + "logps/ref_chosen": -279.52001953125, + "logps/ref_rejected": -269.51824951171875, + "logps/rejected": -401.02801513671875, + "loss": 3.9857, + "margin_dpo/margin_mean": 58.91179275512695, + "margin_dpo/margin_std": 74.66950225830078, + "step": 327 + }, + { + "epoch": 0.6869109947643979, + "fcm_dpo/beta": 0.010607855394482613, + "fcm_dpo/delta": 0.03731272369623184, + "fcm_dpo/margin": 53.156368255615234, + "fcm_dpo/q_t": 0.38036584854125977, + "grad_norm": 105.32549285888672, + "learning_rate": 1.362577600609588e-07, + "logits/chosen": -0.8312807083129883, + "logits/rejected": -0.8335475325584412, + "logps/chosen": -384.21630859375, + "logps/ref_chosen": -301.033447265625, + "logps/ref_rejected": -284.2101135253906, + "logps/rejected": -420.5493469238281, + "loss": 4.1238, + "margin_dpo/margin_mean": 53.156368255615234, + "margin_dpo/margin_std": 68.87464141845703, + "step": 328 + }, + { + "epoch": 0.6890052356020943, + "fcm_dpo/beta": 0.011003939434885979, + "fcm_dpo/delta": -0.0023946845903992653, + "fcm_dpo/margin": 54.590736389160156, + "fcm_dpo/q_t": 0.3825053870677948, + "grad_norm": 104.08448791503906, + "learning_rate": 1.3463051491159093e-07, + "logits/chosen": -0.8463307619094849, + "logits/rejected": -0.8228050470352173, + "logps/chosen": -409.5216369628906, + "logps/ref_chosen": -319.9888610839844, + "logps/ref_rejected": -307.5588684082031, + "logps/rejected": -451.6824035644531, + "loss": 4.2917, + "margin_dpo/margin_mean": 54.590736389160156, + "margin_dpo/margin_std": 86.76227569580078, + "step": 329 + }, + { + "epoch": 0.6910994764397905, + "fcm_dpo/beta": 0.011315654963254929, + "fcm_dpo/delta": 0.03985806554555893, + "fcm_dpo/margin": 49.51683807373047, + "fcm_dpo/q_t": 0.3813677728176117, + "grad_norm": 110.84941864013672, + "learning_rate": 1.3300945667758012e-07, + "logits/chosen": -0.8280748128890991, + "logits/rejected": -0.8414457440376282, + "logps/chosen": -388.23834228515625, + "logps/ref_chosen": -301.11474609375, + "logps/ref_rejected": -299.673095703125, + "logps/rejected": -436.3134765625, + "loss": 4.1843, + "margin_dpo/margin_mean": 49.51683807373047, + "margin_dpo/margin_std": 68.95203399658203, + "step": 330 + }, + { + "epoch": 0.6931937172774869, + "fcm_dpo/beta": 0.011244787834584713, + "fcm_dpo/delta": 0.009060085751116276, + "fcm_dpo/margin": 52.50252151489258, + "fcm_dpo/q_t": 0.38197970390319824, + "grad_norm": 180.15573120117188, + "learning_rate": 1.3139467229135998e-07, + "logits/chosen": -0.8683615922927856, + "logits/rejected": -0.8551488518714905, + "logps/chosen": -356.96160888671875, + "logps/ref_chosen": -277.59149169921875, + "logps/ref_rejected": -256.025634765625, + "logps/rejected": -387.8982849121094, + "loss": 4.3397, + "margin_dpo/margin_mean": 52.50252151489258, + "margin_dpo/margin_std": 86.5692138671875, + "step": 331 + }, + { + "epoch": 0.6952879581151833, + "fcm_dpo/beta": 0.011165878735482693, + "fcm_dpo/delta": -0.0032293088734149933, + "fcm_dpo/margin": 53.855308532714844, + "fcm_dpo/q_t": 0.37883564829826355, + "grad_norm": 115.64824676513672, + "learning_rate": 1.2978624834891626e-07, + "logits/chosen": -0.865576982498169, + "logits/rejected": -0.8442394137382507, + "logps/chosen": -352.7916259765625, + "logps/ref_chosen": -269.97369384765625, + "logps/ref_rejected": -235.03164672851562, + "logps/rejected": -371.70489501953125, + "loss": 4.2568, + "margin_dpo/margin_mean": 53.855308532714844, + "margin_dpo/margin_std": 82.26235961914062, + "step": 332 + }, + { + "epoch": 0.6973821989528796, + "fcm_dpo/beta": 0.011527864262461662, + "fcm_dpo/delta": 0.004975374788045883, + "fcm_dpo/margin": 47.826690673828125, + "fcm_dpo/q_t": 0.38533908128738403, + "grad_norm": 103.64472198486328, + "learning_rate": 1.281842711051438e-07, + "logits/chosen": -0.9276981949806213, + "logits/rejected": -0.8942596316337585, + "logps/chosen": -381.27313232421875, + "logps/ref_chosen": -296.76300048828125, + "logps/ref_rejected": -265.97991943359375, + "logps/rejected": -398.31671142578125, + "loss": 4.226, + "margin_dpo/margin_mean": 47.826690673828125, + "margin_dpo/margin_std": 69.83184051513672, + "step": 333 + }, + { + "epoch": 0.6994764397905759, + "fcm_dpo/beta": 0.011430593207478523, + "fcm_dpo/delta": -0.059879280626773834, + "fcm_dpo/margin": 57.263526916503906, + "fcm_dpo/q_t": 0.3644816279411316, + "grad_norm": 102.11288452148438, + "learning_rate": 1.2658882646922033e-07, + "logits/chosen": -0.839641809463501, + "logits/rejected": -0.8142789602279663, + "logps/chosen": -379.44012451171875, + "logps/ref_chosen": -301.0367431640625, + "logps/ref_rejected": -268.87652587890625, + "logps/rejected": -404.54339599609375, + "loss": 4.0678, + "margin_dpo/margin_mean": 57.263526916503906, + "margin_dpo/margin_std": 76.02151489257812, + "step": 334 + }, + { + "epoch": 0.7015706806282722, + "fcm_dpo/beta": 0.010619346983730793, + "fcm_dpo/delta": -7.020309567451477e-05, + "fcm_dpo/margin": 56.28835678100586, + "fcm_dpo/q_t": 0.377109557390213, + "grad_norm": 112.79352569580078, + "learning_rate": 1.2500000000000005e-07, + "logits/chosen": -0.8311583399772644, + "logits/rejected": -0.827835738658905, + "logps/chosen": -365.9309997558594, + "logps/ref_chosen": -276.13275146484375, + "logps/ref_rejected": -243.44203186035156, + "logps/rejected": -389.5285949707031, + "loss": 4.245, + "margin_dpo/margin_mean": 56.28835678100586, + "margin_dpo/margin_std": 83.26809692382812, + "step": 335 + }, + { + "epoch": 0.7036649214659686, + "fcm_dpo/beta": 0.010861254297196865, + "fcm_dpo/delta": -0.004219849593937397, + "fcm_dpo/margin": 50.423587799072266, + "fcm_dpo/q_t": 0.3916303515434265, + "grad_norm": 112.85342407226562, + "learning_rate": 1.2341787690142435e-07, + "logits/chosen": -0.839414119720459, + "logits/rejected": -0.7747617959976196, + "logps/chosen": -337.7890319824219, + "logps/ref_chosen": -246.2626495361328, + "logps/ref_rejected": -261.0617980957031, + "logps/rejected": -403.0118408203125, + "loss": 4.3563, + "margin_dpo/margin_mean": 50.423587799072266, + "margin_dpo/margin_std": 81.8228988647461, + "step": 336 + }, + { + "epoch": 0.7057591623036649, + "fcm_dpo/beta": 0.010828062891960144, + "fcm_dpo/delta": -0.06099002808332443, + "fcm_dpo/margin": 60.63716125488281, + "fcm_dpo/q_t": 0.3647434711456299, + "grad_norm": 89.56388854980469, + "learning_rate": 1.2184254201795363e-07, + "logits/chosen": -0.8643673062324524, + "logits/rejected": -0.8361295461654663, + "logps/chosen": -350.9415283203125, + "logps/ref_chosen": -266.9937744140625, + "logps/ref_rejected": -253.015625, + "logps/rejected": -397.60052490234375, + "loss": 3.9831, + "margin_dpo/margin_mean": 60.63715362548828, + "margin_dpo/margin_std": 78.05126953125, + "step": 337 + }, + { + "epoch": 0.7078534031413612, + "fcm_dpo/beta": 0.010671587660908699, + "fcm_dpo/delta": 0.035113610327243805, + "fcm_dpo/margin": 52.91345977783203, + "fcm_dpo/q_t": 0.38380661606788635, + "grad_norm": 123.61051177978516, + "learning_rate": 1.202740798300168e-07, + "logits/chosen": -0.8847794532775879, + "logits/rejected": -0.867152214050293, + "logps/chosen": -357.63946533203125, + "logps/ref_chosen": -276.5925598144531, + "logps/ref_rejected": -233.979248046875, + "logps/rejected": -367.9396057128906, + "loss": 4.264, + "margin_dpo/margin_mean": 52.9134521484375, + "margin_dpo/margin_std": 80.15204620361328, + "step": 338 + }, + { + "epoch": 0.7099476439790576, + "fcm_dpo/beta": 0.010630465112626553, + "fcm_dpo/delta": -0.030053602531552315, + "fcm_dpo/margin": 58.987159729003906, + "fcm_dpo/q_t": 0.36951741576194763, + "grad_norm": 107.49327087402344, + "learning_rate": 1.1871257444948096e-07, + "logits/chosen": -0.8885621428489685, + "logits/rejected": -0.8790793418884277, + "logps/chosen": -392.2843933105469, + "logps/ref_chosen": -303.5277404785156, + "logps/ref_rejected": -283.11676025390625, + "logps/rejected": -430.860595703125, + "loss": 4.1489, + "margin_dpo/margin_mean": 58.987159729003906, + "margin_dpo/margin_std": 83.707763671875, + "step": 339 + }, + { + "epoch": 0.7120418848167539, + "fcm_dpo/beta": 0.010451890528202057, + "fcm_dpo/delta": -0.00904519110918045, + "fcm_dpo/margin": 53.435546875, + "fcm_dpo/q_t": 0.3880341053009033, + "grad_norm": 126.61689758300781, + "learning_rate": 1.1715810961514072e-07, + "logits/chosen": -0.8415927886962891, + "logits/rejected": -0.8401827812194824, + "logps/chosen": -354.4100341796875, + "logps/ref_chosen": -261.5257568359375, + "logps/ref_rejected": -259.39862060546875, + "logps/rejected": -405.7184143066406, + "loss": 4.4717, + "margin_dpo/margin_mean": 53.435546875, + "margin_dpo/margin_std": 92.72853088378906, + "step": 340 + }, + { + "epoch": 0.7141361256544503, + "fcm_dpo/beta": 0.010697471909224987, + "fcm_dpo/delta": 0.08240307867527008, + "fcm_dpo/margin": 40.59882354736328, + "fcm_dpo/q_t": 0.4121650159358978, + "grad_norm": 146.87213134765625, + "learning_rate": 1.1561076868822755e-07, + "logits/chosen": -0.86383056640625, + "logits/rejected": -0.8326124548912048, + "logps/chosen": -426.71337890625, + "logps/ref_chosen": -315.903564453125, + "logps/ref_rejected": -308.02392578125, + "logps/rejected": -459.4324951171875, + "loss": 4.9048, + "margin_dpo/margin_mean": 40.59882354736328, + "margin_dpo/margin_std": 90.46697235107422, + "step": 341 + }, + { + "epoch": 0.7162303664921466, + "fcm_dpo/beta": 0.011284704320132732, + "fcm_dpo/delta": -0.020554201677441597, + "fcm_dpo/margin": 54.822906494140625, + "fcm_dpo/q_t": 0.3675943613052368, + "grad_norm": 101.63287353515625, + "learning_rate": 1.1407063464793965e-07, + "logits/chosen": -0.8525506258010864, + "logits/rejected": -0.8500516414642334, + "logps/chosen": -356.83026123046875, + "logps/ref_chosen": -269.17864990234375, + "logps/ref_rejected": -260.8977355957031, + "logps/rejected": -403.3722229003906, + "loss": 4.0639, + "margin_dpo/margin_mean": 54.822906494140625, + "margin_dpo/margin_std": 71.24502563476562, + "step": 342 + }, + { + "epoch": 0.7183246073298429, + "fcm_dpo/beta": 0.01099632028490305, + "fcm_dpo/delta": 0.047995634377002716, + "fcm_dpo/margin": 50.445194244384766, + "fcm_dpo/q_t": 0.3869495093822479, + "grad_norm": 110.69547271728516, + "learning_rate": 1.125377900869913e-07, + "logits/chosen": -0.8448514938354492, + "logits/rejected": -0.8279154896736145, + "logps/chosen": -402.746826171875, + "logps/ref_chosen": -310.719970703125, + "logps/ref_rejected": -263.5224914550781, + "logps/rejected": -405.9945373535156, + "loss": 4.3061, + "margin_dpo/margin_mean": 50.445194244384766, + "margin_dpo/margin_std": 79.09446716308594, + "step": 343 + }, + { + "epoch": 0.7204188481675393, + "fcm_dpo/beta": 0.011513441801071167, + "fcm_dpo/delta": -0.023599928244948387, + "fcm_dpo/margin": 53.812904357910156, + "fcm_dpo/q_t": 0.3725927770137787, + "grad_norm": 115.21056365966797, + "learning_rate": 1.110123172071844e-07, + "logits/chosen": -0.8441615104675293, + "logits/rejected": -0.8278071880340576, + "logps/chosen": -395.754150390625, + "logps/ref_chosen": -301.7999267578125, + "logps/ref_rejected": -257.9061584472656, + "logps/rejected": -405.67327880859375, + "loss": 4.225, + "margin_dpo/margin_mean": 53.812904357910156, + "margin_dpo/margin_std": 79.05390930175781, + "step": 344 + }, + { + "epoch": 0.7225130890052356, + "fcm_dpo/beta": 0.011425694450736046, + "fcm_dpo/delta": 0.05544426292181015, + "fcm_dpo/margin": 47.61176300048828, + "fcm_dpo/q_t": 0.387326180934906, + "grad_norm": 137.6807098388672, + "learning_rate": 1.09494297815e-07, + "logits/chosen": -0.842475175857544, + "logits/rejected": -0.842012345790863, + "logps/chosen": -375.22137451171875, + "logps/ref_chosen": -283.0184326171875, + "logps/ref_rejected": -266.8457336425781, + "logps/rejected": -406.660400390625, + "loss": 4.2707, + "margin_dpo/margin_mean": 47.61176300048828, + "margin_dpo/margin_std": 67.73701477050781, + "step": 345 + }, + { + "epoch": 0.724607329842932, + "fcm_dpo/beta": 0.011425861157476902, + "fcm_dpo/delta": -0.07231096923351288, + "fcm_dpo/margin": 58.19831848144531, + "fcm_dpo/q_t": 0.36130863428115845, + "grad_norm": 90.98426055908203, + "learning_rate": 1.0798381331721107e-07, + "logits/chosen": -0.9370063543319702, + "logits/rejected": -0.8892075419425964, + "logps/chosen": -366.372802734375, + "logps/ref_chosen": -268.44122314453125, + "logps/ref_rejected": -227.8225860595703, + "logps/rejected": -383.952392578125, + "loss": 4.1066, + "margin_dpo/margin_mean": 58.19831848144531, + "margin_dpo/margin_std": 78.29531860351562, + "step": 346 + }, + { + "epoch": 0.7267015706806282, + "fcm_dpo/beta": 0.010861432179808617, + "fcm_dpo/delta": -0.010385667905211449, + "fcm_dpo/margin": 51.505802154541016, + "fcm_dpo/q_t": 0.380726158618927, + "grad_norm": 98.91201782226562, + "learning_rate": 1.0648094471651722e-07, + "logits/chosen": -0.7857590913772583, + "logits/rejected": -0.8135133981704712, + "logps/chosen": -364.33441162109375, + "logps/ref_chosen": -273.70355224609375, + "logps/ref_rejected": -243.65521240234375, + "logps/rejected": -385.7918395996094, + "loss": 4.2412, + "margin_dpo/margin_mean": 51.505802154541016, + "margin_dpo/margin_std": 73.8308334350586, + "step": 347 + }, + { + "epoch": 0.7287958115183246, + "fcm_dpo/beta": 0.011517523787915707, + "fcm_dpo/delta": 0.09734243154525757, + "fcm_dpo/margin": 43.836822509765625, + "fcm_dpo/q_t": 0.40036576986312866, + "grad_norm": 90.68925476074219, + "learning_rate": 1.0498577260720048e-07, + "logits/chosen": -0.882627010345459, + "logits/rejected": -0.8689060807228088, + "logps/chosen": -374.64874267578125, + "logps/ref_chosen": -285.64141845703125, + "logps/ref_rejected": -265.6270446777344, + "logps/rejected": -398.47119140625, + "loss": 4.4704, + "margin_dpo/margin_mean": 43.836822509765625, + "margin_dpo/margin_std": 75.92671966552734, + "step": 348 + }, + { + "epoch": 0.7308900523560209, + "fcm_dpo/beta": 0.011294200085103512, + "fcm_dpo/delta": -0.10136254876852036, + "fcm_dpo/margin": 61.32181930541992, + "fcm_dpo/q_t": 0.36070722341537476, + "grad_norm": 167.52288818359375, + "learning_rate": 1.0349837717080347e-07, + "logits/chosen": -0.8177285194396973, + "logits/rejected": -0.8121789693832397, + "logps/chosen": -418.723876953125, + "logps/ref_chosen": -328.3175048828125, + "logps/ref_rejected": -292.37872314453125, + "logps/rejected": -444.10687255859375, + "loss": 4.0752, + "margin_dpo/margin_mean": 61.32181930541992, + "margin_dpo/margin_std": 85.29824829101562, + "step": 349 + }, + { + "epoch": 0.7329842931937173, + "fcm_dpo/beta": 0.011082770302891731, + "fcm_dpo/delta": 0.0001247054897248745, + "fcm_dpo/margin": 49.612892150878906, + "fcm_dpo/q_t": 0.38704627752304077, + "grad_norm": 104.57560729980469, + "learning_rate": 1.0201883817182949e-07, + "logits/chosen": -0.8255881071090698, + "logits/rejected": -0.8411324620246887, + "logps/chosen": -392.31982421875, + "logps/ref_chosen": -292.8046569824219, + "logps/ref_rejected": -250.35504150390625, + "logps/rejected": -399.4831237792969, + "loss": 4.4145, + "margin_dpo/margin_mean": 49.61289978027344, + "margin_dpo/margin_std": 82.11981201171875, + "step": 350 + }, + { + "epoch": 0.7350785340314137, + "fcm_dpo/beta": 0.011276098899543285, + "fcm_dpo/delta": 0.053270816802978516, + "fcm_dpo/margin": 36.07026290893555, + "fcm_dpo/q_t": 0.4199068546295166, + "grad_norm": 156.5530242919922, + "learning_rate": 1.0054723495346482e-07, + "logits/chosen": -0.8887529373168945, + "logits/rejected": -0.8778947591781616, + "logps/chosen": -404.2864074707031, + "logps/ref_chosen": -311.8890380859375, + "logps/ref_rejected": -263.59033203125, + "logps/rejected": -392.0579833984375, + "loss": 4.9768, + "margin_dpo/margin_mean": 36.07026290893555, + "margin_dpo/margin_std": 87.96586608886719, + "step": 351 + }, + { + "epoch": 0.7371727748691099, + "fcm_dpo/beta": 0.010786263272166252, + "fcm_dpo/delta": -0.10786393284797668, + "fcm_dpo/margin": 64.68121337890625, + "fcm_dpo/q_t": 0.3563269376754761, + "grad_norm": 107.21913146972656, + "learning_rate": 9.908364643332398e-08, + "logits/chosen": -0.8178911805152893, + "logits/rejected": -0.7894106507301331, + "logps/chosen": -341.0050354003906, + "logps/ref_chosen": -254.9078826904297, + "logps/ref_rejected": -257.1688232421875, + "logps/rejected": -407.94720458984375, + "loss": 3.9965, + "margin_dpo/margin_mean": 64.68122100830078, + "margin_dpo/margin_std": 83.1053466796875, + "step": 352 + }, + { + "epoch": 0.7392670157068063, + "fcm_dpo/beta": 0.01031852513551712, + "fcm_dpo/delta": 0.012895338237285614, + "fcm_dpo/margin": 50.85631561279297, + "fcm_dpo/q_t": 0.3920055627822876, + "grad_norm": 121.84527587890625, + "learning_rate": 9.76281510992176e-08, + "logits/chosen": -0.836536169052124, + "logits/rejected": -0.8306083679199219, + "logps/chosen": -365.62139892578125, + "logps/ref_chosen": -270.3760681152344, + "logps/ref_rejected": -264.65234375, + "logps/rejected": -410.7539367675781, + "loss": 4.4158, + "margin_dpo/margin_mean": 50.85631561279297, + "margin_dpo/margin_std": 82.86323547363281, + "step": 353 + }, + { + "epoch": 0.7413612565445026, + "fcm_dpo/beta": 0.01093815453350544, + "fcm_dpo/delta": 0.10080662369728088, + "fcm_dpo/margin": 37.49406433105469, + "fcm_dpo/q_t": 0.4171503186225891, + "grad_norm": 136.33518981933594, + "learning_rate": 9.618082700494318e-08, + "logits/chosen": -0.8385964632034302, + "logits/rejected": -0.8738152384757996, + "logps/chosen": -354.9613037109375, + "logps/ref_chosen": -257.6485595703125, + "logps/ref_rejected": -246.94203186035156, + "logps/rejected": -381.74884033203125, + "loss": 4.8602, + "margin_dpo/margin_mean": 37.49407196044922, + "margin_dpo/margin_std": 83.91649627685547, + "step": 354 + }, + { + "epoch": 0.743455497382199, + "fcm_dpo/beta": 0.010349645279347897, + "fcm_dpo/delta": -0.12334014475345612, + "fcm_dpo/margin": 62.511470794677734, + "fcm_dpo/q_t": 0.36480429768562317, + "grad_norm": 95.06636047363281, + "learning_rate": 9.474175176609956e-08, + "logits/chosen": -0.8743699193000793, + "logits/rejected": -0.875370979309082, + "logps/chosen": -384.0447692871094, + "logps/ref_chosen": -293.35333251953125, + "logps/ref_rejected": -275.6051940917969, + "logps/rejected": -428.80804443359375, + "loss": 4.1536, + "margin_dpo/margin_mean": 62.511470794677734, + "margin_dpo/margin_std": 87.62345886230469, + "step": 355 + }, + { + "epoch": 0.7455497382198953, + "fcm_dpo/beta": 0.01060514897108078, + "fcm_dpo/delta": 0.06647256016731262, + "fcm_dpo/margin": 40.10423278808594, + "fcm_dpo/q_t": 0.4083283841609955, + "grad_norm": 88.60588836669922, + "learning_rate": 9.331100255592436e-08, + "logits/chosen": -0.796362042427063, + "logits/rejected": -0.8256345391273499, + "logps/chosen": -293.0960388183594, + "logps/ref_chosen": -204.25550842285156, + "logps/ref_rejected": -213.467529296875, + "logps/rejected": -342.41229248046875, + "loss": 4.548, + "margin_dpo/margin_mean": 40.10423278808594, + "margin_dpo/margin_std": 67.21572875976562, + "step": 356 + }, + { + "epoch": 0.7476439790575916, + "fcm_dpo/beta": 0.010486958548426628, + "fcm_dpo/delta": -0.07790210843086243, + "fcm_dpo/margin": 58.63288879394531, + "fcm_dpo/q_t": 0.37552568316459656, + "grad_norm": 98.0359115600586, + "learning_rate": 9.18886561011557e-08, + "logits/chosen": -0.7627823352813721, + "logits/rejected": -0.7633357048034668, + "logps/chosen": -362.4690246582031, + "logps/ref_chosen": -266.3705749511719, + "logps/ref_rejected": -239.04490661621094, + "logps/rejected": -393.7762451171875, + "loss": 4.2021, + "margin_dpo/margin_mean": 58.63289260864258, + "margin_dpo/margin_std": 85.52519226074219, + "step": 357 + }, + { + "epoch": 0.749738219895288, + "fcm_dpo/beta": 0.009996353648602962, + "fcm_dpo/delta": -0.06562351435422897, + "fcm_dpo/margin": 66.09445190429688, + "fcm_dpo/q_t": 0.3610166311264038, + "grad_norm": 88.50709533691406, + "learning_rate": 9.047478867791731e-08, + "logits/chosen": -0.8669772148132324, + "logits/rejected": -0.8496595621109009, + "logps/chosen": -382.9401550292969, + "logps/ref_chosen": -299.1474609375, + "logps/ref_rejected": -257.2531433105469, + "logps/rejected": -407.1402587890625, + "loss": 4.0085, + "margin_dpo/margin_mean": 66.09444427490234, + "margin_dpo/margin_std": 85.0299072265625, + "step": 358 + }, + { + "epoch": 0.7518324607329843, + "fcm_dpo/beta": 0.010169594548642635, + "fcm_dpo/delta": 0.03530079498887062, + "fcm_dpo/margin": 55.34699249267578, + "fcm_dpo/q_t": 0.3793519139289856, + "grad_norm": 106.86293029785156, + "learning_rate": 8.906947610762825e-08, + "logits/chosen": -0.8287184238433838, + "logits/rejected": -0.8446385860443115, + "logps/chosen": -390.8289794921875, + "logps/ref_chosen": -302.99786376953125, + "logps/ref_rejected": -260.4137268066406, + "logps/rejected": -403.5918273925781, + "loss": 4.1275, + "margin_dpo/margin_mean": 55.34699249267578, + "margin_dpo/margin_std": 72.260009765625, + "step": 359 + }, + { + "epoch": 0.7539267015706806, + "fcm_dpo/beta": 0.010196023620665073, + "fcm_dpo/delta": 0.05344226956367493, + "fcm_dpo/margin": 48.37961196899414, + "fcm_dpo/q_t": 0.39275315403938293, + "grad_norm": 115.53006744384766, + "learning_rate": 8.76727937529367e-08, + "logits/chosen": -0.8422183394432068, + "logits/rejected": -0.8362429141998291, + "logps/chosen": -404.19610595703125, + "logps/ref_chosen": -309.6114501953125, + "logps/ref_rejected": -256.64031982421875, + "logps/rejected": -399.6045837402344, + "loss": 4.4924, + "margin_dpo/margin_mean": 48.37961196899414, + "margin_dpo/margin_std": 80.97713470458984, + "step": 360 + }, + { + "epoch": 0.7560209424083769, + "fcm_dpo/beta": 0.010226656682789326, + "fcm_dpo/delta": -0.05834663659334183, + "fcm_dpo/margin": 64.01289367675781, + "fcm_dpo/q_t": 0.36634212732315063, + "grad_norm": 100.24212646484375, + "learning_rate": 8.628481651367875e-08, + "logits/chosen": -0.798484742641449, + "logits/rejected": -0.7778838276863098, + "logps/chosen": -340.5452575683594, + "logps/ref_chosen": -263.3797607421875, + "logps/ref_rejected": -271.18157958984375, + "logps/rejected": -412.3599548339844, + "loss": 4.1178, + "margin_dpo/margin_mean": 64.01289367675781, + "margin_dpo/margin_std": 91.16770935058594, + "step": 361 + }, + { + "epoch": 0.7581151832460733, + "fcm_dpo/beta": 0.009908447042107582, + "fcm_dpo/delta": 0.03887121379375458, + "fcm_dpo/margin": 56.83973693847656, + "fcm_dpo/q_t": 0.3782539367675781, + "grad_norm": 90.40959930419922, + "learning_rate": 8.490561882286135e-08, + "logits/chosen": -0.8111223578453064, + "logits/rejected": -0.8046758770942688, + "logps/chosen": -389.388916015625, + "logps/ref_chosen": -303.2583923339844, + "logps/ref_rejected": -243.22891235351562, + "logps/rejected": -386.1991271972656, + "loss": 4.0886, + "margin_dpo/margin_mean": 56.83973693847656, + "margin_dpo/margin_std": 72.43896484375, + "step": 362 + }, + { + "epoch": 0.7602094240837697, + "fcm_dpo/beta": 0.010420668870210648, + "fcm_dpo/delta": 0.029644204303622246, + "fcm_dpo/margin": 54.629852294921875, + "fcm_dpo/q_t": 0.38353899121284485, + "grad_norm": 97.74794006347656, + "learning_rate": 8.353527464267104e-08, + "logits/chosen": -0.8362611532211304, + "logits/rejected": -0.7902975678443909, + "logps/chosen": -395.0887451171875, + "logps/ref_chosen": -303.34722900390625, + "logps/ref_rejected": -262.05419921875, + "logps/rejected": -408.4255676269531, + "loss": 4.3097, + "margin_dpo/margin_mean": 54.62985610961914, + "margin_dpo/margin_std": 84.8177261352539, + "step": 363 + }, + { + "epoch": 0.762303664921466, + "fcm_dpo/beta": 0.010761250741779804, + "fcm_dpo/delta": 0.1065862700343132, + "fcm_dpo/margin": 46.43891143798828, + "fcm_dpo/q_t": 0.3989133834838867, + "grad_norm": 97.99282836914062, + "learning_rate": 8.217385746050742e-08, + "logits/chosen": -0.806189239025116, + "logits/rejected": -0.8205310702323914, + "logps/chosen": -395.6390075683594, + "logps/ref_chosen": -285.54376220703125, + "logps/ref_rejected": -284.84619140625, + "logps/rejected": -441.38031005859375, + "loss": 4.6575, + "margin_dpo/margin_mean": 46.43891143798828, + "margin_dpo/margin_std": 89.2318115234375, + "step": 364 + }, + { + "epoch": 0.7643979057591623, + "fcm_dpo/beta": 0.011114663444459438, + "fcm_dpo/delta": -0.062224358320236206, + "fcm_dpo/margin": 54.65919876098633, + "fcm_dpo/q_t": 0.37894606590270996, + "grad_norm": 99.08690643310547, + "learning_rate": 8.082144028504231e-08, + "logits/chosen": -0.8273904323577881, + "logits/rejected": -0.8326528668403625, + "logps/chosen": -370.6837158203125, + "logps/ref_chosen": -274.7878112792969, + "logps/ref_rejected": -256.5738220214844, + "logps/rejected": -407.1288757324219, + "loss": 4.2368, + "margin_dpo/margin_mean": 54.6591911315918, + "margin_dpo/margin_std": 82.35843658447266, + "step": 365 + }, + { + "epoch": 0.7664921465968586, + "fcm_dpo/beta": 0.010609567165374756, + "fcm_dpo/delta": -0.053685709834098816, + "fcm_dpo/margin": 61.24087905883789, + "fcm_dpo/q_t": 0.3654269874095917, + "grad_norm": 92.26556396484375, + "learning_rate": 7.947809564230445e-08, + "logits/chosen": -0.7945237159729004, + "logits/rejected": -0.8086446523666382, + "logps/chosen": -376.56878662109375, + "logps/ref_chosen": -286.6496276855469, + "logps/ref_rejected": -251.97140502929688, + "logps/rejected": -403.1314697265625, + "loss": 4.0641, + "margin_dpo/margin_mean": 61.24087905883789, + "margin_dpo/margin_std": 84.4163818359375, + "step": 366 + }, + { + "epoch": 0.768586387434555, + "fcm_dpo/beta": 0.009982116520404816, + "fcm_dpo/delta": -0.006225086748600006, + "fcm_dpo/margin": 60.51777648925781, + "fcm_dpo/q_t": 0.3717145025730133, + "grad_norm": 107.34187316894531, + "learning_rate": 7.814389557179016e-08, + "logits/chosen": -0.7962571382522583, + "logits/rejected": -0.7791531085968018, + "logps/chosen": -392.7152099609375, + "logps/ref_chosen": -301.9449768066406, + "logps/ref_rejected": -265.5677185058594, + "logps/rejected": -416.85565185546875, + "loss": 4.0549, + "margin_dpo/margin_mean": 60.51777648925781, + "margin_dpo/margin_std": 78.20709228515625, + "step": 367 + }, + { + "epoch": 0.7706806282722513, + "fcm_dpo/beta": 0.009898173622786999, + "fcm_dpo/delta": -0.10373516380786896, + "fcm_dpo/margin": 70.26689147949219, + "fcm_dpo/q_t": 0.3502688705921173, + "grad_norm": 72.27176666259766, + "learning_rate": 7.681891162260015e-08, + "logits/chosen": -0.7817418575286865, + "logits/rejected": -0.7951399683952332, + "logps/chosen": -379.85211181640625, + "logps/ref_chosen": -294.62652587890625, + "logps/ref_rejected": -258.7628479003906, + "logps/rejected": -414.2553405761719, + "loss": 3.7321, + "margin_dpo/margin_mean": 70.26689910888672, + "margin_dpo/margin_std": 73.98336791992188, + "step": 368 + }, + { + "epoch": 0.7727748691099476, + "fcm_dpo/beta": 0.009732791222631931, + "fcm_dpo/delta": 0.07320413738489151, + "fcm_dpo/margin": 54.49217987060547, + "fcm_dpo/q_t": 0.38609111309051514, + "grad_norm": 93.12295532226562, + "learning_rate": 7.550321484960251e-08, + "logits/chosen": -0.8595657348632812, + "logits/rejected": -0.8424580097198486, + "logps/chosen": -375.50518798828125, + "logps/ref_chosen": -282.5057373046875, + "logps/ref_rejected": -266.41607666015625, + "logps/rejected": -413.9076843261719, + "loss": 4.2228, + "margin_dpo/margin_mean": 54.49217987060547, + "margin_dpo/margin_std": 75.60814666748047, + "step": 369 + }, + { + "epoch": 0.774869109947644, + "fcm_dpo/beta": 0.009743990376591682, + "fcm_dpo/delta": -0.03128061443567276, + "fcm_dpo/margin": 64.41328430175781, + "fcm_dpo/q_t": 0.36713510751724243, + "grad_norm": 76.50689697265625, + "learning_rate": 7.419687580962222e-08, + "logits/chosen": -0.8467559218406677, + "logits/rejected": -0.8696060180664062, + "logps/chosen": -336.12493896484375, + "logps/ref_chosen": -251.00640869140625, + "logps/ref_rejected": -238.12542724609375, + "logps/rejected": -387.6571960449219, + "loss": 4.0569, + "margin_dpo/margin_mean": 64.41327667236328, + "margin_dpo/margin_std": 86.21026611328125, + "step": 370 + }, + { + "epoch": 0.7769633507853403, + "fcm_dpo/beta": 0.010253066197037697, + "fcm_dpo/delta": 0.0842670351266861, + "fcm_dpo/margin": 50.67913818359375, + "fcm_dpo/q_t": 0.3899438977241516, + "grad_norm": 123.32413482666016, + "learning_rate": 7.289996455765748e-08, + "logits/chosen": -0.7954655885696411, + "logits/rejected": -0.7937295436859131, + "logps/chosen": -393.655029296875, + "logps/ref_chosen": -296.6591491699219, + "logps/ref_rejected": -251.14675903320312, + "logps/rejected": -398.8217468261719, + "loss": 4.3229, + "margin_dpo/margin_mean": 50.679134368896484, + "margin_dpo/margin_std": 76.93683624267578, + "step": 371 + }, + { + "epoch": 0.7790575916230367, + "fcm_dpo/beta": 0.010081680491566658, + "fcm_dpo/delta": -0.051262035965919495, + "fcm_dpo/margin": 63.9815788269043, + "fcm_dpo/q_t": 0.365226149559021, + "grad_norm": 83.84464263916016, + "learning_rate": 7.161255064312283e-08, + "logits/chosen": -0.7702327370643616, + "logits/rejected": -0.7675243020057678, + "logps/chosen": -424.36273193359375, + "logps/ref_chosen": -331.3714599609375, + "logps/ref_rejected": -285.56805419921875, + "logps/rejected": -442.5409240722656, + "loss": 4.0537, + "margin_dpo/margin_mean": 63.98158645629883, + "margin_dpo/margin_std": 84.29653930664062, + "step": 372 + }, + { + "epoch": 0.7811518324607329, + "fcm_dpo/beta": 0.009867929853498936, + "fcm_dpo/delta": -0.005473626311868429, + "fcm_dpo/margin": 61.17123031616211, + "fcm_dpo/q_t": 0.3673017919063568, + "grad_norm": 91.18738555908203, + "learning_rate": 7.033470310611945e-08, + "logits/chosen": -0.8663382530212402, + "logits/rejected": -0.843439519405365, + "logps/chosen": -405.7951354980469, + "logps/ref_chosen": -321.9429931640625, + "logps/ref_rejected": -271.2288513183594, + "logps/rejected": -416.25225830078125, + "loss": 4.0013, + "margin_dpo/margin_mean": 61.171226501464844, + "margin_dpo/margin_std": 72.36864471435547, + "step": 373 + }, + { + "epoch": 0.7832460732984293, + "fcm_dpo/beta": 0.010404913686215878, + "fcm_dpo/delta": 0.08060853183269501, + "fcm_dpo/margin": 50.20440673828125, + "fcm_dpo/q_t": 0.39187973737716675, + "grad_norm": 74.0364990234375, + "learning_rate": 6.906649047373245e-08, + "logits/chosen": -0.8531290292739868, + "logits/rejected": -0.8525895476341248, + "logps/chosen": -410.4658203125, + "logps/ref_chosen": -319.1685485839844, + "logps/ref_rejected": -284.6263732910156, + "logps/rejected": -426.1280212402344, + "loss": 4.3572, + "margin_dpo/margin_mean": 50.204410552978516, + "margin_dpo/margin_std": 79.54742431640625, + "step": 374 + }, + { + "epoch": 0.7853403141361257, + "fcm_dpo/beta": 0.010893258266150951, + "fcm_dpo/delta": 0.029163816943764687, + "fcm_dpo/margin": 47.64557647705078, + "fcm_dpo/q_t": 0.39366090297698975, + "grad_norm": 104.41280364990234, + "learning_rate": 6.780798075635675e-08, + "logits/chosen": -0.8502262830734253, + "logits/rejected": -0.8328761458396912, + "logps/chosen": -412.74224853515625, + "logps/ref_chosen": -314.87579345703125, + "logps/ref_rejected": -259.1965026855469, + "logps/rejected": -404.70849609375, + "loss": 4.457, + "margin_dpo/margin_mean": 47.64557647705078, + "margin_dpo/margin_std": 81.04498291015625, + "step": 375 + }, + { + "epoch": 0.787434554973822, + "fcm_dpo/beta": 0.010992622934281826, + "fcm_dpo/delta": -0.005566142499446869, + "fcm_dpo/margin": 54.897132873535156, + "fcm_dpo/q_t": 0.3781394064426422, + "grad_norm": 112.78710174560547, + "learning_rate": 6.655924144404906e-08, + "logits/chosen": -0.8241918087005615, + "logits/rejected": -0.832420825958252, + "logps/chosen": -385.7311096191406, + "logps/ref_chosen": -287.6732482910156, + "logps/ref_rejected": -256.6697082519531, + "logps/rejected": -409.6247253417969, + "loss": 4.2815, + "margin_dpo/margin_mean": 54.897132873535156, + "margin_dpo/margin_std": 85.11792755126953, + "step": 376 + }, + { + "epoch": 0.7895287958115184, + "fcm_dpo/beta": 0.01128990575671196, + "fcm_dpo/delta": 0.05416107177734375, + "fcm_dpo/margin": 38.42709732055664, + "fcm_dpo/q_t": 0.41005009412765503, + "grad_norm": 113.04798889160156, + "learning_rate": 6.532033950290885e-08, + "logits/chosen": -0.8132824897766113, + "logits/rejected": -0.8157401084899902, + "logps/chosen": -409.5943298339844, + "logps/ref_chosen": -305.261474609375, + "logps/ref_rejected": -271.8887023925781, + "logps/rejected": -414.6486511230469, + "loss": 4.8146, + "margin_dpo/margin_mean": 38.427101135253906, + "margin_dpo/margin_std": 82.6573486328125, + "step": 377 + }, + { + "epoch": 0.7916230366492146, + "fcm_dpo/beta": 0.011431505903601646, + "fcm_dpo/delta": 0.02444280870258808, + "fcm_dpo/margin": 46.53923416137695, + "fcm_dpo/q_t": 0.39048752188682556, + "grad_norm": 110.65091705322266, + "learning_rate": 6.409134137148736e-08, + "logits/chosen": -0.8158414363861084, + "logits/rejected": -0.8029335737228394, + "logps/chosen": -378.9710388183594, + "logps/ref_chosen": -281.5295715332031, + "logps/ref_rejected": -296.980224609375, + "logps/rejected": -440.9609069824219, + "loss": 4.3736, + "margin_dpo/margin_mean": 46.53923416137695, + "margin_dpo/margin_std": 74.37672424316406, + "step": 378 + }, + { + "epoch": 0.793717277486911, + "fcm_dpo/beta": 0.011517210863530636, + "fcm_dpo/delta": 0.008636513724923134, + "fcm_dpo/margin": 51.24773406982422, + "fcm_dpo/q_t": 0.38045018911361694, + "grad_norm": 115.9316635131836, + "learning_rate": 6.28723129572247e-08, + "logits/chosen": -0.8725168704986572, + "logits/rejected": -0.8533939123153687, + "logps/chosen": -355.72607421875, + "logps/ref_chosen": -265.0807800292969, + "logps/ref_rejected": -230.58932495117188, + "logps/rejected": -372.4823303222656, + "loss": 4.336, + "margin_dpo/margin_mean": 51.247737884521484, + "margin_dpo/margin_std": 82.44475555419922, + "step": 379 + }, + { + "epoch": 0.7958115183246073, + "fcm_dpo/beta": 0.011387725360691547, + "fcm_dpo/delta": -0.07844444364309311, + "fcm_dpo/margin": 53.415035247802734, + "fcm_dpo/q_t": 0.37550121545791626, + "grad_norm": 118.78459167480469, + "learning_rate": 6.166331963291519e-08, + "logits/chosen": -0.8518512845039368, + "logits/rejected": -0.8342669010162354, + "logps/chosen": -403.8897399902344, + "logps/ref_chosen": -305.90838623046875, + "logps/ref_rejected": -286.5906677246094, + "logps/rejected": -437.987060546875, + "loss": 4.2305, + "margin_dpo/margin_mean": 53.4150390625, + "margin_dpo/margin_std": 78.6050033569336, + "step": 380 + }, + { + "epoch": 0.7979057591623037, + "fcm_dpo/beta": 0.011148151010274887, + "fcm_dpo/delta": -0.023326825350522995, + "fcm_dpo/margin": 55.73320388793945, + "fcm_dpo/q_t": 0.3738594651222229, + "grad_norm": 100.60095977783203, + "learning_rate": 6.046442623320145e-08, + "logits/chosen": -0.8115476369857788, + "logits/rejected": -0.7750450372695923, + "logps/chosen": -346.8482666015625, + "logps/ref_chosen": -252.87066650390625, + "logps/ref_rejected": -261.1927490234375, + "logps/rejected": -410.9035339355469, + "loss": 4.1363, + "margin_dpo/margin_mean": 55.73320007324219, + "margin_dpo/margin_std": 79.52117919921875, + "step": 381 + }, + { + "epoch": 0.8, + "fcm_dpo/beta": 0.010607222095131874, + "fcm_dpo/delta": -0.08224906027317047, + "fcm_dpo/margin": 63.71092987060547, + "fcm_dpo/q_t": 0.35713696479797363, + "grad_norm": 90.26844024658203, + "learning_rate": 5.9275697051098275e-08, + "logits/chosen": -0.8479326963424683, + "logits/rejected": -0.8441295623779297, + "logps/chosen": -379.1060791015625, + "logps/ref_chosen": -289.2114562988281, + "logps/ref_rejected": -278.45751953125, + "logps/rejected": -432.06298828125, + "loss": 3.9126, + "margin_dpo/margin_mean": 63.71092224121094, + "margin_dpo/margin_std": 76.458740234375, + "step": 382 + }, + { + "epoch": 0.8020942408376963, + "fcm_dpo/beta": 0.010085361078381538, + "fcm_dpo/delta": -0.03180404752492905, + "fcm_dpo/margin": 57.359859466552734, + "fcm_dpo/q_t": 0.37756213545799255, + "grad_norm": 110.38035583496094, + "learning_rate": 5.809719583454414e-08, + "logits/chosen": -0.8326891660690308, + "logits/rejected": -0.8139215111732483, + "logps/chosen": -362.66534423828125, + "logps/ref_chosen": -273.630859375, + "logps/ref_rejected": -261.44024658203125, + "logps/rejected": -407.83465576171875, + "loss": 4.187, + "margin_dpo/margin_mean": 57.359867095947266, + "margin_dpo/margin_std": 80.45777893066406, + "step": 383 + }, + { + "epoch": 0.8041884816753927, + "fcm_dpo/beta": 0.010192757472395897, + "fcm_dpo/delta": 0.057503946125507355, + "fcm_dpo/margin": 48.359901428222656, + "fcm_dpo/q_t": 0.3959079384803772, + "grad_norm": 82.36161804199219, + "learning_rate": 5.6928985782982524e-08, + "logits/chosen": -0.8383417725563049, + "logits/rejected": -0.8363715410232544, + "logps/chosen": -369.3538818359375, + "logps/ref_chosen": -274.5699462890625, + "logps/ref_rejected": -285.8253479003906, + "logps/rejected": -428.9691467285156, + "loss": 4.4124, + "margin_dpo/margin_mean": 48.35989761352539, + "margin_dpo/margin_std": 78.35002899169922, + "step": 384 + }, + { + "epoch": 0.806282722513089, + "fcm_dpo/beta": 0.010394207201898098, + "fcm_dpo/delta": 0.002360312268137932, + "fcm_dpo/margin": 52.75077819824219, + "fcm_dpo/q_t": 0.3835112154483795, + "grad_norm": 88.78260803222656, + "learning_rate": 5.57711295439732e-08, + "logits/chosen": -0.7930533289909363, + "logits/rejected": -0.794459342956543, + "logps/chosen": -380.3506774902344, + "logps/ref_chosen": -284.150634765625, + "logps/ref_rejected": -244.87921142578125, + "logps/rejected": -393.8300476074219, + "loss": 4.204, + "margin_dpo/margin_mean": 52.75077819824219, + "margin_dpo/margin_std": 73.82457733154297, + "step": 385 + }, + { + "epoch": 0.8083769633507853, + "fcm_dpo/beta": 0.009621590375900269, + "fcm_dpo/delta": -0.12054447084665298, + "fcm_dpo/margin": 67.64607238769531, + "fcm_dpo/q_t": 0.35985732078552246, + "grad_norm": 86.24301147460938, + "learning_rate": 5.4623689209832484e-08, + "logits/chosen": -0.785068929195404, + "logits/rejected": -0.7856448888778687, + "logps/chosen": -407.9813537597656, + "logps/ref_chosen": -320.1762390136719, + "logps/ref_rejected": -302.05023193359375, + "logps/rejected": -457.50140380859375, + "loss": 3.8964, + "margin_dpo/margin_mean": 67.64607238769531, + "margin_dpo/margin_std": 75.94105529785156, + "step": 386 + }, + { + "epoch": 0.8104712041884817, + "fcm_dpo/beta": 0.009473450481891632, + "fcm_dpo/delta": 0.00044431351125240326, + "fcm_dpo/margin": 57.35693359375, + "fcm_dpo/q_t": 0.3816065788269043, + "grad_norm": 81.3988037109375, + "learning_rate": 5.3486726314303175e-08, + "logits/chosen": -0.8247092962265015, + "logits/rejected": -0.8297352194786072, + "logps/chosen": -366.90478515625, + "logps/ref_chosen": -272.2801513671875, + "logps/ref_rejected": -265.1615905761719, + "logps/rejected": -417.14312744140625, + "loss": 4.2071, + "margin_dpo/margin_mean": 57.356929779052734, + "margin_dpo/margin_std": 78.60884094238281, + "step": 387 + }, + { + "epoch": 0.812565445026178, + "fcm_dpo/beta": 0.009732890874147415, + "fcm_dpo/delta": 0.09739725291728973, + "fcm_dpo/margin": 41.571998596191406, + "fcm_dpo/q_t": 0.41511738300323486, + "grad_norm": 105.03797149658203, + "learning_rate": 5.2360301829254745e-08, + "logits/chosen": -0.8036607503890991, + "logits/rejected": -0.7966702580451965, + "logps/chosen": -378.40643310546875, + "logps/ref_chosen": -272.5313415527344, + "logps/ref_rejected": -239.55735778808594, + "logps/rejected": -387.00445556640625, + "loss": 4.7304, + "margin_dpo/margin_mean": 41.571998596191406, + "margin_dpo/margin_std": 82.30770874023438, + "step": 388 + }, + { + "epoch": 0.8146596858638744, + "fcm_dpo/beta": 0.009908687323331833, + "fcm_dpo/delta": -0.03899161145091057, + "fcm_dpo/margin": 52.967166900634766, + "fcm_dpo/q_t": 0.3908618688583374, + "grad_norm": 86.0737075805664, + "learning_rate": 5.1244476161413806e-08, + "logits/chosen": -0.8428322076797485, + "logits/rejected": -0.8418431878089905, + "logps/chosen": -380.26837158203125, + "logps/ref_chosen": -281.0892639160156, + "logps/ref_rejected": -246.50045776367188, + "logps/rejected": -398.646728515625, + "loss": 4.3874, + "margin_dpo/margin_mean": 52.96717071533203, + "margin_dpo/margin_std": 83.29864501953125, + "step": 389 + }, + { + "epoch": 0.8167539267015707, + "fcm_dpo/beta": 0.010237889364361763, + "fcm_dpo/delta": 0.0414692722260952, + "fcm_dpo/margin": 54.53743362426758, + "fcm_dpo/q_t": 0.3818510174751282, + "grad_norm": 83.15040588378906, + "learning_rate": 5.013930914912476e-08, + "logits/chosen": -0.852079451084137, + "logits/rejected": -0.8583500981330872, + "logps/chosen": -382.2542419433594, + "logps/ref_chosen": -283.98748779296875, + "logps/ref_rejected": -283.465087890625, + "logps/rejected": -436.26922607421875, + "loss": 4.2404, + "margin_dpo/margin_mean": 54.53743362426758, + "margin_dpo/margin_std": 78.8434066772461, + "step": 390 + }, + { + "epoch": 0.818848167539267, + "fcm_dpo/beta": 0.009870692156255245, + "fcm_dpo/delta": -0.02596093714237213, + "fcm_dpo/margin": 57.44294738769531, + "fcm_dpo/q_t": 0.38027477264404297, + "grad_norm": 101.38391876220703, + "learning_rate": 4.904486005914027e-08, + "logits/chosen": -0.7972782850265503, + "logits/rejected": -0.7920839190483093, + "logps/chosen": -389.5296325683594, + "logps/ref_chosen": -283.86138916015625, + "logps/ref_rejected": -263.5093688964844, + "logps/rejected": -426.6205749511719, + "loss": 4.1996, + "margin_dpo/margin_mean": 57.44294357299805, + "margin_dpo/margin_std": 80.77877807617188, + "step": 391 + }, + { + "epoch": 0.8209424083769633, + "fcm_dpo/beta": 0.009380877017974854, + "fcm_dpo/delta": -0.06458516418933868, + "fcm_dpo/margin": 69.83071899414062, + "fcm_dpo/q_t": 0.3584578335285187, + "grad_norm": 85.31986236572266, + "learning_rate": 4.796118758344353e-08, + "logits/chosen": -0.7884517312049866, + "logits/rejected": -0.8135141730308533, + "logps/chosen": -403.9764099121094, + "logps/ref_chosen": -310.070068359375, + "logps/ref_rejected": -252.89817810058594, + "logps/rejected": -416.63519287109375, + "loss": 3.8778, + "margin_dpo/margin_mean": 69.83071899414062, + "margin_dpo/margin_std": 76.77499389648438, + "step": 392 + }, + { + "epoch": 0.8230366492146597, + "fcm_dpo/beta": 0.010007185861468315, + "fcm_dpo/delta": 0.061848465353250504, + "fcm_dpo/margin": 53.800540924072266, + "fcm_dpo/q_t": 0.3861052393913269, + "grad_norm": 114.60466766357422, + "learning_rate": 4.688834983610082e-08, + "logits/chosen": -0.8373547792434692, + "logits/rejected": -0.8279107213020325, + "logps/chosen": -378.2579040527344, + "logps/ref_chosen": -286.7156677246094, + "logps/ref_rejected": -230.00357055664062, + "logps/rejected": -375.34637451171875, + "loss": 4.2635, + "margin_dpo/margin_mean": 53.800537109375, + "margin_dpo/margin_std": 79.83255767822266, + "step": 393 + }, + { + "epoch": 0.8251308900523561, + "fcm_dpo/beta": 0.010027028620243073, + "fcm_dpo/delta": 0.04232503101229668, + "fcm_dpo/margin": 49.44554901123047, + "fcm_dpo/q_t": 0.39943477511405945, + "grad_norm": 78.94566345214844, + "learning_rate": 4.582640435014459e-08, + "logits/chosen": -0.8648529052734375, + "logits/rejected": -0.8650112152099609, + "logps/chosen": -419.1053161621094, + "logps/ref_chosen": -325.9934387207031, + "logps/ref_rejected": -317.42706298828125, + "logps/rejected": -459.9844970703125, + "loss": 4.4582, + "margin_dpo/margin_mean": 49.4455451965332, + "margin_dpo/margin_std": 82.50776672363281, + "step": 394 + }, + { + "epoch": 0.8272251308900523, + "fcm_dpo/beta": 0.010306437499821186, + "fcm_dpo/delta": -0.029076773673295975, + "fcm_dpo/margin": 60.80992889404297, + "fcm_dpo/q_t": 0.37129712104797363, + "grad_norm": 75.36946868896484, + "learning_rate": 4.477540807448832e-08, + "logits/chosen": -0.8019086122512817, + "logits/rejected": -0.8114342093467712, + "logps/chosen": -360.0174560546875, + "logps/ref_chosen": -268.90081787109375, + "logps/ref_rejected": -272.85809326171875, + "logps/rejected": -424.7846984863281, + "loss": 4.0261, + "margin_dpo/margin_mean": 60.8099250793457, + "margin_dpo/margin_std": 79.55804443359375, + "step": 395 + }, + { + "epoch": 0.8293193717277487, + "fcm_dpo/beta": 0.010009893216192722, + "fcm_dpo/delta": -0.016297191381454468, + "fcm_dpo/margin": 55.63848114013672, + "fcm_dpo/q_t": 0.3808242976665497, + "grad_norm": 90.71900177001953, + "learning_rate": 4.373541737087263e-08, + "logits/chosen": -0.8296109437942505, + "logits/rejected": -0.8163138628005981, + "logps/chosen": -384.39410400390625, + "logps/ref_chosen": -291.19830322265625, + "logps/ref_rejected": -253.2803955078125, + "logps/rejected": -402.11468505859375, + "loss": 4.1991, + "margin_dpo/margin_mean": 55.63848114013672, + "margin_dpo/margin_std": 76.70056915283203, + "step": 396 + }, + { + "epoch": 0.831413612565445, + "fcm_dpo/beta": 0.009893465787172318, + "fcm_dpo/delta": -0.023841019719839096, + "fcm_dpo/margin": 49.24290466308594, + "fcm_dpo/q_t": 0.39738088846206665, + "grad_norm": 90.27240753173828, + "learning_rate": 4.270648801084295e-08, + "logits/chosen": -0.8341606259346008, + "logits/rejected": -0.8116894960403442, + "logps/chosen": -400.8775939941406, + "logps/ref_chosen": -309.8224182128906, + "logps/ref_rejected": -291.9057922363281, + "logps/rejected": -432.20391845703125, + "loss": 4.5174, + "margin_dpo/margin_mean": 49.24290466308594, + "margin_dpo/margin_std": 83.20286560058594, + "step": 397 + }, + { + "epoch": 0.8335078534031414, + "fcm_dpo/beta": 0.009832684881985188, + "fcm_dpo/delta": 0.07155661284923553, + "fcm_dpo/margin": 46.61724853515625, + "fcm_dpo/q_t": 0.40298062562942505, + "grad_norm": 107.44989776611328, + "learning_rate": 4.168867517275806e-08, + "logits/chosen": -0.7414498925209045, + "logits/rejected": -0.7821962833404541, + "logps/chosen": -398.432861328125, + "logps/ref_chosen": -297.8135070800781, + "logps/ref_rejected": -270.5025634765625, + "logps/rejected": -417.7391662597656, + "loss": 4.726, + "margin_dpo/margin_mean": 46.61724853515625, + "margin_dpo/margin_std": 91.47262573242188, + "step": 398 + }, + { + "epoch": 0.8356020942408376, + "fcm_dpo/beta": 0.010510783642530441, + "fcm_dpo/delta": 0.05873828008770943, + "fcm_dpo/margin": 51.76239776611328, + "fcm_dpo/q_t": 0.38723278045654297, + "grad_norm": 91.62894439697266, + "learning_rate": 4.0682033438831584e-08, + "logits/chosen": -0.8432673811912537, + "logits/rejected": -0.80589359998703, + "logps/chosen": -392.64324951171875, + "logps/ref_chosen": -292.8467712402344, + "logps/ref_rejected": -268.3638916015625, + "logps/rejected": -419.9228210449219, + "loss": 4.3291, + "margin_dpo/margin_mean": 51.76239776611328, + "margin_dpo/margin_std": 81.01602172851562, + "step": 399 + }, + { + "epoch": 0.837696335078534, + "fcm_dpo/beta": 0.010746605694293976, + "fcm_dpo/delta": 0.025092536583542824, + "fcm_dpo/margin": 53.3397331237793, + "fcm_dpo/q_t": 0.3807898461818695, + "grad_norm": 134.78067016601562, + "learning_rate": 3.968661679220467e-08, + "logits/chosen": -0.8801178932189941, + "logits/rejected": -0.8791629672050476, + "logps/chosen": -358.38555908203125, + "logps/ref_chosen": -263.6763916015625, + "logps/ref_rejected": -258.67266845703125, + "logps/rejected": -406.7215881347656, + "loss": 4.3167, + "margin_dpo/margin_mean": 53.3397331237793, + "margin_dpo/margin_std": 79.3423080444336, + "step": 400 + }, + { + "epoch": 0.837696335078534, + "eval_fcm_dpo/beta": 0.011007222346961498, + "eval_logits/chosen": -0.840668797492981, + "eval_logits/rejected": -0.8345889449119568, + "eval_logps/chosen": -383.9891357421875, + "eval_logps/ref_chosen": -287.8267517089844, + "eval_logps/ref_rejected": -266.9313659667969, + "eval_logps/rejected": -417.3312072753906, + "eval_loss": 0.5351805090904236, + "eval_margin_dpo/margin_mean": 54.237510681152344, + "eval_margin_dpo/margin_std": 83.07901763916016, + "eval_runtime": 81.6128, + "eval_samples_per_second": 24.506, + "eval_steps_per_second": 1.532, + "step": 400 + }, + { + "epoch": 0.8397905759162304, + "fcm_dpo/beta": 0.01083466224372387, + "fcm_dpo/delta": -0.03301737830042839, + "fcm_dpo/margin": 58.17015075683594, + "fcm_dpo/q_t": 0.3694632053375244, + "grad_norm": 130.7917022705078, + "learning_rate": 3.8702478614051345e-08, + "logits/chosen": -0.8163310289382935, + "logits/rejected": -0.8166416883468628, + "logps/chosen": -411.48193359375, + "logps/ref_chosen": -318.2853088378906, + "logps/ref_rejected": -293.75225830078125, + "logps/rejected": -445.11895751953125, + "loss": 4.0959, + "margin_dpo/margin_mean": 58.1701545715332, + "margin_dpo/margin_std": 81.16681671142578, + "step": 401 + }, + { + "epoch": 0.8418848167539267, + "fcm_dpo/beta": 0.010784516111016273, + "fcm_dpo/delta": 0.002740806434303522, + "fcm_dpo/margin": 55.34971237182617, + "fcm_dpo/q_t": 0.37727218866348267, + "grad_norm": 109.63217163085938, + "learning_rate": 3.772967168071517e-08, + "logits/chosen": -0.8767110705375671, + "logits/rejected": -0.8513585329055786, + "logps/chosen": -398.0880126953125, + "logps/ref_chosen": -309.4278564453125, + "logps/ref_rejected": -282.0279846191406, + "logps/rejected": -426.037841796875, + "loss": 4.1861, + "margin_dpo/margin_mean": 55.34970474243164, + "margin_dpo/margin_std": 82.14326477050781, + "step": 402 + }, + { + "epoch": 0.8439790575916231, + "fcm_dpo/beta": 0.010111565701663494, + "fcm_dpo/delta": -0.1565774530172348, + "fcm_dpo/margin": 73.72638702392578, + "fcm_dpo/q_t": 0.34308868646621704, + "grad_norm": 77.10204315185547, + "learning_rate": 3.676824816087978e-08, + "logits/chosen": -0.8601398468017578, + "logits/rejected": -0.8417026996612549, + "logps/chosen": -399.95440673828125, + "logps/ref_chosen": -309.0284729003906, + "logps/ref_rejected": -272.9622497558594, + "logps/rejected": -437.61456298828125, + "loss": 3.6889, + "margin_dpo/margin_mean": 73.72638702392578, + "margin_dpo/margin_std": 79.83676147460938, + "step": 403 + }, + { + "epoch": 0.8460732984293193, + "fcm_dpo/beta": 0.009832248091697693, + "fcm_dpo/delta": 0.06779766827821732, + "fcm_dpo/margin": 54.37714385986328, + "fcm_dpo/q_t": 0.3864296078681946, + "grad_norm": 93.5862045288086, + "learning_rate": 3.581825961277074e-08, + "logits/chosen": -0.88753741979599, + "logits/rejected": -0.8670026063919067, + "logps/chosen": -398.1080627441406, + "logps/ref_chosen": -297.2837219238281, + "logps/ref_rejected": -256.99041748046875, + "logps/rejected": -412.1919250488281, + "loss": 4.3263, + "margin_dpo/margin_mean": 54.377140045166016, + "margin_dpo/margin_std": 83.19239044189453, + "step": 404 + }, + { + "epoch": 0.8481675392670157, + "fcm_dpo/beta": 0.010070566087961197, + "fcm_dpo/delta": -0.0018516681157052517, + "fcm_dpo/margin": 59.72123718261719, + "fcm_dpo/q_t": 0.37372201681137085, + "grad_norm": 72.33039093017578, + "learning_rate": 3.487975698139084e-08, + "logits/chosen": -0.7841629385948181, + "logits/rejected": -0.7917266488075256, + "logps/chosen": -349.7501220703125, + "logps/ref_chosen": -257.96533203125, + "logps/ref_rejected": -255.811279296875, + "logps/rejected": -407.3173522949219, + "loss": 4.0942, + "margin_dpo/margin_mean": 59.72124099731445, + "margin_dpo/margin_std": 81.7040786743164, + "step": 405 + }, + { + "epoch": 0.8502617801047121, + "fcm_dpo/beta": 0.010788071900606155, + "fcm_dpo/delta": 0.11286494135856628, + "fcm_dpo/margin": 45.265289306640625, + "fcm_dpo/q_t": 0.3963577449321747, + "grad_norm": 122.36735534667969, + "learning_rate": 3.3952790595787986e-08, + "logits/chosen": -0.8172638416290283, + "logits/rejected": -0.7938133478164673, + "logps/chosen": -388.5911865234375, + "logps/ref_chosen": -285.1810607910156, + "logps/ref_rejected": -264.41351318359375, + "logps/rejected": -413.0889892578125, + "loss": 4.4481, + "margin_dpo/margin_mean": 45.265289306640625, + "margin_dpo/margin_std": 74.5055160522461, + "step": 406 + }, + { + "epoch": 0.8523560209424084, + "fcm_dpo/beta": 0.010678643360733986, + "fcm_dpo/delta": -0.038571376353502274, + "fcm_dpo/margin": 59.46702575683594, + "fcm_dpo/q_t": 0.3713955879211426, + "grad_norm": 115.4225082397461, + "learning_rate": 3.303741016635614e-08, + "logits/chosen": -0.8237298130989075, + "logits/rejected": -0.8525005578994751, + "logps/chosen": -370.30657958984375, + "logps/ref_chosen": -265.23809814453125, + "logps/ref_rejected": -219.0631561279297, + "logps/rejected": -383.59869384765625, + "loss": 4.1242, + "margin_dpo/margin_mean": 59.46702575683594, + "margin_dpo/margin_std": 84.35752868652344, + "step": 407 + }, + { + "epoch": 0.8544502617801047, + "fcm_dpo/beta": 0.010710010305047035, + "fcm_dpo/delta": -0.021216176450252533, + "fcm_dpo/margin": 57.599510192871094, + "fcm_dpo/q_t": 0.3745374381542206, + "grad_norm": 82.85662841796875, + "learning_rate": 3.2133664782169944e-08, + "logits/chosen": -0.853847861289978, + "logits/rejected": -0.8488500118255615, + "logps/chosen": -388.76116943359375, + "logps/ref_chosen": -296.9726257324219, + "logps/ref_rejected": -295.4786376953125, + "logps/rejected": -444.86669921875, + "loss": 4.1418, + "margin_dpo/margin_mean": 57.599510192871094, + "margin_dpo/margin_std": 79.41829681396484, + "step": 408 + }, + { + "epoch": 0.856544502617801, + "fcm_dpo/beta": 0.010306498035788536, + "fcm_dpo/delta": -0.026171572506427765, + "fcm_dpo/margin": 55.8836669921875, + "fcm_dpo/q_t": 0.38002270460128784, + "grad_norm": 89.53182220458984, + "learning_rate": 3.12416029083514e-08, + "logits/chosen": -0.8308712244033813, + "logits/rejected": -0.8197529315948486, + "logps/chosen": -387.86822509765625, + "logps/ref_chosen": -287.37933349609375, + "logps/ref_rejected": -275.80291748046875, + "logps/rejected": -432.1754150390625, + "loss": 4.3765, + "margin_dpo/margin_mean": 55.8836669921875, + "margin_dpo/margin_std": 91.40107727050781, + "step": 409 + }, + { + "epoch": 0.8586387434554974, + "fcm_dpo/beta": 0.010539250448346138, + "fcm_dpo/delta": 0.05248191952705383, + "fcm_dpo/margin": 52.03704833984375, + "fcm_dpo/q_t": 0.38655808568000793, + "grad_norm": 104.86951446533203, + "learning_rate": 3.036127238347164e-08, + "logits/chosen": -0.8444973826408386, + "logits/rejected": -0.8523566722869873, + "logps/chosen": -379.1742248535156, + "logps/ref_chosen": -281.7801818847656, + "logps/ref_rejected": -266.7550354003906, + "logps/rejected": -416.18609619140625, + "loss": 4.3978, + "margin_dpo/margin_mean": 52.03704833984375, + "margin_dpo/margin_std": 85.04647827148438, + "step": 410 + }, + { + "epoch": 0.8607329842931937, + "fcm_dpo/beta": 0.010161810554564, + "fcm_dpo/delta": -0.07766594737768173, + "fcm_dpo/margin": 65.88399505615234, + "fcm_dpo/q_t": 0.35847824811935425, + "grad_norm": 82.89816284179688, + "learning_rate": 2.9492720416985e-08, + "logits/chosen": -0.8404784798622131, + "logits/rejected": -0.8063231706619263, + "logps/chosen": -373.0326843261719, + "logps/ref_chosen": -281.5872497558594, + "logps/ref_rejected": -254.78916931152344, + "logps/rejected": -412.1186218261719, + "loss": 3.8683, + "margin_dpo/margin_mean": 65.88399505615234, + "margin_dpo/margin_std": 77.45508575439453, + "step": 411 + }, + { + "epoch": 0.86282722513089, + "fcm_dpo/beta": 0.009724740870296955, + "fcm_dpo/delta": -0.014092553406953812, + "fcm_dpo/margin": 49.745506286621094, + "fcm_dpo/q_t": 0.39898359775543213, + "grad_norm": 88.29672241210938, + "learning_rate": 2.863599358669755e-08, + "logits/chosen": -0.8222418427467346, + "logits/rejected": -0.8297065496444702, + "logps/chosen": -382.5504455566406, + "logps/ref_chosen": -276.5341796875, + "logps/ref_rejected": -273.8751220703125, + "logps/rejected": -429.636962890625, + "loss": 4.4639, + "margin_dpo/margin_mean": 49.745506286621094, + "margin_dpo/margin_std": 82.65047454833984, + "step": 412 + }, + { + "epoch": 0.8649214659685864, + "fcm_dpo/beta": 0.01038271188735962, + "fcm_dpo/delta": 0.1108207255601883, + "fcm_dpo/margin": 47.65922927856445, + "fcm_dpo/q_t": 0.3976641297340393, + "grad_norm": 122.68524169921875, + "learning_rate": 2.7791137836269158e-08, + "logits/chosen": -0.8296762704849243, + "logits/rejected": -0.8280857801437378, + "logps/chosen": -370.8761901855469, + "logps/ref_chosen": -271.2745666503906, + "logps/ref_rejected": -270.16912841796875, + "logps/rejected": -417.4300231933594, + "loss": 4.4352, + "margin_dpo/margin_mean": 47.659236907958984, + "margin_dpo/margin_std": 79.33601379394531, + "step": 413 + }, + { + "epoch": 0.8670157068062827, + "fcm_dpo/beta": 0.010576970875263214, + "fcm_dpo/delta": -0.07696720957756042, + "fcm_dpo/margin": 63.35627746582031, + "fcm_dpo/q_t": 0.3609466850757599, + "grad_norm": 91.79287719726562, + "learning_rate": 2.6958198472749717e-08, + "logits/chosen": -0.8634947538375854, + "logits/rejected": -0.8709216117858887, + "logps/chosen": -394.9339904785156, + "logps/ref_chosen": -297.11505126953125, + "logps/ref_rejected": -271.7034606933594, + "logps/rejected": -432.8786926269531, + "loss": 3.9873, + "margin_dpo/margin_mean": 63.35627746582031, + "margin_dpo/margin_std": 80.128173828125, + "step": 414 + }, + { + "epoch": 0.8691099476439791, + "fcm_dpo/beta": 0.010473713278770447, + "fcm_dpo/delta": 0.03152439743280411, + "fcm_dpo/margin": 54.287376403808594, + "fcm_dpo/q_t": 0.37784260511398315, + "grad_norm": 88.63931274414062, + "learning_rate": 2.613722016414943e-08, + "logits/chosen": -0.8671582937240601, + "logits/rejected": -0.8537446856498718, + "logps/chosen": -394.3064880371094, + "logps/ref_chosen": -297.6926574707031, + "logps/ref_rejected": -279.0503234863281, + "logps/rejected": -429.9515380859375, + "loss": 4.1391, + "margin_dpo/margin_mean": 54.287376403808594, + "margin_dpo/margin_std": 73.61995697021484, + "step": 415 + }, + { + "epoch": 0.8712041884816754, + "fcm_dpo/beta": 0.010003462433815002, + "fcm_dpo/delta": -0.06168883666396141, + "fcm_dpo/margin": 65.28002166748047, + "fcm_dpo/q_t": 0.3622613847255707, + "grad_norm": 75.9556655883789, + "learning_rate": 2.5328246937043525e-08, + "logits/chosen": -0.8746985197067261, + "logits/rejected": -0.8851325511932373, + "logps/chosen": -402.248046875, + "logps/ref_chosen": -311.8255615234375, + "logps/ref_rejected": -268.6170654296875, + "logps/rejected": -424.3195495605469, + "loss": 4.0131, + "margin_dpo/margin_mean": 65.28001403808594, + "margin_dpo/margin_std": 82.11227416992188, + "step": 416 + }, + { + "epoch": 0.8732984293193717, + "fcm_dpo/beta": 0.009751483798027039, + "fcm_dpo/delta": -0.026388350874185562, + "fcm_dpo/margin": 57.194740295410156, + "fcm_dpo/q_t": 0.38324517011642456, + "grad_norm": 92.91184997558594, + "learning_rate": 2.4531322174210973e-08, + "logits/chosen": -0.8104668259620667, + "logits/rejected": -0.8152583837509155, + "logps/chosen": -410.4869384765625, + "logps/ref_chosen": -310.43682861328125, + "logps/ref_rejected": -277.15283203125, + "logps/rejected": -434.3976745605469, + "loss": 4.2952, + "margin_dpo/margin_mean": 57.194740295410156, + "margin_dpo/margin_std": 84.70370483398438, + "step": 417 + }, + { + "epoch": 0.875392670157068, + "fcm_dpo/beta": 0.009737811051309109, + "fcm_dpo/delta": -0.03723875805735588, + "fcm_dpo/margin": 54.46815490722656, + "fcm_dpo/q_t": 0.38777798414230347, + "grad_norm": 96.64009094238281, + "learning_rate": 2.3746488612308295e-08, + "logits/chosen": -0.8096103072166443, + "logits/rejected": -0.7874211668968201, + "logps/chosen": -387.23211669921875, + "logps/ref_chosen": -278.49591064453125, + "logps/ref_rejected": -276.56671142578125, + "logps/rejected": -439.77105712890625, + "loss": 4.3411, + "margin_dpo/margin_mean": 54.46815490722656, + "margin_dpo/margin_std": 80.28997802734375, + "step": 418 + }, + { + "epoch": 0.8774869109947644, + "fcm_dpo/beta": 0.00948832742869854, + "fcm_dpo/delta": 0.0006105322390794754, + "fcm_dpo/margin": 62.94663619995117, + "fcm_dpo/q_t": 0.3723425269126892, + "grad_norm": 94.83244323730469, + "learning_rate": 2.297378833957761e-08, + "logits/chosen": -0.8623223304748535, + "logits/rejected": -0.841428816318512, + "logps/chosen": -406.687744140625, + "logps/ref_chosen": -298.9002380371094, + "logps/ref_rejected": -246.1540985107422, + "logps/rejected": -416.88824462890625, + "loss": 4.1616, + "margin_dpo/margin_mean": 62.946632385253906, + "margin_dpo/margin_std": 87.8830337524414, + "step": 419 + }, + { + "epoch": 0.8795811518324608, + "fcm_dpo/beta": 0.009303269907832146, + "fcm_dpo/delta": -0.02820839360356331, + "fcm_dpo/margin": 67.16934967041016, + "fcm_dpo/q_t": 0.3698027729988098, + "grad_norm": 119.92971801757812, + "learning_rate": 2.2213262793589482e-08, + "logits/chosen": -0.8005006909370422, + "logits/rejected": -0.7742573618888855, + "logps/chosen": -369.0135498046875, + "logps/ref_chosen": -264.5608825683594, + "logps/ref_rejected": -245.67031860351562, + "logps/rejected": -417.29229736328125, + "loss": 4.1315, + "margin_dpo/margin_mean": 67.16934967041016, + "margin_dpo/margin_std": 95.13330078125, + "step": 420 + }, + { + "epoch": 0.881675392670157, + "fcm_dpo/beta": 0.009417861700057983, + "fcm_dpo/delta": 0.0553901270031929, + "fcm_dpo/margin": 58.053810119628906, + "fcm_dpo/q_t": 0.3806764483451843, + "grad_norm": 95.21514129638672, + "learning_rate": 2.1464952759020856e-08, + "logits/chosen": -0.87691730260849, + "logits/rejected": -0.8619418144226074, + "logps/chosen": -393.3926086425781, + "logps/ref_chosen": -297.70501708984375, + "logps/ref_rejected": -243.74771118164062, + "logps/rejected": -397.4891052246094, + "loss": 4.1826, + "margin_dpo/margin_mean": 58.053810119628906, + "margin_dpo/margin_std": 79.26122283935547, + "step": 421 + }, + { + "epoch": 0.8837696335078534, + "fcm_dpo/beta": 0.009782630950212479, + "fcm_dpo/delta": 0.011894671246409416, + "fcm_dpo/margin": 60.101036071777344, + "fcm_dpo/q_t": 0.378351628780365, + "grad_norm": 73.66893768310547, + "learning_rate": 2.07288983654679e-08, + "logits/chosen": -0.7312873601913452, + "logits/rejected": -0.7808342576026917, + "logps/chosen": -388.74200439453125, + "logps/ref_chosen": -288.3587646484375, + "logps/ref_rejected": -256.4377746582031, + "logps/rejected": -416.9220886230469, + "loss": 4.2494, + "margin_dpo/margin_mean": 60.101036071777344, + "margin_dpo/margin_std": 90.90327453613281, + "step": 422 + }, + { + "epoch": 0.8858638743455497, + "fcm_dpo/beta": 0.009724876843392849, + "fcm_dpo/delta": -0.009197833016514778, + "fcm_dpo/margin": 62.44007873535156, + "fcm_dpo/q_t": 0.3724360466003418, + "grad_norm": 106.5963134765625, + "learning_rate": 2.0005139085293942e-08, + "logits/chosen": -0.8626726269721985, + "logits/rejected": -0.847291886806488, + "logps/chosen": -398.4643859863281, + "logps/ref_chosen": -296.00701904296875, + "logps/ref_rejected": -261.3480529785156, + "logps/rejected": -426.2454833984375, + "loss": 4.104, + "margin_dpo/margin_mean": 62.44007873535156, + "margin_dpo/margin_std": 85.05935668945312, + "step": 423 + }, + { + "epoch": 0.8879581151832461, + "fcm_dpo/beta": 0.00962867308408022, + "fcm_dpo/delta": -0.012859391048550606, + "fcm_dpo/margin": 63.36909103393555, + "fcm_dpo/q_t": 0.36758118867874146, + "grad_norm": 92.60458374023438, + "learning_rate": 1.9293713731512673e-08, + "logits/chosen": -0.8467947840690613, + "logits/rejected": -0.8503403663635254, + "logps/chosen": -404.1105041503906, + "logps/ref_chosen": -309.421875, + "logps/ref_rejected": -249.14886474609375, + "logps/rejected": -407.2065734863281, + "loss": 3.9839, + "margin_dpo/margin_mean": 63.36909103393555, + "margin_dpo/margin_std": 75.94871520996094, + "step": 424 + }, + { + "epoch": 0.8900523560209425, + "fcm_dpo/beta": 0.009777205064892769, + "fcm_dpo/delta": 0.03504558652639389, + "fcm_dpo/margin": 50.999881744384766, + "fcm_dpo/q_t": 0.3966800570487976, + "grad_norm": 110.89618682861328, + "learning_rate": 1.8594660455706763e-08, + "logits/chosen": -0.82796710729599, + "logits/rejected": -0.8337902426719666, + "logps/chosen": -382.77001953125, + "logps/ref_chosen": -280.50909423828125, + "logps/ref_rejected": -276.8252258300781, + "logps/rejected": -430.08599853515625, + "loss": 4.5049, + "margin_dpo/margin_mean": 50.99988555908203, + "margin_dpo/margin_std": 87.4363784790039, + "step": 425 + }, + { + "epoch": 0.8921465968586387, + "fcm_dpo/beta": 0.009954184293746948, + "fcm_dpo/delta": 0.007840080186724663, + "fcm_dpo/margin": 59.37195587158203, + "fcm_dpo/q_t": 0.37472671270370483, + "grad_norm": 97.03230285644531, + "learning_rate": 1.7908016745981856e-08, + "logits/chosen": -0.852469801902771, + "logits/rejected": -0.839727520942688, + "logps/chosen": -397.4804992675781, + "logps/ref_chosen": -292.78521728515625, + "logps/ref_rejected": -255.62698364257812, + "logps/rejected": -419.69415283203125, + "loss": 4.1268, + "margin_dpo/margin_mean": 59.37195587158203, + "margin_dpo/margin_std": 79.54149627685547, + "step": 426 + }, + { + "epoch": 0.8942408376963351, + "fcm_dpo/beta": 0.009696273133158684, + "fcm_dpo/delta": -0.15165768563747406, + "fcm_dpo/margin": 76.89569091796875, + "fcm_dpo/q_t": 0.34561559557914734, + "grad_norm": 90.61172485351562, + "learning_rate": 1.7233819424956247e-08, + "logits/chosen": -0.8438408374786377, + "logits/rejected": -0.8143002390861511, + "logps/chosen": -388.6639099121094, + "logps/ref_chosen": -288.7687072753906, + "logps/ref_rejected": -268.4986572265625, + "logps/rejected": -445.2895812988281, + "loss": 3.8296, + "margin_dpo/margin_mean": 76.89569091796875, + "margin_dpo/margin_std": 89.48991394042969, + "step": 427 + }, + { + "epoch": 0.8963350785340314, + "fcm_dpo/beta": 0.008838072419166565, + "fcm_dpo/delta": -0.012139791622757912, + "fcm_dpo/margin": 69.09696197509766, + "fcm_dpo/q_t": 0.36812734603881836, + "grad_norm": 81.80809783935547, + "learning_rate": 1.6572104647786245e-08, + "logits/chosen": -0.79007887840271, + "logits/rejected": -0.8174630999565125, + "logps/chosen": -407.681640625, + "logps/ref_chosen": -295.5209655761719, + "logps/ref_rejected": -275.71026611328125, + "logps/rejected": -456.96795654296875, + "loss": 4.0523, + "margin_dpo/margin_mean": 69.09696197509766, + "margin_dpo/margin_std": 90.01223754882812, + "step": 428 + }, + { + "epoch": 0.8984293193717278, + "fcm_dpo/beta": 0.00869191437959671, + "fcm_dpo/delta": -0.019767988473176956, + "fcm_dpo/margin": 62.65784454345703, + "fcm_dpo/q_t": 0.3779388666152954, + "grad_norm": 152.89610290527344, + "learning_rate": 1.5922907900227017e-08, + "logits/chosen": -0.8012307286262512, + "logits/rejected": -0.8117492198944092, + "logps/chosen": -377.2466735839844, + "logps/ref_chosen": -274.392333984375, + "logps/ref_rejected": -258.574462890625, + "logps/rejected": -424.086669921875, + "loss": 4.3069, + "margin_dpo/margin_mean": 62.65784454345703, + "margin_dpo/margin_std": 93.02066802978516, + "step": 429 + }, + { + "epoch": 0.900523560209424, + "fcm_dpo/beta": 0.008904652670025826, + "fcm_dpo/delta": 0.03332711011171341, + "fcm_dpo/margin": 52.0653076171875, + "fcm_dpo/q_t": 0.400870144367218, + "grad_norm": 87.17733001708984, + "learning_rate": 1.5286263996730026e-08, + "logits/chosen": -0.8745531439781189, + "logits/rejected": -0.8473076224327087, + "logps/chosen": -389.5596923828125, + "logps/ref_chosen": -288.7391357421875, + "logps/ref_rejected": -268.6106262207031, + "logps/rejected": -421.49652099609375, + "loss": 4.4476, + "margin_dpo/margin_mean": 52.06529998779297, + "margin_dpo/margin_std": 83.41305541992188, + "step": 430 + }, + { + "epoch": 0.9026178010471204, + "fcm_dpo/beta": 0.009530465118587017, + "fcm_dpo/delta": 0.10892680287361145, + "fcm_dpo/margin": 46.10047912597656, + "fcm_dpo/q_t": 0.4076777696609497, + "grad_norm": 102.02164459228516, + "learning_rate": 1.4662207078575684e-08, + "logits/chosen": -0.8528344631195068, + "logits/rejected": -0.8196998238563538, + "logps/chosen": -378.56854248046875, + "logps/ref_chosen": -275.7247314453125, + "logps/ref_rejected": -268.91729736328125, + "logps/rejected": -417.8615417480469, + "loss": 4.5373, + "margin_dpo/margin_mean": 46.10047912597656, + "margin_dpo/margin_std": 79.43936157226562, + "step": 431 + }, + { + "epoch": 0.9047120418848168, + "fcm_dpo/beta": 0.009441605769097805, + "fcm_dpo/delta": 0.008324447087943554, + "fcm_dpo/margin": 62.6112174987793, + "fcm_dpo/q_t": 0.3774021565914154, + "grad_norm": 76.50348663330078, + "learning_rate": 1.40507706120426e-08, + "logits/chosen": -0.8662209510803223, + "logits/rejected": -0.8538703918457031, + "logps/chosen": -387.158203125, + "logps/ref_chosen": -291.42010498046875, + "logps/ref_rejected": -255.48202514648438, + "logps/rejected": -413.8314208984375, + "loss": 4.1391, + "margin_dpo/margin_mean": 62.6112174987793, + "margin_dpo/margin_std": 86.902587890625, + "step": 432 + }, + { + "epoch": 0.9068062827225131, + "fcm_dpo/beta": 0.009825142100453377, + "fcm_dpo/delta": 0.0619993582367897, + "fcm_dpo/margin": 55.066165924072266, + "fcm_dpo/q_t": 0.3830508589744568, + "grad_norm": 89.54483795166016, + "learning_rate": 1.345198738661285e-08, + "logits/chosen": -0.8318926692008972, + "logits/rejected": -0.8286322951316833, + "logps/chosen": -353.9482116699219, + "logps/ref_chosen": -246.2268829345703, + "logps/ref_rejected": -253.65924072265625, + "logps/rejected": -416.4466552734375, + "loss": 4.2477, + "margin_dpo/margin_mean": 55.066165924072266, + "margin_dpo/margin_std": 80.00988006591797, + "step": 433 + }, + { + "epoch": 0.9089005235602095, + "fcm_dpo/beta": 0.01034282986074686, + "fcm_dpo/delta": 0.03874684125185013, + "fcm_dpo/margin": 54.308753967285156, + "fcm_dpo/q_t": 0.3828258812427521, + "grad_norm": 85.50724029541016, + "learning_rate": 1.2865889513213628e-08, + "logits/chosen": -0.8225914239883423, + "logits/rejected": -0.8342767953872681, + "logps/chosen": -406.2464294433594, + "logps/ref_chosen": -295.4618225097656, + "logps/ref_rejected": -256.2254333496094, + "logps/rejected": -421.31878662109375, + "loss": 4.2621, + "margin_dpo/margin_mean": 54.308753967285156, + "margin_dpo/margin_std": 81.47319030761719, + "step": 434 + }, + { + "epoch": 0.9109947643979057, + "fcm_dpo/beta": 0.010198265314102173, + "fcm_dpo/delta": -0.014171771705150604, + "fcm_dpo/margin": 59.84620666503906, + "fcm_dpo/q_t": 0.37271490693092346, + "grad_norm": 118.80712890625, + "learning_rate": 1.2292508422495157e-08, + "logits/chosen": -0.8360690474510193, + "logits/rejected": -0.8230299949645996, + "logps/chosen": -361.0164489746094, + "logps/ref_chosen": -260.7384033203125, + "logps/ref_rejected": -248.5688018798828, + "logps/rejected": -408.69305419921875, + "loss": 4.0566, + "margin_dpo/margin_mean": 59.84620666503906, + "margin_dpo/margin_std": 77.26177978515625, + "step": 435 + }, + { + "epoch": 0.9130890052356021, + "fcm_dpo/beta": 0.010558899492025375, + "fcm_dpo/delta": 0.0584358386695385, + "fcm_dpo/margin": 51.58165740966797, + "fcm_dpo/q_t": 0.38922375440597534, + "grad_norm": 111.06973266601562, + "learning_rate": 1.1731874863145142e-08, + "logits/chosen": -0.8108433485031128, + "logits/rejected": -0.8116201162338257, + "logps/chosen": -426.4559326171875, + "logps/ref_chosen": -319.3224792480469, + "logps/ref_rejected": -299.30322265625, + "logps/rejected": -458.01837158203125, + "loss": 4.3549, + "margin_dpo/margin_mean": 51.58165740966797, + "margin_dpo/margin_std": 84.18111419677734, + "step": 436 + }, + { + "epoch": 0.9151832460732985, + "fcm_dpo/beta": 0.010173209011554718, + "fcm_dpo/delta": -0.1447606235742569, + "fcm_dpo/margin": 67.71795654296875, + "fcm_dpo/q_t": 0.35751214623451233, + "grad_norm": 94.34660339355469, + "learning_rate": 1.118401890024001e-08, + "logits/chosen": -0.844616174697876, + "logits/rejected": -0.8318252563476562, + "logps/chosen": -377.5007019042969, + "logps/ref_chosen": -278.82879638671875, + "logps/ref_rejected": -272.55303955078125, + "logps/rejected": -438.94293212890625, + "loss": 3.9626, + "margin_dpo/margin_mean": 67.71794891357422, + "margin_dpo/margin_std": 85.74967956542969, + "step": 437 + }, + { + "epoch": 0.9172774869109948, + "fcm_dpo/beta": 0.009965099394321442, + "fcm_dpo/delta": 0.061277735978364944, + "fcm_dpo/margin": 36.24957275390625, + "fcm_dpo/q_t": 0.42433011531829834, + "grad_norm": 114.82047271728516, + "learning_rate": 1.06489699136324e-08, + "logits/chosen": -0.81844162940979, + "logits/rejected": -0.842022716999054, + "logps/chosen": -362.96392822265625, + "logps/ref_chosen": -259.31903076171875, + "logps/ref_rejected": -240.99581909179688, + "logps/rejected": -380.8902282714844, + "loss": 4.9148, + "margin_dpo/margin_mean": 36.24957275390625, + "margin_dpo/margin_std": 83.63807678222656, + "step": 438 + }, + { + "epoch": 0.9193717277486911, + "fcm_dpo/beta": 0.010149678215384483, + "fcm_dpo/delta": 0.01735379360616207, + "fcm_dpo/margin": 57.44043731689453, + "fcm_dpo/q_t": 0.3788164556026459, + "grad_norm": 111.58253479003906, + "learning_rate": 1.0126756596375685e-08, + "logits/chosen": -0.8203510046005249, + "logits/rejected": -0.8303657174110413, + "logps/chosen": -361.6324462890625, + "logps/ref_chosen": -257.1243896484375, + "logps/ref_rejected": -243.20416259765625, + "logps/rejected": -405.1526184082031, + "loss": 4.1973, + "margin_dpo/margin_mean": 57.44043731689453, + "margin_dpo/margin_std": 83.50491333007812, + "step": 439 + }, + { + "epoch": 0.9214659685863874, + "fcm_dpo/beta": 0.01079685427248478, + "fcm_dpo/delta": 0.07901112735271454, + "fcm_dpo/margin": 43.94521713256836, + "fcm_dpo/q_t": 0.39794009923934937, + "grad_norm": 109.37852478027344, + "learning_rate": 9.617406953185136e-09, + "logits/chosen": -0.8688513040542603, + "logits/rejected": -0.8636762499809265, + "logps/chosen": -421.8122863769531, + "logps/ref_chosen": -307.5315246582031, + "logps/ref_rejected": -264.3540954589844, + "logps/rejected": -422.580078125, + "loss": 4.5155, + "margin_dpo/margin_mean": 43.94521713256836, + "margin_dpo/margin_std": 75.19562530517578, + "step": 440 + }, + { + "epoch": 0.9235602094240838, + "fcm_dpo/beta": 0.01067368034273386, + "fcm_dpo/delta": -0.08818989247083664, + "fcm_dpo/margin": 63.886962890625, + "fcm_dpo/q_t": 0.3568004071712494, + "grad_norm": 96.44715881347656, + "learning_rate": 9.12094829893642e-09, + "logits/chosen": -0.820861279964447, + "logits/rejected": -0.8048292994499207, + "logps/chosen": -411.4300537109375, + "logps/ref_chosen": -309.9819641113281, + "logps/ref_rejected": -297.4968566894531, + "logps/rejected": -462.8319091796875, + "loss": 3.9218, + "margin_dpo/margin_mean": 63.88697052001953, + "margin_dpo/margin_std": 77.22992706298828, + "step": 441 + }, + { + "epoch": 0.9256544502617801, + "fcm_dpo/beta": 0.010254503227770329, + "fcm_dpo/delta": 0.07677368074655533, + "fcm_dpo/margin": 51.52596664428711, + "fcm_dpo/q_t": 0.3904913365840912, + "grad_norm": 98.88241577148438, + "learning_rate": 8.637407257200496e-09, + "logits/chosen": -0.8967298865318298, + "logits/rejected": -0.8527672290802002, + "logps/chosen": -388.2881774902344, + "logps/ref_chosen": -278.9791564941406, + "logps/ref_rejected": -242.87310791015625, + "logps/rejected": -403.7081298828125, + "loss": 4.4585, + "margin_dpo/margin_mean": 51.525962829589844, + "margin_dpo/margin_std": 85.75384521484375, + "step": 442 + }, + { + "epoch": 0.9277486910994764, + "fcm_dpo/beta": 0.010872803628444672, + "fcm_dpo/delta": -0.026267580687999725, + "fcm_dpo/margin": 57.026920318603516, + "fcm_dpo/q_t": 0.3696047067642212, + "grad_norm": 103.62533569335938, + "learning_rate": 8.166809758815895e-09, + "logits/chosen": -0.7956724166870117, + "logits/rejected": -0.8195681571960449, + "logps/chosen": -375.178955078125, + "logps/ref_chosen": -273.5590515136719, + "logps/ref_rejected": -264.0199279785156, + "logps/rejected": -422.6667785644531, + "loss": 4.1475, + "margin_dpo/margin_mean": 57.026920318603516, + "margin_dpo/margin_std": 78.09822082519531, + "step": 443 + }, + { + "epoch": 0.9298429319371728, + "fcm_dpo/beta": 0.010260455310344696, + "fcm_dpo/delta": -0.04022660851478577, + "fcm_dpo/margin": 61.927947998046875, + "fcm_dpo/q_t": 0.3720618486404419, + "grad_norm": 100.3301773071289, + "learning_rate": 7.709181040498253e-09, + "logits/chosen": -0.807881772518158, + "logits/rejected": -0.7976375818252563, + "logps/chosen": -399.5924377441406, + "logps/ref_chosen": -298.1441955566406, + "logps/ref_rejected": -268.0572814941406, + "logps/rejected": -431.4334716796875, + "loss": 4.209, + "margin_dpo/margin_mean": 61.927947998046875, + "margin_dpo/margin_std": 93.60353088378906, + "step": 444 + }, + { + "epoch": 0.9319371727748691, + "fcm_dpo/beta": 0.009982587769627571, + "fcm_dpo/delta": -0.0828336626291275, + "fcm_dpo/margin": 50.54986572265625, + "fcm_dpo/q_t": 0.39341387152671814, + "grad_norm": 95.27164459228516, + "learning_rate": 7.2645456434869965e-09, + "logits/chosen": -0.8636192679405212, + "logits/rejected": -0.8787074685096741, + "logps/chosen": -358.3545837402344, + "logps/ref_chosen": -254.54067993164062, + "logps/ref_rejected": -264.2445983886719, + "logps/rejected": -418.6083679199219, + "loss": 4.4375, + "margin_dpo/margin_mean": 50.54986572265625, + "margin_dpo/margin_std": 77.68645477294922, + "step": 445 + }, + { + "epoch": 0.9340314136125655, + "fcm_dpo/beta": 0.009562542662024498, + "fcm_dpo/delta": 0.023602399975061417, + "fcm_dpo/margin": 60.29633331298828, + "fcm_dpo/q_t": 0.3763912618160248, + "grad_norm": 82.18879699707031, + "learning_rate": 6.832927412229017e-09, + "logits/chosen": -0.8063375949859619, + "logits/rejected": -0.8075209856033325, + "logps/chosen": -404.150634765625, + "logps/ref_chosen": -306.72247314453125, + "logps/ref_rejected": -266.3735656738281, + "logps/rejected": -424.0980224609375, + "loss": 4.179, + "margin_dpo/margin_mean": 60.29633331298828, + "margin_dpo/margin_std": 84.00418853759766, + "step": 446 + }, + { + "epoch": 0.9361256544502617, + "fcm_dpo/beta": 0.009372793138027191, + "fcm_dpo/delta": -0.06785252690315247, + "fcm_dpo/margin": 65.36695098876953, + "fcm_dpo/q_t": 0.36677664518356323, + "grad_norm": 81.27397155761719, + "learning_rate": 6.414349493100129e-09, + "logits/chosen": -0.8006303906440735, + "logits/rejected": -0.8021730184555054, + "logps/chosen": -357.7692565917969, + "logps/ref_chosen": -260.51727294921875, + "logps/ref_rejected": -236.47061157226562, + "logps/rejected": -399.0894775390625, + "loss": 3.948, + "margin_dpo/margin_mean": 65.36695098876953, + "margin_dpo/margin_std": 76.74752044677734, + "step": 447 + }, + { + "epoch": 0.9382198952879581, + "fcm_dpo/beta": 0.009358673356473446, + "fcm_dpo/delta": 0.04219186305999756, + "fcm_dpo/margin": 59.74993133544922, + "fcm_dpo/q_t": 0.38221871852874756, + "grad_norm": 101.68222045898438, + "learning_rate": 6.0088343331638756e-09, + "logits/chosen": -0.8103606104850769, + "logits/rejected": -0.8069367408752441, + "logps/chosen": -372.63238525390625, + "logps/ref_chosen": -268.78704833984375, + "logps/ref_rejected": -262.1703796386719, + "logps/rejected": -425.76568603515625, + "loss": 4.17, + "margin_dpo/margin_mean": 59.74993896484375, + "margin_dpo/margin_std": 81.6711654663086, + "step": 448 + }, + { + "epoch": 0.9403141361256544, + "fcm_dpo/beta": 0.00959862396121025, + "fcm_dpo/delta": -0.02114713191986084, + "fcm_dpo/margin": 64.47396850585938, + "fcm_dpo/q_t": 0.36557552218437195, + "grad_norm": 131.04855346679688, + "learning_rate": 5.616403678967624e-09, + "logits/chosen": -0.893824577331543, + "logits/rejected": -0.8799617290496826, + "logps/chosen": -422.57275390625, + "logps/ref_chosen": -330.9514465332031, + "logps/ref_rejected": -239.76974487304688, + "logps/rejected": -395.8650207519531, + "loss": 4.0123, + "margin_dpo/margin_mean": 64.47396850585938, + "margin_dpo/margin_std": 80.27033233642578, + "step": 449 + }, + { + "epoch": 0.9424083769633508, + "fcm_dpo/beta": 0.009571806527674198, + "fcm_dpo/delta": 0.05357804149389267, + "fcm_dpo/margin": 52.08460235595703, + "fcm_dpo/q_t": 0.3920726478099823, + "grad_norm": 97.84994506835938, + "learning_rate": 5.2370785753763356e-09, + "logits/chosen": -0.784131646156311, + "logits/rejected": -0.7929754257202148, + "logps/chosen": -395.12738037109375, + "logps/ref_chosen": -284.26544189453125, + "logps/ref_rejected": -250.5401611328125, + "logps/rejected": -413.4866943359375, + "loss": 4.2733, + "margin_dpo/margin_mean": 52.08460235595703, + "margin_dpo/margin_std": 72.87914276123047, + "step": 450 + }, + { + "epoch": 0.9445026178010472, + "fcm_dpo/beta": 0.009548072703182697, + "fcm_dpo/delta": -0.01880437321960926, + "fcm_dpo/margin": 52.78652572631836, + "fcm_dpo/q_t": 0.3943302035331726, + "grad_norm": 102.84935760498047, + "learning_rate": 4.8708793644441086e-09, + "logits/chosen": -0.8045037984848022, + "logits/rejected": -0.777286171913147, + "logps/chosen": -414.0666809082031, + "logps/ref_chosen": -302.3209228515625, + "logps/ref_rejected": -254.09747314453125, + "logps/rejected": -418.62969970703125, + "loss": 4.419, + "margin_dpo/margin_mean": 52.78652572631836, + "margin_dpo/margin_std": 83.30145263671875, + "step": 451 + }, + { + "epoch": 0.9465968586387434, + "fcm_dpo/beta": 0.009476564824581146, + "fcm_dpo/delta": 0.00583769753575325, + "fcm_dpo/margin": 57.515316009521484, + "fcm_dpo/q_t": 0.3849991261959076, + "grad_norm": 92.07205963134766, + "learning_rate": 4.517825684323323e-09, + "logits/chosen": -0.8670358061790466, + "logits/rejected": -0.8449291586875916, + "logps/chosen": -398.6337890625, + "logps/ref_chosen": -299.39215087890625, + "logps/ref_rejected": -284.3475036621094, + "logps/rejected": -441.1044616699219, + "loss": 4.2481, + "margin_dpo/margin_mean": 57.51531219482422, + "margin_dpo/margin_std": 82.63733673095703, + "step": 452 + }, + { + "epoch": 0.9486910994764398, + "fcm_dpo/beta": 0.009641487151384354, + "fcm_dpo/delta": -0.013517485931515694, + "fcm_dpo/margin": 63.50697326660156, + "fcm_dpo/q_t": 0.3711569905281067, + "grad_norm": 95.07856750488281, + "learning_rate": 4.1779364682113794e-09, + "logits/chosen": -0.8013238310813904, + "logits/rejected": -0.7985789179801941, + "logps/chosen": -429.9082336425781, + "logps/ref_chosen": -324.6517028808594, + "logps/ref_rejected": -304.1527099609375, + "logps/rejected": -472.91619873046875, + "loss": 4.0431, + "margin_dpo/margin_mean": 63.50697326660156, + "margin_dpo/margin_std": 84.71268463134766, + "step": 453 + }, + { + "epoch": 0.9507853403141361, + "fcm_dpo/beta": 0.009615411050617695, + "fcm_dpo/delta": -0.0042562056332826614, + "fcm_dpo/margin": 62.60576248168945, + "fcm_dpo/q_t": 0.3715764582157135, + "grad_norm": 76.87505340576172, + "learning_rate": 3.851229943335393e-09, + "logits/chosen": -0.8534815907478333, + "logits/rejected": -0.8655160665512085, + "logps/chosen": -401.67681884765625, + "logps/ref_chosen": -299.6117248535156, + "logps/ref_rejected": -303.74224853515625, + "logps/rejected": -468.4130554199219, + "loss": 4.1252, + "margin_dpo/margin_mean": 62.60576248168945, + "margin_dpo/margin_std": 85.04026794433594, + "step": 454 + }, + { + "epoch": 0.9528795811518325, + "fcm_dpo/beta": 0.010230256244540215, + "fcm_dpo/delta": 0.13277457654476166, + "fcm_dpo/margin": 46.14201736450195, + "fcm_dpo/q_t": 0.402716726064682, + "grad_norm": 95.32615661621094, + "learning_rate": 3.5377236299748147e-09, + "logits/chosen": -0.807562530040741, + "logits/rejected": -0.8190088272094727, + "logps/chosen": -374.5747985839844, + "logps/ref_chosen": -273.6116943359375, + "logps/ref_rejected": -274.4293518066406, + "logps/rejected": -421.5345458984375, + "loss": 4.5798, + "margin_dpo/margin_mean": 46.14202117919922, + "margin_dpo/margin_std": 85.73726654052734, + "step": 455 + }, + { + "epoch": 0.9549738219895288, + "fcm_dpo/beta": 0.010185835883021355, + "fcm_dpo/delta": -0.10059641301631927, + "fcm_dpo/margin": 63.664310455322266, + "fcm_dpo/q_t": 0.3761172890663147, + "grad_norm": 98.11104583740234, + "learning_rate": 3.2374343405217884e-09, + "logits/chosen": -0.7371918559074402, + "logits/rejected": -0.7502031326293945, + "logps/chosen": -438.56854248046875, + "logps/ref_chosen": -322.17193603515625, + "logps/ref_rejected": -294.54461669921875, + "logps/rejected": -474.6055603027344, + "loss": 4.3393, + "margin_dpo/margin_mean": 63.664310455322266, + "margin_dpo/margin_std": 105.6181411743164, + "step": 456 + }, + { + "epoch": 0.9570680628272251, + "fcm_dpo/beta": 0.009687078185379505, + "fcm_dpo/delta": -0.013452993705868721, + "fcm_dpo/margin": 63.15357971191406, + "fcm_dpo/q_t": 0.3667003810405731, + "grad_norm": 85.27921295166016, + "learning_rate": 2.9503781785795713e-09, + "logits/chosen": -0.7975083589553833, + "logits/rejected": -0.8043266534805298, + "logps/chosen": -416.79241943359375, + "logps/ref_chosen": -307.7962341308594, + "logps/ref_rejected": -274.5501403808594, + "logps/rejected": -446.69989013671875, + "loss": 4.1568, + "margin_dpo/margin_mean": 63.15358352661133, + "margin_dpo/margin_std": 89.4337158203125, + "step": 457 + }, + { + "epoch": 0.9591623036649215, + "fcm_dpo/beta": 0.010026252828538418, + "fcm_dpo/delta": 0.03702447563409805, + "fcm_dpo/margin": 56.15598678588867, + "fcm_dpo/q_t": 0.3839564621448517, + "grad_norm": 83.73405456542969, + "learning_rate": 2.6765705380989432e-09, + "logits/chosen": -0.822134256362915, + "logits/rejected": -0.8091610670089722, + "logps/chosen": -403.46807861328125, + "logps/ref_chosen": -297.0316467285156, + "logps/ref_rejected": -276.1112365722656, + "logps/rejected": -438.7036437988281, + "loss": 4.318, + "margin_dpo/margin_mean": 56.15598678588867, + "margin_dpo/margin_std": 87.26219940185547, + "step": 458 + }, + { + "epoch": 0.9612565445026178, + "fcm_dpo/beta": 0.010189807042479515, + "fcm_dpo/delta": 0.022624600678682327, + "fcm_dpo/margin": 51.38506317138672, + "fcm_dpo/q_t": 0.39040350914001465, + "grad_norm": 119.48713684082031, + "learning_rate": 2.416026102552732e-09, + "logits/chosen": -0.8736098408699036, + "logits/rejected": -0.8673666715621948, + "logps/chosen": -394.96923828125, + "logps/ref_chosen": -293.5252990722656, + "logps/ref_rejected": -289.30126953125, + "logps/rejected": -442.1302490234375, + "loss": 4.3828, + "margin_dpo/margin_mean": 51.385066986083984, + "margin_dpo/margin_std": 80.866455078125, + "step": 459 + }, + { + "epoch": 0.9633507853403142, + "fcm_dpo/beta": 0.010447122156620026, + "fcm_dpo/delta": 0.01566571742296219, + "fcm_dpo/margin": 50.86057662963867, + "fcm_dpo/q_t": 0.38575083017349243, + "grad_norm": 106.79894256591797, + "learning_rate": 2.168758844148272e-09, + "logits/chosen": -0.8482452034950256, + "logits/rejected": -0.8550105094909668, + "logps/chosen": -422.16461181640625, + "logps/ref_chosen": -318.7803649902344, + "logps/ref_rejected": -258.7906799316406, + "logps/rejected": -413.0354919433594, + "loss": 4.335, + "margin_dpo/margin_mean": 50.86057662963867, + "margin_dpo/margin_std": 78.11962890625, + "step": 460 + }, + { + "epoch": 0.9654450261780104, + "fcm_dpo/beta": 0.010231072083115578, + "fcm_dpo/delta": -0.023067938163876534, + "fcm_dpo/margin": 56.03329849243164, + "fcm_dpo/q_t": 0.3828889727592468, + "grad_norm": 106.28280639648438, + "learning_rate": 1.9347820230782295e-09, + "logits/chosen": -0.8239161372184753, + "logits/rejected": -0.8513062596321106, + "logps/chosen": -346.3656311035156, + "logps/ref_chosen": -243.9099884033203, + "logps/ref_rejected": -232.6382293701172, + "logps/rejected": -391.1271667480469, + "loss": 4.351, + "margin_dpo/margin_mean": 56.03329849243164, + "margin_dpo/margin_std": 89.30838775634766, + "step": 461 + }, + { + "epoch": 0.9675392670157068, + "fcm_dpo/beta": 0.00970435980707407, + "fcm_dpo/delta": -0.0686081126332283, + "fcm_dpo/margin": 68.12385559082031, + "fcm_dpo/q_t": 0.3644864857196808, + "grad_norm": 94.2146987915039, + "learning_rate": 1.7141081868094209e-09, + "logits/chosen": -0.8403683304786682, + "logits/rejected": -0.7992677092552185, + "logps/chosen": -448.24212646484375, + "logps/ref_chosen": -344.09100341796875, + "logps/ref_rejected": -252.45037841796875, + "logps/rejected": -424.72540283203125, + "loss": 4.0783, + "margin_dpo/margin_mean": 68.12385559082031, + "margin_dpo/margin_std": 93.93057250976562, + "step": 462 + }, + { + "epoch": 0.9696335078534032, + "fcm_dpo/beta": 0.010029610246419907, + "fcm_dpo/delta": 0.08034525066614151, + "fcm_dpo/margin": 51.993682861328125, + "fcm_dpo/q_t": 0.39002859592437744, + "grad_norm": 99.33654022216797, + "learning_rate": 1.5067491694100153e-09, + "logits/chosen": -0.8565876483917236, + "logits/rejected": -0.8210662603378296, + "logps/chosen": -397.64654541015625, + "logps/ref_chosen": -297.1424560546875, + "logps/ref_rejected": -234.0208282470703, + "logps/rejected": -386.5185852050781, + "loss": 4.4017, + "margin_dpo/margin_mean": 51.99367904663086, + "margin_dpo/margin_std": 84.37198638916016, + "step": 463 + }, + { + "epoch": 0.9717277486910995, + "fcm_dpo/beta": 0.01041481550782919, + "fcm_dpo/delta": 0.04956157132983208, + "fcm_dpo/margin": 52.998783111572266, + "fcm_dpo/q_t": 0.3862907886505127, + "grad_norm": 133.43360900878906, + "learning_rate": 1.3127160909147672e-09, + "logits/chosen": -0.8275717496871948, + "logits/rejected": -0.8531575202941895, + "logps/chosen": -378.3173522949219, + "logps/ref_chosen": -265.71075439453125, + "logps/ref_rejected": -256.4108581542969, + "logps/rejected": -422.01617431640625, + "loss": 4.3843, + "margin_dpo/margin_mean": 52.998779296875, + "margin_dpo/margin_std": 86.16059112548828, + "step": 464 + }, + { + "epoch": 0.9738219895287958, + "fcm_dpo/beta": 0.009905759245157242, + "fcm_dpo/delta": -0.13921670615673065, + "fcm_dpo/margin": 65.77295684814453, + "fcm_dpo/q_t": 0.36359280347824097, + "grad_norm": 68.23556518554688, + "learning_rate": 1.1320193567288527e-09, + "logits/chosen": -0.8828303217887878, + "logits/rejected": -0.8572342395782471, + "logps/chosen": -391.30364990234375, + "logps/ref_chosen": -293.1527404785156, + "logps/ref_rejected": -293.70947265625, + "logps/rejected": -457.63336181640625, + "loss": 4.0356, + "margin_dpo/margin_mean": 65.77295684814453, + "margin_dpo/margin_std": 82.50149536132812, + "step": 465 + }, + { + "epoch": 0.9759162303664921, + "fcm_dpo/beta": 0.009194673970341682, + "fcm_dpo/delta": -0.052108634263277054, + "fcm_dpo/margin": 70.25971221923828, + "fcm_dpo/q_t": 0.36073338985443115, + "grad_norm": 77.50592803955078, + "learning_rate": 9.64668657069706e-10, + "logits/chosen": -0.8009305000305176, + "logits/rejected": -0.7544541954994202, + "logps/chosen": -353.8938293457031, + "logps/ref_chosen": -261.4775695800781, + "logps/ref_rejected": -248.36282348632812, + "logps/rejected": -411.038818359375, + "loss": 3.8645, + "margin_dpo/margin_mean": 70.25971221923828, + "margin_dpo/margin_std": 77.51724243164062, + "step": 466 + }, + { + "epoch": 0.9780104712041885, + "fcm_dpo/beta": 0.009051669389009476, + "fcm_dpo/delta": 0.01738828979432583, + "fcm_dpo/margin": 50.43449401855469, + "fcm_dpo/q_t": 0.40313076972961426, + "grad_norm": 100.67909240722656, + "learning_rate": 8.106729664475176e-10, + "logits/chosen": -0.7964373230934143, + "logits/rejected": -0.7927530407905579, + "logps/chosen": -372.4566955566406, + "logps/ref_chosen": -266.354248046875, + "logps/ref_rejected": -277.76324462890625, + "logps/rejected": -434.3001708984375, + "loss": 4.5803, + "margin_dpo/margin_mean": 50.43449401855469, + "margin_dpo/margin_std": 88.83377075195312, + "step": 467 + }, + { + "epoch": 0.9801047120418848, + "fcm_dpo/beta": 0.009555336087942123, + "fcm_dpo/delta": 0.04439329728484154, + "fcm_dpo/margin": 52.71699523925781, + "fcm_dpo/q_t": 0.39236387610435486, + "grad_norm": 95.95642852783203, + "learning_rate": 6.700405431837585e-10, + "logits/chosen": -0.8729247450828552, + "logits/rejected": -0.847733736038208, + "logps/chosen": -419.738525390625, + "logps/ref_chosen": -317.9631652832031, + "logps/ref_rejected": -261.8744201660156, + "logps/rejected": -416.3667297363281, + "loss": 4.3829, + "margin_dpo/margin_mean": 52.71699523925781, + "margin_dpo/margin_std": 82.52155303955078, + "step": 468 + }, + { + "epoch": 0.9821989528795811, + "fcm_dpo/beta": 0.009460176341235638, + "fcm_dpo/delta": -0.016594115644693375, + "fcm_dpo/margin": 64.84768676757812, + "fcm_dpo/q_t": 0.37026524543762207, + "grad_norm": 80.08511352539062, + "learning_rate": 5.427789289685347e-10, + "logits/chosen": -0.813917875289917, + "logits/rejected": -0.8034530282020569, + "logps/chosen": -421.9589538574219, + "logps/ref_chosen": -324.8868103027344, + "logps/ref_rejected": -264.0421447753906, + "logps/rejected": -425.9620056152344, + "loss": 4.1315, + "margin_dpo/margin_mean": 64.84768676757812, + "margin_dpo/margin_std": 89.04574584960938, + "step": 469 + }, + { + "epoch": 0.9842931937172775, + "fcm_dpo/beta": 0.009737596847116947, + "fcm_dpo/delta": -0.01096111536026001, + "fcm_dpo/margin": 62.489173889160156, + "fcm_dpo/q_t": 0.3725891411304474, + "grad_norm": 75.43241119384766, + "learning_rate": 4.288949484559934e-10, + "logits/chosen": -0.8106139898300171, + "logits/rejected": -0.8112368583679199, + "logps/chosen": -408.78070068359375, + "logps/ref_chosen": -314.7042541503906, + "logps/ref_rejected": -259.2276611328125, + "logps/rejected": -415.7933044433594, + "loss": 4.1024, + "margin_dpo/margin_mean": 62.489173889160156, + "margin_dpo/margin_std": 83.64241027832031, + "step": 470 + }, + { + "epoch": 0.9863874345549738, + "fcm_dpo/beta": 0.009809708222746849, + "fcm_dpo/delta": 0.014611058868467808, + "fcm_dpo/margin": 54.49514389038086, + "fcm_dpo/q_t": 0.3918081820011139, + "grad_norm": 100.71548461914062, + "learning_rate": 3.2839470889836627e-10, + "logits/chosen": -0.8629408478736877, + "logits/rejected": -0.8535081148147583, + "logps/chosen": -400.7052917480469, + "logps/ref_chosen": -292.5748291015625, + "logps/ref_rejected": -298.7525329589844, + "logps/rejected": -461.37811279296875, + "loss": 4.3804, + "margin_dpo/margin_mean": 54.49514389038086, + "margin_dpo/margin_std": 88.37163543701172, + "step": 471 + }, + { + "epoch": 0.9884816753926702, + "fcm_dpo/beta": 0.009502904489636421, + "fcm_dpo/delta": -0.0033622095361351967, + "fcm_dpo/margin": 63.385826110839844, + "fcm_dpo/q_t": 0.3735297918319702, + "grad_norm": 83.12667083740234, + "learning_rate": 2.412835998185092e-10, + "logits/chosen": -0.8532136082649231, + "logits/rejected": -0.8691096901893616, + "logps/chosen": -336.4410400390625, + "logps/ref_chosen": -243.37380981445312, + "logps/ref_rejected": -251.12109375, + "logps/rejected": -407.5741271972656, + "loss": 4.0089, + "margin_dpo/margin_mean": 63.385826110839844, + "margin_dpo/margin_std": 79.39556884765625, + "step": 472 + }, + { + "epoch": 0.9905759162303664, + "fcm_dpo/beta": 0.00958459172397852, + "fcm_dpo/delta": -0.033373601734638214, + "fcm_dpo/margin": 65.87982177734375, + "fcm_dpo/q_t": 0.36491870880126953, + "grad_norm": 100.53154754638672, + "learning_rate": 1.6756629272085544e-10, + "logits/chosen": -0.8084653615951538, + "logits/rejected": -0.8148469924926758, + "logps/chosen": -385.27142333984375, + "logps/ref_chosen": -286.3286437988281, + "logps/ref_rejected": -258.6535339355469, + "logps/rejected": -423.4761962890625, + "loss": 3.9915, + "margin_dpo/margin_mean": 65.87982177734375, + "margin_dpo/margin_std": 82.26141357421875, + "step": 473 + }, + { + "epoch": 0.9926701570680628, + "fcm_dpo/beta": 0.009304332546889782, + "fcm_dpo/delta": 0.059272147715091705, + "fcm_dpo/margin": 51.1921272277832, + "fcm_dpo/q_t": 0.3959723114967346, + "grad_norm": 109.58087921142578, + "learning_rate": 1.072467408408384e-10, + "logits/chosen": -0.839458167552948, + "logits/rejected": -0.8423305749893188, + "logps/chosen": -393.1019287109375, + "logps/ref_chosen": -288.08966064453125, + "logps/ref_rejected": -266.69696044921875, + "logps/rejected": -422.9013977050781, + "loss": 4.3602, + "margin_dpo/margin_mean": 51.1921272277832, + "margin_dpo/margin_std": 72.16545104980469, + "step": 474 + }, + { + "epoch": 0.9947643979057592, + "fcm_dpo/beta": 0.009782197885215282, + "fcm_dpo/delta": 0.013262166641652584, + "fcm_dpo/margin": 53.376014709472656, + "fcm_dpo/q_t": 0.3906119465827942, + "grad_norm": 89.71319580078125, + "learning_rate": 6.032817893297793e-11, + "logits/chosen": -0.812603771686554, + "logits/rejected": -0.8350270390510559, + "logps/chosen": -350.7452087402344, + "logps/ref_chosen": -256.0030517578125, + "logps/ref_rejected": -244.50660705566406, + "logps/rejected": -392.624755859375, + "loss": 4.3125, + "margin_dpo/margin_mean": 53.376007080078125, + "margin_dpo/margin_std": 78.27056884765625, + "step": 475 + }, + { + "epoch": 0.9968586387434555, + "fcm_dpo/beta": 0.01014248188585043, + "fcm_dpo/delta": 0.01945674978196621, + "fcm_dpo/margin": 56.881683349609375, + "fcm_dpo/q_t": 0.3829057216644287, + "grad_norm": 124.16419982910156, + "learning_rate": 2.6813123097352287e-11, + "logits/chosen": -0.8888995051383972, + "logits/rejected": -0.8492950797080994, + "logps/chosen": -414.1214904785156, + "logps/ref_chosen": -321.467529296875, + "logps/ref_rejected": -295.0592956542969, + "logps/rejected": -444.594970703125, + "loss": 4.316, + "margin_dpo/margin_mean": 56.881683349609375, + "margin_dpo/margin_std": 87.3014907836914, + "step": 476 + }, + { + "epoch": 0.9989528795811519, + "fcm_dpo/beta": 0.010023507289588451, + "fcm_dpo/delta": -0.04792780801653862, + "fcm_dpo/margin": 59.55145263671875, + "fcm_dpo/q_t": 0.38020825386047363, + "grad_norm": 126.22605895996094, + "learning_rate": 6.7033706447061635e-12, + "logits/chosen": -0.7799222469329834, + "logits/rejected": -0.792705774307251, + "logps/chosen": -385.03021240234375, + "logps/ref_chosen": -276.7939758300781, + "logps/ref_rejected": -244.82919311523438, + "logps/rejected": -412.61688232421875, + "loss": 4.3806, + "margin_dpo/margin_mean": 59.55145263671875, + "margin_dpo/margin_std": 96.72840881347656, + "step": 477 + }, + { + "epoch": 0.9989528795811519, + "step": 477, + "total_flos": 0.0, + "train_loss": 4.542374380479568, + "train_runtime": 6039.2377, + "train_samples_per_second": 10.123, + "train_steps_per_second": 0.079 + } + ], + "logging_steps": 1, + "max_steps": 477, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}