From e2d7fad87f6d1aa6a25188ccbc7c952b0ec6394f Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Sun, 10 May 2026 12:37:20 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: jackf857/llama-3-8b-base-margin-dpo-hh-helpful-batch-64 Source: Original Platform --- .gitattributes | 36 + README.md | 85 + all_results.json | 27 + config.json | 29 + eval_results.json | 21 + generation_config.json | 9 + model-00001-of-00007.safetensors | 3 + model-00002-of-00007.safetensors | 3 + model-00003-of-00007.safetensors | 3 + model-00004-of-00007.safetensors | 3 + model-00005-of-00007.safetensors | 3 + model-00006-of-00007.safetensors | 3 + model-00007-of-00007.safetensors | 3 + model.safetensors.index.json | 298 + special_tokens_map.json | 23 + tokenizer.json | 3 + tokenizer_config.json | 2064 +++++ train_results.json | 9 + trainer_state.json | 13789 +++++++++++++++++++++++++++++ 19 files changed, 16414 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 all_results.json create mode 100644 config.json create mode 100644 eval_results.json create mode 100644 generation_config.json create mode 100644 model-00001-of-00007.safetensors create mode 100644 model-00002-of-00007.safetensors create mode 100644 model-00003-of-00007.safetensors create mode 100644 model-00004-of-00007.safetensors create mode 100644 model-00005-of-00007.safetensors create mode 100644 model-00006-of-00007.safetensors create mode 100644 model-00007-of-00007.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 train_results.json create mode 100644 trainer_state.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..bc13649 --- /dev/null +++ b/README.md @@ -0,0 +1,85 @@ +--- +library_name: transformers +base_model: W-61/llama-3-8b-base-sft-hh-helpful-4xh200 +tags: +- alignment-handbook +- margin-dpo +- generated_from_trainer +datasets: +- Anthropic/hh-rlhf +model-index: +- name: llama-3-8b-base-margin-dpo-hh-helpful-4xH200-batch-64 + results: [] +--- + + + +# llama-3-8b-base-margin-dpo-hh-helpful-4xH200-batch-64 + +This model is a fine-tuned version of [W-61/llama-3-8b-base-sft-hh-helpful-4xh200](https://huggingface.co/W-61/llama-3-8b-base-sft-hh-helpful-4xh200) on the Anthropic/hh-rlhf dataset. +It achieves the following results on the evaluation set: +- Loss: 0.4046 +- Margin Dpo/beta: 0.1000 +- Margin Dpo/loss Margin Mean: 21.7563 +- Margin Dpo/beta Margin Mean: 2.1756 +- Margin Dpo/beta Margin Grad Mean: -0.2570 +- Margin Dpo/beta Margin Grad Std: 0.2538 +- Margin Dpo/margin Mean: 21.7563 +- Margin Dpo/margin Std: 26.3378 +- Logps/chosen: -105.9372 +- Logps/rejected: -135.4405 +- Logps/ref Chosen: -79.0510 +- Logps/ref Rejected: -86.7979 +- Logits/chosen: -0.6270 +- Logits/rejected: -0.6013 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-07 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 64 +- total_eval_batch_size: 32 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 1 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | Margin Dpo/beta | Margin Dpo/loss Margin Mean | Margin Dpo/beta Margin Mean | Margin Dpo/beta Margin Grad Mean | Margin Dpo/beta Margin Grad Std | Margin Dpo/margin Mean | Margin Dpo/margin Std | Logps/chosen | Logps/rejected | Logps/ref Chosen | Logps/ref Rejected | Logits/chosen | Logits/rejected | +|:-------------:|:------:|:----:|:---------------:|:---------------:|:---------------------------:|:---------------------------:|:--------------------------------:|:-------------------------------:|:----------------------:|:---------------------:|:------------:|:--------------:|:----------------:|:------------------:|:-------------:|:---------------:| +| 0.9037 | 0.1468 | 100 | 0.5593 | 0.1000 | 8.4400 | 0.8440 | -0.3668 | 0.2303 | 8.4400 | 15.3426 | -87.1427 | -103.3296 | -79.0510 | -86.7979 | -0.6628 | -0.6366 | +| 0.6607 | 0.2937 | 200 | 0.4791 | 0.1000 | 14.6826 | 1.4683 | -0.3109 | 0.2473 | 14.6826 | 21.1628 | -92.9979 | -115.4274 | -79.0510 | -86.7979 | -0.6426 | -0.6197 | +| 0.699 | 0.4405 | 300 | 0.4414 | 0.1000 | 18.1032 | 1.8103 | -0.2828 | 0.2516 | 18.1032 | 23.7825 | -99.9692 | -125.8193 | -79.0510 | -86.7979 | -0.6107 | -0.5845 | +| 0.4468 | 0.5874 | 400 | 0.4213 | 0.1000 | 20.2783 | 2.0278 | -0.2687 | 0.2540 | 20.2783 | 25.4582 | -102.0468 | -130.0720 | -79.0510 | -86.7979 | -0.5647 | -0.5335 | +| 0.38 | 0.7342 | 500 | 0.4098 | 0.1000 | 21.8238 | 2.1824 | -0.2579 | 0.2561 | 21.8238 | 26.5974 | -106.9358 | -136.5065 | -79.0510 | -86.7979 | -0.6236 | -0.5976 | +| 0.4876 | 0.8811 | 600 | 0.4046 | 0.1000 | 21.7563 | 2.1756 | -0.2570 | 0.2538 | 21.7563 | 26.3378 | -105.9372 | -135.4405 | -79.0510 | -86.7979 | -0.6270 | -0.6013 | + + +### Framework versions + +- Transformers 4.51.0 +- Pytorch 2.3.1+cu121 +- Datasets 2.21.0 +- Tokenizers 0.21.4 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..d4bbd29 --- /dev/null +++ b/all_results.json @@ -0,0 +1,27 @@ +{ + "epoch": 1.0, + "eval_logits/chosen": -0.6029719114303589, + "eval_logits/rejected": -0.5759690403938293, + "eval_logps/chosen": -106.27237701416016, + "eval_logps/ref_chosen": -79.05104064941406, + "eval_logps/ref_rejected": -86.79793548583984, + "eval_logps/rejected": -135.82232666015625, + "eval_loss": 0.40388280153274536, + "eval_margin_dpo/beta": 0.10000000149011612, + "eval_margin_dpo/beta_margin_grad_mean": -0.2565760016441345, + "eval_margin_dpo/beta_margin_grad_std": 0.2537597417831421, + "eval_margin_dpo/beta_margin_mean": 2.1803061962127686, + "eval_margin_dpo/loss_margin_mean": 21.803062438964844, + "eval_margin_dpo/margin_mean": 21.803062438964844, + "eval_margin_dpo/margin_std": 26.34841537475586, + "eval_runtime": 39.8235, + "eval_samples": 2339, + "eval_samples_per_second": 58.734, + "eval_steps_per_second": 1.858, + "total_flos": 0.0, + "train_loss": 0.572698849610295, + "train_runtime": 1998.3785, + "train_samples": 43598, + "train_samples_per_second": 21.817, + "train_steps_per_second": 0.341 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..5092b09 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.51.0", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000..20954b7 --- /dev/null +++ b/eval_results.json @@ -0,0 +1,21 @@ +{ + "epoch": 1.0, + "eval_logits/chosen": -0.6029719114303589, + "eval_logits/rejected": -0.5759690403938293, + "eval_logps/chosen": -106.27237701416016, + "eval_logps/ref_chosen": -79.05104064941406, + "eval_logps/ref_rejected": -86.79793548583984, + "eval_logps/rejected": -135.82232666015625, + "eval_loss": 0.40388280153274536, + "eval_margin_dpo/beta": 0.10000000149011612, + "eval_margin_dpo/beta_margin_grad_mean": -0.2565760016441345, + "eval_margin_dpo/beta_margin_grad_std": 0.2537597417831421, + "eval_margin_dpo/beta_margin_mean": 2.1803061962127686, + "eval_margin_dpo/loss_margin_mean": 21.803062438964844, + "eval_margin_dpo/margin_mean": 21.803062438964844, + "eval_margin_dpo/margin_std": 26.34841537475586, + "eval_runtime": 39.8235, + "eval_samples": 2339, + "eval_samples_per_second": 58.734, + "eval_steps_per_second": 1.858 +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..76247c9 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.0" +} diff --git a/model-00001-of-00007.safetensors b/model-00001-of-00007.safetensors new file mode 100644 index 0000000..36aa2b7 --- /dev/null +++ b/model-00001-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7bc49a5353700476d003ae3c6cc3cf7366075ef53721b126e5275b5d68c821c +size 4886466168 diff --git a/model-00002-of-00007.safetensors b/model-00002-of-00007.safetensors new file mode 100644 index 0000000..b6bfd1b --- /dev/null +++ b/model-00002-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75a87c657824eeb2684b524b00e18f7959ea8dda7c158a29bcf88c2952d3d3b6 +size 4832007448 diff --git a/model-00003-of-00007.safetensors b/model-00003-of-00007.safetensors new file mode 100644 index 0000000..b9d3332 --- /dev/null +++ b/model-00003-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5c422f0f3e828d477fc5d7b3c4b0f76bd396911a9ff6a7c9a506f1888189fa8 +size 4999813112 diff --git a/model-00004-of-00007.safetensors b/model-00004-of-00007.safetensors new file mode 100644 index 0000000..bc649e0 --- /dev/null +++ b/model-00004-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32fe78e3944b8bd411dc54431d28e126889843efe76e0ff647b32ed05ca07373 +size 4999813128 diff --git a/model-00005-of-00007.safetensors b/model-00005-of-00007.safetensors new file mode 100644 index 0000000..7afe0a7 --- /dev/null +++ b/model-00005-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a141370310ed90f200764e59cba19d704b6f36fdb0a533cf943e04b361868c3a +size 4832007496 diff --git a/model-00006-of-00007.safetensors b/model-00006-of-00007.safetensors new file mode 100644 index 0000000..92898cf --- /dev/null +++ b/model-00006-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01dc5595d1a351d728641c6c39fe6e95493baad1f91a025bbf8f350d3a3febe6 +size 4999813120 diff --git a/model-00007-of-00007.safetensors b/model-00007-of-00007.safetensors new file mode 100644 index 0000000..64d30ba --- /dev/null +++ b/model-00007-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d2a2a07551a3c29110ad47f0122963a96d3efc36dc507ee3abc83b4ca5db172 +size 2571158184 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..0985084 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 32121044992 + }, + "weight_map": { + "lm_head.weight": "model-00007-of-00007.safetensors", + "model.embed_tokens.weight": "model-00001-of-00007.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.20.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.26.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.30.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.norm.weight": "model-00007-of-00007.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..86a3394 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..8c6916a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..6b69fa5 --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 0.0, + "train_loss": 0.572698849610295, + "train_runtime": 1998.3785, + "train_samples": 43598, + "train_samples_per_second": 21.817, + "train_steps_per_second": 0.341 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..f7206a3 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,13789 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100, + "global_step": 681, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0014684287812041115, + "grad_norm": 83.52447509765625, + "learning_rate": 0.0, + "logits/chosen": -0.4974287748336792, + "logits/rejected": -0.43299180269241333, + "logps/chosen": -50.1435661315918, + "logps/ref_chosen": -50.14883804321289, + "logps/ref_rejected": -74.1280517578125, + "logps/rejected": -74.09991455078125, + "loss": 1.389, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.5005706548690796, + "margin_dpo/beta_margin_grad_std": 0.0104739461094141, + "margin_dpo/beta_margin_mean": -0.0022870064713060856, + "margin_dpo/loss_margin_mean": -0.02287006378173828, + "margin_dpo/margin_mean": -0.02287048101425171, + "margin_dpo/margin_std": 0.41920793056488037, + "step": 1 + }, + { + "epoch": 0.002936857562408223, + "grad_norm": 72.19432830810547, + "learning_rate": 7.246376811594203e-09, + "logits/chosen": -0.4953641891479492, + "logits/rejected": -0.4594460129737854, + "logps/chosen": -52.65569305419922, + "logps/ref_chosen": -52.620704650878906, + "logps/ref_rejected": -75.30413818359375, + "logps/rejected": -75.27340698242188, + "loss": 1.3932, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.5016425848007202, + "margin_dpo/beta_margin_grad_std": 0.008758805692195892, + "margin_dpo/beta_margin_mean": -0.006572261452674866, + "margin_dpo/loss_margin_mean": -0.06572261452674866, + "margin_dpo/margin_mean": -0.06572240591049194, + "margin_dpo/margin_std": 0.35048407316207886, + "step": 2 + }, + { + "epoch": 0.004405286343612335, + "grad_norm": 70.83383178710938, + "learning_rate": 1.4492753623188406e-08, + "logits/chosen": -0.48161470890045166, + "logits/rejected": -0.44217246770858765, + "logps/chosen": -60.95429611206055, + "logps/ref_chosen": -60.98159408569336, + "logps/ref_rejected": -68.67259216308594, + "logps/rejected": -68.64839935302734, + "loss": 1.3863, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.49992311000823975, + "margin_dpo/beta_margin_grad_std": 0.008581075817346573, + "margin_dpo/beta_margin_mean": 0.0003100454923696816, + "margin_dpo/loss_margin_mean": 0.003100454807281494, + "margin_dpo/margin_mean": 0.003100961446762085, + "margin_dpo/margin_std": 0.3433571755886078, + "step": 3 + }, + { + "epoch": 0.005873715124816446, + "grad_norm": 72.25827026367188, + "learning_rate": 2.1739130434782606e-08, + "logits/chosen": -0.46887677907943726, + "logits/rejected": -0.44121015071868896, + "logps/chosen": -56.833404541015625, + "logps/ref_chosen": -56.76771545410156, + "logps/ref_rejected": -86.64710998535156, + "logps/rejected": -86.60629272460938, + "loss": 1.3973, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.5026620626449585, + "margin_dpo/beta_margin_grad_std": 0.008479107171297073, + "margin_dpo/beta_margin_mean": -0.010650942102074623, + "margin_dpo/loss_margin_mean": -0.10650941729545593, + "margin_dpo/margin_mean": -0.10650989413261414, + "margin_dpo/margin_std": 0.33926206827163696, + "step": 4 + }, + { + "epoch": 0.007342143906020558, + "grad_norm": 89.21666717529297, + "learning_rate": 2.898550724637681e-08, + "logits/chosen": -0.5145087242126465, + "logits/rejected": -0.4707593023777008, + "logps/chosen": -53.772743225097656, + "logps/ref_chosen": -53.859375, + "logps/ref_rejected": -84.14918518066406, + "logps/rejected": -84.13954162597656, + "loss": 1.3789, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.49807578325271606, + "margin_dpo/beta_margin_grad_std": 0.008384998887777328, + "margin_dpo/beta_margin_mean": 0.007699114270508289, + "margin_dpo/loss_margin_mean": 0.07699114084243774, + "margin_dpo/margin_mean": 0.07699081301689148, + "margin_dpo/margin_std": 0.3355046510696411, + "step": 5 + }, + { + "epoch": 0.00881057268722467, + "grad_norm": 92.13448333740234, + "learning_rate": 3.6231884057971014e-08, + "logits/chosen": -0.5163406729698181, + "logits/rejected": -0.475068598985672, + "logps/chosen": -63.05199432373047, + "logps/ref_chosen": -63.007484436035156, + "logps/ref_rejected": -92.64534759521484, + "logps/rejected": -92.68731689453125, + "loss": 1.3869, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.5000631809234619, + "margin_dpo/beta_margin_grad_std": 0.008657192811369896, + "margin_dpo/beta_margin_mean": -0.00025360879953950644, + "margin_dpo/loss_margin_mean": -0.002536088228225708, + "margin_dpo/margin_mean": -0.002536386251449585, + "margin_dpo/margin_std": 0.3463857173919678, + "step": 6 + }, + { + "epoch": 0.010279001468428781, + "grad_norm": 82.59510803222656, + "learning_rate": 4.347826086956521e-08, + "logits/chosen": -0.5038071274757385, + "logits/rejected": -0.46995049715042114, + "logps/chosen": -57.764461517333984, + "logps/ref_chosen": -57.774818420410156, + "logps/ref_rejected": -103.92059326171875, + "logps/rejected": -103.89596557617188, + "loss": 1.3881, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.5003570914268494, + "margin_dpo/beta_margin_grad_std": 0.009314555674791336, + "margin_dpo/beta_margin_mean": -0.0014270306564867496, + "margin_dpo/loss_margin_mean": -0.014270305633544922, + "margin_dpo/margin_mean": -0.014270126819610596, + "margin_dpo/margin_std": 0.37269771099090576, + "step": 7 + }, + { + "epoch": 0.011747430249632892, + "grad_norm": 78.55260467529297, + "learning_rate": 5.0724637681159424e-08, + "logits/chosen": -0.5125592350959778, + "logits/rejected": -0.48697221279144287, + "logps/chosen": -58.67088317871094, + "logps/ref_chosen": -58.716033935546875, + "logps/ref_rejected": -79.3114242553711, + "logps/rejected": -79.30046081542969, + "loss": 1.3832, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4991455674171448, + "margin_dpo/beta_margin_grad_std": 0.008225222118198872, + "margin_dpo/beta_margin_mean": 0.0034186365082859993, + "margin_dpo/loss_margin_mean": 0.034186363220214844, + "margin_dpo/margin_mean": 0.03418651223182678, + "margin_dpo/margin_std": 0.3291283845901489, + "step": 8 + }, + { + "epoch": 0.013215859030837005, + "grad_norm": 84.95925903320312, + "learning_rate": 5.797101449275362e-08, + "logits/chosen": -0.518346905708313, + "logits/rejected": -0.4730910360813141, + "logps/chosen": -69.84893798828125, + "logps/ref_chosen": -69.8668441772461, + "logps/ref_rejected": -99.6026611328125, + "logps/rejected": -99.63265991210938, + "loss": 1.3819, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4988027811050415, + "margin_dpo/beta_margin_grad_std": 0.010282458737492561, + "margin_dpo/beta_margin_mean": 0.004790524020791054, + "margin_dpo/loss_margin_mean": 0.04790523648262024, + "margin_dpo/margin_mean": 0.04790511727333069, + "margin_dpo/margin_std": 0.4114891588687897, + "step": 9 + }, + { + "epoch": 0.014684287812041116, + "grad_norm": 70.49417877197266, + "learning_rate": 6.521739130434782e-08, + "logits/chosen": -0.4861105680465698, + "logits/rejected": -0.44242680072784424, + "logps/chosen": -48.30065155029297, + "logps/ref_chosen": -48.35768508911133, + "logps/ref_rejected": -80.37206268310547, + "logps/rejected": -80.39839172363281, + "loss": 1.3783, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4979170560836792, + "margin_dpo/beta_margin_grad_std": 0.008738012053072453, + "margin_dpo/beta_margin_mean": 0.00833646859973669, + "margin_dpo/loss_margin_mean": 0.08336468040943146, + "margin_dpo/margin_mean": 0.08336484432220459, + "margin_dpo/margin_std": 0.3496713638305664, + "step": 10 + }, + { + "epoch": 0.016152716593245228, + "grad_norm": 68.25067901611328, + "learning_rate": 7.246376811594203e-08, + "logits/chosen": -0.4707266092300415, + "logits/rejected": -0.4461541175842285, + "logps/chosen": -53.01072692871094, + "logps/ref_chosen": -53.01685333251953, + "logps/ref_rejected": -87.78038024902344, + "logps/rejected": -87.81500244140625, + "loss": 1.3825, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4989815652370453, + "margin_dpo/beta_margin_grad_std": 0.008964480832219124, + "margin_dpo/beta_margin_mean": 0.0040746452286839485, + "margin_dpo/loss_margin_mean": 0.040746450424194336, + "margin_dpo/margin_mean": 0.04074642062187195, + "margin_dpo/margin_std": 0.35872533917427063, + "step": 11 + }, + { + "epoch": 0.01762114537444934, + "grad_norm": 99.98358917236328, + "learning_rate": 7.971014492753623e-08, + "logits/chosen": -0.5403286814689636, + "logits/rejected": -0.5041991472244263, + "logps/chosen": -61.795372009277344, + "logps/ref_chosen": -61.80543518066406, + "logps/ref_rejected": -104.85826873779297, + "logps/rejected": -104.8602294921875, + "loss": 1.3855, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4997004270553589, + "margin_dpo/beta_margin_grad_std": 0.009653432294726372, + "margin_dpo/beta_margin_mean": 0.001203133026137948, + "margin_dpo/loss_margin_mean": 0.012031331658363342, + "margin_dpo/margin_mean": 0.012031003832817078, + "margin_dpo/margin_std": 0.3863860070705414, + "step": 12 + }, + { + "epoch": 0.01908957415565345, + "grad_norm": 79.62843322753906, + "learning_rate": 8.695652173913042e-08, + "logits/chosen": -0.47281551361083984, + "logits/rejected": -0.44416356086730957, + "logps/chosen": -64.23121643066406, + "logps/ref_chosen": -64.26036071777344, + "logps/ref_rejected": -87.20307922363281, + "logps/rejected": -87.18215942382812, + "loss": 1.3859, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4997946619987488, + "margin_dpo/beta_margin_grad_std": 0.009803904220461845, + "margin_dpo/beta_margin_mean": 0.0008225085912272334, + "margin_dpo/loss_margin_mean": 0.008225083351135254, + "margin_dpo/margin_mean": 0.008224427700042725, + "margin_dpo/margin_std": 0.39235472679138184, + "step": 13 + }, + { + "epoch": 0.020558002936857563, + "grad_norm": 85.54085540771484, + "learning_rate": 9.420289855072464e-08, + "logits/chosen": -0.4834981858730316, + "logits/rejected": -0.4443725347518921, + "logps/chosen": -58.135520935058594, + "logps/ref_chosen": -58.11021423339844, + "logps/ref_rejected": -104.04708099365234, + "logps/rejected": -104.12353515625, + "loss": 1.3816, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.49872222542762756, + "margin_dpo/beta_margin_grad_std": 0.010205242782831192, + "margin_dpo/beta_margin_mean": 0.00511439424008131, + "margin_dpo/loss_margin_mean": 0.05114394426345825, + "margin_dpo/margin_mean": 0.051144301891326904, + "margin_dpo/margin_std": 0.4083808958530426, + "step": 14 + }, + { + "epoch": 0.022026431718061675, + "grad_norm": 64.28120422363281, + "learning_rate": 1.0144927536231885e-07, + "logits/chosen": -0.505402147769928, + "logits/rejected": -0.4873759150505066, + "logps/chosen": -57.00213623046875, + "logps/ref_chosen": -56.96691131591797, + "logps/ref_rejected": -80.80863952636719, + "logps/rejected": -80.82938385009766, + "loss": 1.3881, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.5003613233566284, + "margin_dpo/beta_margin_grad_std": 0.008744737133383751, + "margin_dpo/beta_margin_mean": -0.0014485123101621866, + "margin_dpo/loss_margin_mean": -0.01448512077331543, + "margin_dpo/margin_mean": -0.01448512077331543, + "margin_dpo/margin_std": 0.34991174936294556, + "step": 15 + }, + { + "epoch": 0.023494860499265784, + "grad_norm": 84.06546020507812, + "learning_rate": 1.0869565217391303e-07, + "logits/chosen": -0.5580030083656311, + "logits/rejected": -0.5204088687896729, + "logps/chosen": -61.74095153808594, + "logps/ref_chosen": -61.739891052246094, + "logps/ref_rejected": -84.36947631835938, + "logps/rejected": -84.42204284667969, + "loss": 1.3816, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4987128973007202, + "margin_dpo/beta_margin_grad_std": 0.009828168898820877, + "margin_dpo/beta_margin_mean": 0.005151033401489258, + "margin_dpo/loss_margin_mean": 0.05151033401489258, + "margin_dpo/margin_mean": 0.051510006189346313, + "margin_dpo/margin_std": 0.3933736979961395, + "step": 16 + }, + { + "epoch": 0.024963289280469897, + "grad_norm": 78.63739013671875, + "learning_rate": 1.1594202898550725e-07, + "logits/chosen": -0.5074384212493896, + "logits/rejected": -0.47103995084762573, + "logps/chosen": -67.64342498779297, + "logps/ref_chosen": -67.71033477783203, + "logps/ref_rejected": -85.37865447998047, + "logps/rejected": -85.41255187988281, + "loss": 1.3766, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.497480571269989, + "margin_dpo/beta_margin_grad_std": 0.00974523089826107, + "margin_dpo/beta_margin_mean": 0.010080328211188316, + "margin_dpo/loss_margin_mean": 0.10080328583717346, + "margin_dpo/margin_mean": 0.10080331563949585, + "margin_dpo/margin_std": 0.39002037048339844, + "step": 17 + }, + { + "epoch": 0.02643171806167401, + "grad_norm": 82.34278869628906, + "learning_rate": 1.2318840579710146e-07, + "logits/chosen": -0.48814651370048523, + "logits/rejected": -0.4320324659347534, + "logps/chosen": -47.713279724121094, + "logps/ref_chosen": -47.7394905090332, + "logps/ref_rejected": -75.4722900390625, + "logps/rejected": -75.48577880859375, + "loss": 1.3826, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4990071952342987, + "margin_dpo/beta_margin_grad_std": 0.008132295683026314, + "margin_dpo/beta_margin_mean": 0.003970235586166382, + "margin_dpo/loss_margin_mean": 0.03970235586166382, + "margin_dpo/margin_mean": 0.03970211744308472, + "margin_dpo/margin_std": 0.32538339495658875, + "step": 18 + }, + { + "epoch": 0.027900146842878122, + "grad_norm": 73.4638900756836, + "learning_rate": 1.3043478260869563e-07, + "logits/chosen": -0.48973095417022705, + "logits/rejected": -0.4396272301673889, + "logps/chosen": -70.17350769042969, + "logps/ref_chosen": -70.20535278320312, + "logps/ref_rejected": -89.75758361816406, + "logps/rejected": -89.85565948486328, + "loss": 1.3737, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.49675452709198, + "margin_dpo/beta_margin_grad_std": 0.009917546063661575, + "margin_dpo/beta_margin_mean": 0.012991649098694324, + "margin_dpo/loss_margin_mean": 0.1299164891242981, + "margin_dpo/margin_mean": 0.12991660833358765, + "margin_dpo/margin_std": 0.3970108926296234, + "step": 19 + }, + { + "epoch": 0.02936857562408223, + "grad_norm": 74.19491577148438, + "learning_rate": 1.3768115942028986e-07, + "logits/chosen": -0.5667568445205688, + "logits/rejected": -0.5119162797927856, + "logps/chosen": -50.822715759277344, + "logps/ref_chosen": -50.80324172973633, + "logps/ref_rejected": -78.8233413696289, + "logps/rejected": -78.87236785888672, + "loss": 1.3836, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4992612898349762, + "margin_dpo/beta_margin_grad_std": 0.007525566965341568, + "margin_dpo/beta_margin_mean": 0.0029547633603215218, + "margin_dpo/loss_margin_mean": 0.02954763174057007, + "margin_dpo/margin_mean": 0.029547661542892456, + "margin_dpo/margin_std": 0.3011046051979065, + "step": 20 + }, + { + "epoch": 0.030837004405286344, + "grad_norm": 77.03598022460938, + "learning_rate": 1.4492753623188405e-07, + "logits/chosen": -0.5037728548049927, + "logits/rejected": -0.48049020767211914, + "logps/chosen": -50.014862060546875, + "logps/ref_chosen": -50.063018798828125, + "logps/ref_rejected": -77.86878967285156, + "logps/rejected": -78.02366638183594, + "loss": 1.3664, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4949270188808441, + "margin_dpo/beta_margin_grad_std": 0.008798542432487011, + "margin_dpo/beta_margin_mean": 0.020303059369325638, + "margin_dpo/loss_margin_mean": 0.20303058624267578, + "margin_dpo/margin_mean": 0.20303112268447876, + "margin_dpo/margin_std": 0.3521846532821655, + "step": 21 + }, + { + "epoch": 0.032305433186490456, + "grad_norm": 84.57589721679688, + "learning_rate": 1.5217391304347825e-07, + "logits/chosen": -0.49148398637771606, + "logits/rejected": -0.44818878173828125, + "logps/chosen": -58.99713897705078, + "logps/ref_chosen": -59.05763626098633, + "logps/ref_rejected": -97.50466918945312, + "logps/rejected": -97.65492248535156, + "loss": 1.3657, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4947338104248047, + "margin_dpo/beta_margin_grad_std": 0.009125478565692902, + "margin_dpo/beta_margin_mean": 0.021074719727039337, + "margin_dpo/loss_margin_mean": 0.21074718236923218, + "margin_dpo/margin_mean": 0.210746169090271, + "margin_dpo/margin_std": 0.36520200967788696, + "step": 22 + }, + { + "epoch": 0.033773861967694566, + "grad_norm": 80.40442657470703, + "learning_rate": 1.5942028985507245e-07, + "logits/chosen": -0.4710449278354645, + "logits/rejected": -0.44750112295150757, + "logps/chosen": -60.034095764160156, + "logps/ref_chosen": -60.07769775390625, + "logps/ref_rejected": -81.1395492553711, + "logps/rejected": -81.3127212524414, + "loss": 1.3652, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.49458569288253784, + "margin_dpo/beta_margin_grad_std": 0.010714245960116386, + "margin_dpo/beta_margin_mean": 0.021676737815141678, + "margin_dpo/loss_margin_mean": 0.21676737070083618, + "margin_dpo/margin_mean": 0.21676787734031677, + "margin_dpo/margin_std": 0.42905572056770325, + "step": 23 + }, + { + "epoch": 0.03524229074889868, + "grad_norm": 86.11105346679688, + "learning_rate": 1.6666666666666665e-07, + "logits/chosen": -0.515487790107727, + "logits/rejected": -0.49895963072776794, + "logps/chosen": -44.28882598876953, + "logps/ref_chosen": -44.29103469848633, + "logps/ref_rejected": -99.12521362304688, + "logps/rejected": -99.3304443359375, + "loss": 1.3661, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.49481743574142456, + "margin_dpo/beta_margin_grad_std": 0.01028534211218357, + "margin_dpo/beta_margin_mean": 0.020744048058986664, + "margin_dpo/loss_margin_mean": 0.20744048058986664, + "margin_dpo/margin_mean": 0.20744094252586365, + "margin_dpo/margin_std": 0.4117741584777832, + "step": 24 + }, + { + "epoch": 0.03671071953010279, + "grad_norm": 74.07949829101562, + "learning_rate": 1.7391304347826085e-07, + "logits/chosen": -0.5136522650718689, + "logits/rejected": -0.4842616319656372, + "logps/chosen": -52.5118408203125, + "logps/ref_chosen": -52.537052154541016, + "logps/ref_rejected": -89.34219360351562, + "logps/rejected": -89.51565551757812, + "loss": 1.367, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4950360655784607, + "margin_dpo/beta_margin_grad_std": 0.010537989437580109, + "margin_dpo/beta_margin_mean": 0.019867265596985817, + "margin_dpo/loss_margin_mean": 0.19867265224456787, + "margin_dpo/margin_mean": 0.1986721158027649, + "margin_dpo/margin_std": 0.4217980206012726, + "step": 25 + }, + { + "epoch": 0.0381791483113069, + "grad_norm": 87.3241195678711, + "learning_rate": 1.8115942028985507e-07, + "logits/chosen": -0.5391855239868164, + "logits/rejected": -0.5075402855873108, + "logps/chosen": -53.83518981933594, + "logps/ref_chosen": -53.92280578613281, + "logps/ref_rejected": -103.35971069335938, + "logps/rejected": -103.70204162597656, + "loss": 1.3445, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4892633557319641, + "margin_dpo/beta_margin_grad_std": 0.013717170804738998, + "margin_dpo/beta_margin_mean": 0.04299398139119148, + "margin_dpo/loss_margin_mean": 0.42993980646133423, + "margin_dpo/margin_mean": 0.42994067072868347, + "margin_dpo/margin_std": 0.5494698286056519, + "step": 26 + }, + { + "epoch": 0.039647577092511016, + "grad_norm": 93.3059310913086, + "learning_rate": 1.8840579710144927e-07, + "logits/chosen": -0.5076569318771362, + "logits/rejected": -0.47098520398139954, + "logps/chosen": -42.758522033691406, + "logps/ref_chosen": -42.898529052734375, + "logps/ref_rejected": -98.72420501708984, + "logps/rejected": -99.09854125976562, + "loss": 1.3364, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4871601164340973, + "margin_dpo/beta_margin_grad_std": 0.014479693956673145, + "margin_dpo/beta_margin_mean": 0.051434241235256195, + "margin_dpo/loss_margin_mean": 0.5143424272537231, + "margin_dpo/margin_mean": 0.514342188835144, + "margin_dpo/margin_std": 0.5809046626091003, + "step": 27 + }, + { + "epoch": 0.041116005873715125, + "grad_norm": 75.3113784790039, + "learning_rate": 1.9565217391304347e-07, + "logits/chosen": -0.5132657289505005, + "logits/rejected": -0.4586002230644226, + "logps/chosen": -60.55534362792969, + "logps/ref_chosen": -60.55650329589844, + "logps/ref_rejected": -91.40111541748047, + "logps/rejected": -91.69779205322266, + "loss": 1.3575, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.492561399936676, + "margin_dpo/beta_margin_grad_std": 0.013777370564639568, + "margin_dpo/beta_margin_mean": 0.029783397912979126, + "margin_dpo/loss_margin_mean": 0.29783397912979126, + "margin_dpo/margin_mean": 0.29783421754837036, + "margin_dpo/margin_std": 0.5516640543937683, + "step": 28 + }, + { + "epoch": 0.042584434654919234, + "grad_norm": 90.50589752197266, + "learning_rate": 2.028985507246377e-07, + "logits/chosen": -0.5619853734970093, + "logits/rejected": -0.5164209008216858, + "logps/chosen": -57.673362731933594, + "logps/ref_chosen": -57.80778503417969, + "logps/ref_rejected": -97.39434814453125, + "logps/rejected": -97.85377502441406, + "loss": 1.3285, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4851696789264679, + "margin_dpo/beta_margin_grad_std": 0.01294540986418724, + "margin_dpo/beta_margin_mean": 0.0593840591609478, + "margin_dpo/loss_margin_mean": 0.5938405990600586, + "margin_dpo/margin_mean": 0.5938413739204407, + "margin_dpo/margin_std": 0.5187057256698608, + "step": 29 + }, + { + "epoch": 0.04405286343612335, + "grad_norm": 87.18180847167969, + "learning_rate": 2.1014492753623187e-07, + "logits/chosen": -0.5116697549819946, + "logits/rejected": -0.4816800057888031, + "logps/chosen": -52.425750732421875, + "logps/ref_chosen": -52.57737350463867, + "logps/ref_rejected": -98.48921203613281, + "logps/rejected": -99.05937957763672, + "loss": 1.3165, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4819870591163635, + "margin_dpo/beta_margin_grad_std": 0.016035309061408043, + "margin_dpo/beta_margin_mean": 0.07217944413423538, + "margin_dpo/loss_margin_mean": 0.7217944860458374, + "margin_dpo/margin_mean": 0.7217941880226135, + "margin_dpo/margin_std": 0.6435875296592712, + "step": 30 + }, + { + "epoch": 0.04552129221732746, + "grad_norm": 67.85016632080078, + "learning_rate": 2.1739130434782607e-07, + "logits/chosen": -0.5148423910140991, + "logits/rejected": -0.4710330367088318, + "logps/chosen": -63.68492889404297, + "logps/ref_chosen": -63.806922912597656, + "logps/ref_rejected": -72.89400482177734, + "logps/rejected": -73.29931640625, + "loss": 1.3354, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4868418872356415, + "margin_dpo/beta_margin_grad_std": 0.016791202127933502, + "margin_dpo/beta_margin_mean": 0.052730634808540344, + "margin_dpo/loss_margin_mean": 0.527306318283081, + "margin_dpo/margin_mean": 0.527306318283081, + "margin_dpo/margin_std": 0.6738239526748657, + "step": 31 + }, + { + "epoch": 0.04698972099853157, + "grad_norm": 81.52291107177734, + "learning_rate": 2.2463768115942027e-07, + "logits/chosen": -0.5098748207092285, + "logits/rejected": -0.46841973066329956, + "logps/chosen": -62.55455017089844, + "logps/ref_chosen": -62.739524841308594, + "logps/ref_rejected": -89.3175048828125, + "logps/rejected": -89.87690734863281, + "loss": 1.3153, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.48145878314971924, + "margin_dpo/beta_margin_grad_std": 0.022345291450619698, + "margin_dpo/beta_margin_mean": 0.07443846762180328, + "margin_dpo/loss_margin_mean": 0.7443846464157104, + "margin_dpo/margin_mean": 0.7443850040435791, + "margin_dpo/margin_std": 0.9011361598968506, + "step": 32 + }, + { + "epoch": 0.048458149779735685, + "grad_norm": 72.76732635498047, + "learning_rate": 2.318840579710145e-07, + "logits/chosen": -0.5056596994400024, + "logits/rejected": -0.47998249530792236, + "logps/chosen": -53.159671783447266, + "logps/ref_chosen": -53.26097106933594, + "logps/ref_rejected": -87.8851318359375, + "logps/rejected": -88.34671020507812, + "loss": 1.3315, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4859437346458435, + "margin_dpo/beta_margin_grad_std": 0.013436902314424515, + "margin_dpo/beta_margin_mean": 0.056287482380867004, + "margin_dpo/loss_margin_mean": 0.5628747940063477, + "margin_dpo/margin_mean": 0.5628749132156372, + "margin_dpo/margin_std": 0.5385845899581909, + "step": 33 + }, + { + "epoch": 0.049926578560939794, + "grad_norm": 77.6261978149414, + "learning_rate": 2.391304347826087e-07, + "logits/chosen": -0.49515533447265625, + "logits/rejected": -0.4777703285217285, + "logps/chosen": -50.73601531982422, + "logps/ref_chosen": -50.81732940673828, + "logps/ref_rejected": -101.92184448242188, + "logps/rejected": -102.61337280273438, + "loss": 1.3124, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.48073524236679077, + "margin_dpo/beta_margin_grad_std": 0.02180512621998787, + "margin_dpo/beta_margin_mean": 0.07728321105241776, + "margin_dpo/loss_margin_mean": 0.7728320360183716, + "margin_dpo/margin_mean": 0.7728322744369507, + "margin_dpo/margin_std": 0.8760267496109009, + "step": 34 + }, + { + "epoch": 0.0513950073421439, + "grad_norm": 82.47791290283203, + "learning_rate": 2.463768115942029e-07, + "logits/chosen": -0.5227484107017517, + "logits/rejected": -0.48601728677749634, + "logps/chosen": -50.88093948364258, + "logps/ref_chosen": -51.02449035644531, + "logps/ref_rejected": -106.82443237304688, + "logps/rejected": -107.93114471435547, + "loss": 1.2685, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4689541757106781, + "margin_dpo/beta_margin_grad_std": 0.027965568006038666, + "margin_dpo/beta_margin_mean": 0.12502656877040863, + "margin_dpo/loss_margin_mean": 1.2502657175064087, + "margin_dpo/margin_mean": 1.2502658367156982, + "margin_dpo/margin_std": 1.1440428495407104, + "step": 35 + }, + { + "epoch": 0.05286343612334802, + "grad_norm": 72.95713806152344, + "learning_rate": 2.536231884057971e-07, + "logits/chosen": -0.563947319984436, + "logits/rejected": -0.5279806852340698, + "logps/chosen": -51.93867492675781, + "logps/ref_chosen": -51.991493225097656, + "logps/ref_rejected": -86.04061889648438, + "logps/rejected": -87.13275146484375, + "loss": 1.2793, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4715506434440613, + "margin_dpo/beta_margin_grad_std": 0.031429655849933624, + "margin_dpo/beta_margin_mean": 0.11449373513460159, + "margin_dpo/loss_margin_mean": 1.14493727684021, + "margin_dpo/margin_mean": 1.144936442375183, + "margin_dpo/margin_std": 1.2692325115203857, + "step": 36 + }, + { + "epoch": 0.05433186490455213, + "grad_norm": 61.87527084350586, + "learning_rate": 2.6086956521739126e-07, + "logits/chosen": -0.5000085234642029, + "logits/rejected": -0.4554196000099182, + "logps/chosen": -62.77561950683594, + "logps/ref_chosen": -62.807106018066406, + "logps/ref_rejected": -77.89507293701172, + "logps/rejected": -78.88422393798828, + "loss": 1.2909, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.47465330362319946, + "margin_dpo/beta_margin_grad_std": 0.0310398917645216, + "margin_dpo/beta_margin_mean": 0.10206404328346252, + "margin_dpo/loss_margin_mean": 1.0206403732299805, + "margin_dpo/margin_mean": 1.0206403732299805, + "margin_dpo/margin_std": 1.2562531232833862, + "step": 37 + }, + { + "epoch": 0.055800293685756244, + "grad_norm": 69.35832977294922, + "learning_rate": 2.681159420289855e-07, + "logits/chosen": -0.5131621360778809, + "logits/rejected": -0.4803985357284546, + "logps/chosen": -48.25373077392578, + "logps/ref_chosen": -48.39051818847656, + "logps/ref_rejected": -97.91244506835938, + "logps/rejected": -99.13421630859375, + "loss": 1.262, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.46648678183555603, + "margin_dpo/beta_margin_grad_std": 0.0405726283788681, + "margin_dpo/beta_margin_mean": 0.13585661351680756, + "margin_dpo/loss_margin_mean": 1.3585660457611084, + "margin_dpo/margin_mean": 1.3585660457611084, + "margin_dpo/margin_std": 1.6711539030075073, + "step": 38 + }, + { + "epoch": 0.05726872246696035, + "grad_norm": 73.56781768798828, + "learning_rate": 2.753623188405797e-07, + "logits/chosen": -0.5736282467842102, + "logits/rejected": -0.534826934337616, + "logps/chosen": -50.66197204589844, + "logps/ref_chosen": -50.75046920776367, + "logps/ref_rejected": -78.56951141357422, + "logps/rejected": -80.15695190429688, + "loss": 1.2309, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.458440363407135, + "margin_dpo/beta_margin_grad_std": 0.035203345119953156, + "margin_dpo/beta_margin_mean": 0.1675935983657837, + "margin_dpo/loss_margin_mean": 1.675935983657837, + "margin_dpo/margin_mean": 1.6759363412857056, + "margin_dpo/margin_std": 1.4285030364990234, + "step": 39 + }, + { + "epoch": 0.05873715124816446, + "grad_norm": 60.51735305786133, + "learning_rate": 2.8260869565217386e-07, + "logits/chosen": -0.527452826499939, + "logits/rejected": -0.4978986382484436, + "logps/chosen": -57.774688720703125, + "logps/ref_chosen": -57.985069274902344, + "logps/ref_rejected": -74.30007934570312, + "logps/rejected": -75.63487243652344, + "loss": 1.2454, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.46187901496887207, + "margin_dpo/beta_margin_grad_std": 0.041983917355537415, + "margin_dpo/beta_margin_mean": 0.15451756119728088, + "margin_dpo/loss_margin_mean": 1.5451757907867432, + "margin_dpo/margin_mean": 1.5451761484146118, + "margin_dpo/margin_std": 1.721125602722168, + "step": 40 + }, + { + "epoch": 0.06020558002936858, + "grad_norm": 68.02806091308594, + "learning_rate": 2.898550724637681e-07, + "logits/chosen": -0.522682785987854, + "logits/rejected": -0.4852331280708313, + "logps/chosen": -62.648956298828125, + "logps/ref_chosen": -62.69581604003906, + "logps/ref_rejected": -97.02352905273438, + "logps/rejected": -98.86910247802734, + "loss": 1.216, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4534452557563782, + "margin_dpo/beta_margin_grad_std": 0.04771146923303604, + "margin_dpo/beta_margin_mean": 0.1892436146736145, + "margin_dpo/loss_margin_mean": 1.892436146736145, + "margin_dpo/margin_mean": 1.8924363851547241, + "margin_dpo/margin_std": 1.9684252738952637, + "step": 41 + }, + { + "epoch": 0.06167400881057269, + "grad_norm": 79.33026123046875, + "learning_rate": 2.971014492753623e-07, + "logits/chosen": -0.5209932923316956, + "logits/rejected": -0.4742482602596283, + "logps/chosen": -58.71235275268555, + "logps/ref_chosen": -58.96642303466797, + "logps/ref_rejected": -109.90837097167969, + "logps/rejected": -112.24879455566406, + "loss": 1.1582, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.43676841259002686, + "margin_dpo/beta_margin_grad_std": 0.05766534060239792, + "margin_dpo/beta_margin_mean": 0.25944995880126953, + "margin_dpo/loss_margin_mean": 2.5944995880126953, + "margin_dpo/margin_mean": 2.5944998264312744, + "margin_dpo/margin_std": 2.435802936553955, + "step": 42 + }, + { + "epoch": 0.0631424375917768, + "grad_norm": 71.26874542236328, + "learning_rate": 3.043478260869565e-07, + "logits/chosen": -0.5625420808792114, + "logits/rejected": -0.538593590259552, + "logps/chosen": -53.63534927368164, + "logps/ref_chosen": -54.15599822998047, + "logps/ref_rejected": -96.48019409179688, + "logps/rejected": -98.510009765625, + "loss": 1.1584, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4374551773071289, + "margin_dpo/beta_margin_grad_std": 0.05021943897008896, + "margin_dpo/beta_margin_mean": 0.2550460994243622, + "margin_dpo/loss_margin_mean": 2.5504610538482666, + "margin_dpo/margin_mean": 2.5504608154296875, + "margin_dpo/margin_std": 2.1022145748138428, + "step": 43 + }, + { + "epoch": 0.06461086637298091, + "grad_norm": 78.6224136352539, + "learning_rate": 3.115942028985507e-07, + "logits/chosen": -0.46302688121795654, + "logits/rejected": -0.443297415971756, + "logps/chosen": -49.88066864013672, + "logps/ref_chosen": -50.07849884033203, + "logps/ref_rejected": -108.78376007080078, + "logps/rejected": -111.412841796875, + "loss": 1.1358, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.43084633350372314, + "margin_dpo/beta_margin_grad_std": 0.053701795637607574, + "margin_dpo/beta_margin_mean": 0.2826906740665436, + "margin_dpo/loss_margin_mean": 2.826906681060791, + "margin_dpo/margin_mean": 2.826906442642212, + "margin_dpo/margin_std": 2.2519941329956055, + "step": 44 + }, + { + "epoch": 0.06607929515418502, + "grad_norm": 61.787879943847656, + "learning_rate": 3.188405797101449e-07, + "logits/chosen": -0.4846153259277344, + "logits/rejected": -0.47198039293289185, + "logps/chosen": -48.231903076171875, + "logps/ref_chosen": -48.41493225097656, + "logps/ref_rejected": -77.93643188476562, + "logps/rejected": -80.11711883544922, + "loss": 1.181, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4426528513431549, + "margin_dpo/beta_margin_grad_std": 0.061857253313064575, + "margin_dpo/beta_margin_mean": 0.23637181520462036, + "margin_dpo/loss_margin_mean": 2.363718032836914, + "margin_dpo/margin_mean": 2.363717555999756, + "margin_dpo/margin_std": 2.6244254112243652, + "step": 45 + }, + { + "epoch": 0.06754772393538913, + "grad_norm": 69.09931945800781, + "learning_rate": 3.260869565217391e-07, + "logits/chosen": -0.5141834020614624, + "logits/rejected": -0.4625147581100464, + "logps/chosen": -55.74999237060547, + "logps/ref_chosen": -55.999427795410156, + "logps/ref_rejected": -95.652587890625, + "logps/rejected": -98.34117126464844, + "loss": 1.1376, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4294002056121826, + "margin_dpo/beta_margin_grad_std": 0.07263202965259552, + "margin_dpo/beta_margin_mean": 0.29380178451538086, + "margin_dpo/loss_margin_mean": 2.9380178451538086, + "margin_dpo/margin_mean": 2.9380173683166504, + "margin_dpo/margin_std": 3.154534339904785, + "step": 46 + }, + { + "epoch": 0.06901615271659324, + "grad_norm": 65.72870635986328, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -0.58119797706604, + "logits/rejected": -0.5290583372116089, + "logps/chosen": -57.503753662109375, + "logps/ref_chosen": -57.92607879638672, + "logps/ref_rejected": -94.67920684814453, + "logps/rejected": -97.23452758789062, + "loss": 1.1285, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4276430606842041, + "margin_dpo/beta_margin_grad_std": 0.0631469339132309, + "margin_dpo/beta_margin_mean": 0.29776421189308167, + "margin_dpo/loss_margin_mean": 2.977642059326172, + "margin_dpo/margin_mean": 2.977642297744751, + "margin_dpo/margin_std": 2.6595559120178223, + "step": 47 + }, + { + "epoch": 0.07048458149779736, + "grad_norm": 72.27952575683594, + "learning_rate": 3.4057971014492755e-07, + "logits/chosen": -0.5920270681381226, + "logits/rejected": -0.5339563488960266, + "logps/chosen": -57.16640853881836, + "logps/ref_chosen": -57.188072204589844, + "logps/ref_rejected": -88.0166015625, + "logps/rejected": -91.12606048583984, + "loss": 1.1231, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4243152141571045, + "margin_dpo/beta_margin_grad_std": 0.07117132842540741, + "margin_dpo/beta_margin_mean": 0.3131124675273895, + "margin_dpo/loss_margin_mean": 3.131124496459961, + "margin_dpo/margin_mean": 3.131124496459961, + "margin_dpo/margin_std": 3.016913890838623, + "step": 48 + }, + { + "epoch": 0.07195301027900147, + "grad_norm": 63.71873092651367, + "learning_rate": 3.478260869565217e-07, + "logits/chosen": -0.5536686182022095, + "logits/rejected": -0.49566274881362915, + "logps/chosen": -61.38921356201172, + "logps/ref_chosen": -61.685264587402344, + "logps/ref_rejected": -83.76747131347656, + "logps/rejected": -87.34431457519531, + "loss": 1.074, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.40908271074295044, + "margin_dpo/beta_margin_grad_std": 0.08748139441013336, + "margin_dpo/beta_margin_mean": 0.387288898229599, + "margin_dpo/loss_margin_mean": 3.8728890419006348, + "margin_dpo/margin_mean": 3.8728885650634766, + "margin_dpo/margin_std": 3.9563791751861572, + "step": 49 + }, + { + "epoch": 0.07342143906020558, + "grad_norm": 62.7824592590332, + "learning_rate": 3.5507246376811595e-07, + "logits/chosen": -0.5670984387397766, + "logits/rejected": -0.5319196581840515, + "logps/chosen": -58.91963195800781, + "logps/ref_chosen": -58.72413635253906, + "logps/ref_rejected": -96.35814666748047, + "logps/rejected": -100.67803955078125, + "loss": 1.0538, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.40261325240135193, + "margin_dpo/beta_margin_grad_std": 0.09164208173751831, + "margin_dpo/beta_margin_mean": 0.4124397039413452, + "margin_dpo/loss_margin_mean": 4.124396800994873, + "margin_dpo/margin_mean": 4.124396800994873, + "margin_dpo/margin_std": 4.026268005371094, + "step": 50 + }, + { + "epoch": 0.07488986784140969, + "grad_norm": 52.12064743041992, + "learning_rate": 3.6231884057971015e-07, + "logits/chosen": -0.5369248390197754, + "logits/rejected": -0.5046299695968628, + "logps/chosen": -61.671791076660156, + "logps/ref_chosen": -61.3736686706543, + "logps/ref_rejected": -76.00199890136719, + "logps/rejected": -80.37809753417969, + "loss": 1.0821, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.4077322781085968, + "margin_dpo/beta_margin_grad_std": 0.11022845655679703, + "margin_dpo/beta_margin_mean": 0.4077974557876587, + "margin_dpo/loss_margin_mean": 4.077974796295166, + "margin_dpo/margin_mean": 4.077974319458008, + "margin_dpo/margin_std": 5.209657669067383, + "step": 51 + }, + { + "epoch": 0.0763582966226138, + "grad_norm": 58.820133209228516, + "learning_rate": 3.695652173913043e-07, + "logits/chosen": -0.5732629299163818, + "logits/rejected": -0.5190708637237549, + "logps/chosen": -51.953399658203125, + "logps/ref_chosen": -52.33735656738281, + "logps/ref_rejected": -79.97391510009766, + "logps/rejected": -85.85843658447266, + "loss": 0.9142, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.3579610586166382, + "margin_dpo/beta_margin_grad_std": 0.10757434368133545, + "margin_dpo/beta_margin_mean": 0.6268481016159058, + "margin_dpo/loss_margin_mean": 6.26848030090332, + "margin_dpo/margin_mean": 6.2684807777404785, + "margin_dpo/margin_std": 5.199737548828125, + "step": 52 + }, + { + "epoch": 0.07782672540381791, + "grad_norm": 57.97807693481445, + "learning_rate": 3.7681159420289855e-07, + "logits/chosen": -0.618739128112793, + "logits/rejected": -0.5973125100135803, + "logps/chosen": -53.48461151123047, + "logps/ref_chosen": -53.31465530395508, + "logps/ref_rejected": -91.7835922241211, + "logps/rejected": -98.30101013183594, + "loss": 0.9439, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.36459940671920776, + "margin_dpo/beta_margin_grad_std": 0.11843107640743256, + "margin_dpo/beta_margin_mean": 0.6347463130950928, + "margin_dpo/loss_margin_mean": 6.347463130950928, + "margin_dpo/margin_mean": 6.347464084625244, + "margin_dpo/margin_std": 6.299587726593018, + "step": 53 + }, + { + "epoch": 0.07929515418502203, + "grad_norm": 58.53452682495117, + "learning_rate": 3.8405797101449274e-07, + "logits/chosen": -0.6002248525619507, + "logits/rejected": -0.5472081303596497, + "logps/chosen": -51.132781982421875, + "logps/ref_chosen": -50.68865966796875, + "logps/ref_rejected": -91.71539306640625, + "logps/rejected": -97.54826354980469, + "loss": 0.9754, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.37753698229789734, + "margin_dpo/beta_margin_grad_std": 0.10520176589488983, + "margin_dpo/beta_margin_mean": 0.5388752818107605, + "margin_dpo/loss_margin_mean": 5.3887529373168945, + "margin_dpo/margin_mean": 5.3887529373168945, + "margin_dpo/margin_std": 5.09660530090332, + "step": 54 + }, + { + "epoch": 0.08076358296622614, + "grad_norm": 53.50847244262695, + "learning_rate": 3.9130434782608694e-07, + "logits/chosen": -0.6379266977310181, + "logits/rejected": -0.5748265981674194, + "logps/chosen": -63.582801818847656, + "logps/ref_chosen": -62.615234375, + "logps/ref_rejected": -88.99349975585938, + "logps/rejected": -96.49349212646484, + "loss": 0.9552, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.36137956380844116, + "margin_dpo/beta_margin_grad_std": 0.1454969346523285, + "margin_dpo/beta_margin_mean": 0.6532418727874756, + "margin_dpo/loss_margin_mean": 6.532418727874756, + "margin_dpo/margin_mean": 6.532418251037598, + "margin_dpo/margin_std": 7.533010482788086, + "step": 55 + }, + { + "epoch": 0.08223201174743025, + "grad_norm": 48.04698944091797, + "learning_rate": 3.9855072463768114e-07, + "logits/chosen": -0.6322102546691895, + "logits/rejected": -0.5908021330833435, + "logps/chosen": -58.65277862548828, + "logps/ref_chosen": -57.93273162841797, + "logps/ref_rejected": -94.1744384765625, + "logps/rejected": -101.14324951171875, + "loss": 0.9724, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.3670324981212616, + "margin_dpo/beta_margin_grad_std": 0.14493967592716217, + "margin_dpo/beta_margin_mean": 0.6248764991760254, + "margin_dpo/loss_margin_mean": 6.248764991760254, + "margin_dpo/margin_mean": 6.248764991760254, + "margin_dpo/margin_std": 7.392797470092773, + "step": 56 + }, + { + "epoch": 0.08370044052863436, + "grad_norm": 53.7747688293457, + "learning_rate": 4.057971014492754e-07, + "logits/chosen": -0.5740865468978882, + "logits/rejected": -0.5456082820892334, + "logps/chosen": -71.21261596679688, + "logps/ref_chosen": -70.49528503417969, + "logps/ref_rejected": -95.56546020507812, + "logps/rejected": -103.3371353149414, + "loss": 0.8958, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.3454797565937042, + "margin_dpo/beta_margin_grad_std": 0.1326553225517273, + "margin_dpo/beta_margin_mean": 0.7054347991943359, + "margin_dpo/loss_margin_mean": 7.054348468780518, + "margin_dpo/margin_mean": 7.054348945617676, + "margin_dpo/margin_std": 6.582326889038086, + "step": 57 + }, + { + "epoch": 0.08516886930983847, + "grad_norm": 58.93936538696289, + "learning_rate": 4.1304347826086954e-07, + "logits/chosen": -0.6123115420341492, + "logits/rejected": -0.5382078886032104, + "logps/chosen": -63.20277786254883, + "logps/ref_chosen": -62.13294219970703, + "logps/ref_rejected": -84.61729431152344, + "logps/rejected": -93.39222717285156, + "loss": 0.8958, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.34207093715667725, + "margin_dpo/beta_margin_grad_std": 0.15293042361736298, + "margin_dpo/beta_margin_mean": 0.7705095410346985, + "margin_dpo/loss_margin_mean": 7.7050957679748535, + "margin_dpo/margin_mean": 7.705096244812012, + "margin_dpo/margin_std": 8.273210525512695, + "step": 58 + }, + { + "epoch": 0.08663729809104258, + "grad_norm": 55.383514404296875, + "learning_rate": 4.2028985507246374e-07, + "logits/chosen": -0.6397849321365356, + "logits/rejected": -0.6004974842071533, + "logps/chosen": -53.41858673095703, + "logps/ref_chosen": -51.932525634765625, + "logps/ref_rejected": -88.88520050048828, + "logps/rejected": -98.85914611816406, + "loss": 0.857, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.32870835065841675, + "margin_dpo/beta_margin_grad_std": 0.15062686800956726, + "margin_dpo/beta_margin_mean": 0.8487890958786011, + "margin_dpo/loss_margin_mean": 8.48789119720459, + "margin_dpo/margin_mean": 8.487890243530273, + "margin_dpo/margin_std": 8.594100952148438, + "step": 59 + }, + { + "epoch": 0.0881057268722467, + "grad_norm": 63.981693267822266, + "learning_rate": 4.2753623188405794e-07, + "logits/chosen": -0.6227731704711914, + "logits/rejected": -0.5636199712753296, + "logps/chosen": -63.575462341308594, + "logps/ref_chosen": -60.94218444824219, + "logps/ref_rejected": -85.39340209960938, + "logps/rejected": -94.78590393066406, + "loss": 0.9511, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.35332685708999634, + "margin_dpo/beta_margin_grad_std": 0.1562734991312027, + "margin_dpo/beta_margin_mean": 0.6759233474731445, + "margin_dpo/loss_margin_mean": 6.759233474731445, + "margin_dpo/margin_mean": 6.759233474731445, + "margin_dpo/margin_std": 7.703272819519043, + "step": 60 + }, + { + "epoch": 0.08957415565345081, + "grad_norm": 54.136070251464844, + "learning_rate": 4.3478260869565214e-07, + "logits/chosen": -0.589980959892273, + "logits/rejected": -0.5553174018859863, + "logps/chosen": -62.088226318359375, + "logps/ref_chosen": -60.633522033691406, + "logps/ref_rejected": -89.85249328613281, + "logps/rejected": -99.72574615478516, + "loss": 0.9274, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.34447312355041504, + "margin_dpo/beta_margin_grad_std": 0.17768457531929016, + "margin_dpo/beta_margin_mean": 0.8418547511100769, + "margin_dpo/loss_margin_mean": 8.418547630310059, + "margin_dpo/margin_mean": 8.418546676635742, + "margin_dpo/margin_std": 11.459321975708008, + "step": 61 + }, + { + "epoch": 0.09104258443465492, + "grad_norm": 56.41756057739258, + "learning_rate": 4.420289855072464e-07, + "logits/chosen": -0.6043162941932678, + "logits/rejected": -0.5698095560073853, + "logps/chosen": -57.790740966796875, + "logps/ref_chosen": -56.15077209472656, + "logps/ref_rejected": -75.56619262695312, + "logps/rejected": -83.44951629638672, + "loss": 0.9972, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.37032824754714966, + "margin_dpo/beta_margin_grad_std": 0.15750956535339355, + "margin_dpo/beta_margin_mean": 0.624334990978241, + "margin_dpo/loss_margin_mean": 6.243350028991699, + "margin_dpo/margin_mean": 6.243350028991699, + "margin_dpo/margin_std": 8.166690826416016, + "step": 62 + }, + { + "epoch": 0.09251101321585903, + "grad_norm": 56.643470764160156, + "learning_rate": 4.4927536231884053e-07, + "logits/chosen": -0.5830048322677612, + "logits/rejected": -0.5372258424758911, + "logps/chosen": -75.84552001953125, + "logps/ref_chosen": -73.14739227294922, + "logps/ref_rejected": -97.61006164550781, + "logps/rejected": -108.71710205078125, + "loss": 0.8745, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.3277238607406616, + "margin_dpo/beta_margin_grad_std": 0.1626659482717514, + "margin_dpo/beta_margin_mean": 0.84089195728302, + "margin_dpo/loss_margin_mean": 8.408918380737305, + "margin_dpo/margin_mean": 8.408919334411621, + "margin_dpo/margin_std": 8.86873722076416, + "step": 63 + }, + { + "epoch": 0.09397944199706314, + "grad_norm": 51.555030822753906, + "learning_rate": 4.5652173913043473e-07, + "logits/chosen": -0.6049680113792419, + "logits/rejected": -0.5739491581916809, + "logps/chosen": -54.96660232543945, + "logps/ref_chosen": -53.99859619140625, + "logps/ref_rejected": -93.53020477294922, + "logps/rejected": -104.44441223144531, + "loss": 0.8416, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.31263962388038635, + "margin_dpo/beta_margin_grad_std": 0.1735057830810547, + "margin_dpo/beta_margin_mean": 0.994620680809021, + "margin_dpo/loss_margin_mean": 9.946207046508789, + "margin_dpo/margin_mean": 9.946207046508789, + "margin_dpo/margin_std": 11.080026626586914, + "step": 64 + }, + { + "epoch": 0.09544787077826726, + "grad_norm": 54.08346939086914, + "learning_rate": 4.63768115942029e-07, + "logits/chosen": -0.6963008642196655, + "logits/rejected": -0.6837696433067322, + "logps/chosen": -68.06695556640625, + "logps/ref_chosen": -64.83599853515625, + "logps/ref_rejected": -109.94645690917969, + "logps/rejected": -123.10871124267578, + "loss": 0.8597, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.31020408868789673, + "margin_dpo/beta_margin_grad_std": 0.18927563726902008, + "margin_dpo/beta_margin_mean": 0.9931299686431885, + "margin_dpo/loss_margin_mean": 9.931299209594727, + "margin_dpo/margin_mean": 9.93129825592041, + "margin_dpo/margin_std": 11.138134002685547, + "step": 65 + }, + { + "epoch": 0.09691629955947137, + "grad_norm": 52.64336013793945, + "learning_rate": 4.7101449275362313e-07, + "logits/chosen": -0.6431201100349426, + "logits/rejected": -0.6103649139404297, + "logps/chosen": -54.33839797973633, + "logps/ref_chosen": -51.44352722167969, + "logps/ref_rejected": -75.63629150390625, + "logps/rejected": -87.60906219482422, + "loss": 0.8852, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.3300383687019348, + "margin_dpo/beta_margin_grad_std": 0.17383669316768646, + "margin_dpo/beta_margin_mean": 0.9077892303466797, + "margin_dpo/loss_margin_mean": 9.077892303466797, + "margin_dpo/margin_mean": 9.07789134979248, + "margin_dpo/margin_std": 11.045241355895996, + "step": 66 + }, + { + "epoch": 0.09838472834067548, + "grad_norm": 53.70967102050781, + "learning_rate": 4.782608695652174e-07, + "logits/chosen": -0.6037384271621704, + "logits/rejected": -0.56143718957901, + "logps/chosen": -61.83789825439453, + "logps/ref_chosen": -59.34080505371094, + "logps/ref_rejected": -72.78729248046875, + "logps/rejected": -84.55035400390625, + "loss": 0.8693, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.3258131444454193, + "margin_dpo/beta_margin_grad_std": 0.1767134666442871, + "margin_dpo/beta_margin_mean": 0.9265965223312378, + "margin_dpo/loss_margin_mean": 9.265965461730957, + "margin_dpo/margin_mean": 9.26596450805664, + "margin_dpo/margin_std": 10.946893692016602, + "step": 67 + }, + { + "epoch": 0.09985315712187959, + "grad_norm": 51.866180419921875, + "learning_rate": 4.855072463768116e-07, + "logits/chosen": -0.6399117708206177, + "logits/rejected": -0.5805681347846985, + "logps/chosen": -68.01475524902344, + "logps/ref_chosen": -65.2058334350586, + "logps/ref_rejected": -77.20724487304688, + "logps/rejected": -88.76637268066406, + "loss": 0.8436, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.3251006603240967, + "margin_dpo/beta_margin_grad_std": 0.1536335051059723, + "margin_dpo/beta_margin_mean": 0.875019907951355, + "margin_dpo/loss_margin_mean": 8.750198364257812, + "margin_dpo/margin_mean": 8.750198364257812, + "margin_dpo/margin_std": 8.96760082244873, + "step": 68 + }, + { + "epoch": 0.1013215859030837, + "grad_norm": 52.978328704833984, + "learning_rate": 4.927536231884058e-07, + "logits/chosen": -0.619906485080719, + "logits/rejected": -0.5960003137588501, + "logps/chosen": -63.03958511352539, + "logps/ref_chosen": -59.81924057006836, + "logps/ref_rejected": -103.38886260986328, + "logps/rejected": -117.06353759765625, + "loss": 0.7728, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.30147287249565125, + "margin_dpo/beta_margin_grad_std": 0.15879851579666138, + "margin_dpo/beta_margin_mean": 1.0454329252243042, + "margin_dpo/loss_margin_mean": 10.454328536987305, + "margin_dpo/margin_mean": 10.454329490661621, + "margin_dpo/margin_std": 10.263179779052734, + "step": 69 + }, + { + "epoch": 0.1027900146842878, + "grad_norm": 58.304927825927734, + "learning_rate": 5e-07, + "logits/chosen": -0.6229462623596191, + "logits/rejected": -0.5881924629211426, + "logps/chosen": -66.45687103271484, + "logps/ref_chosen": -61.930641174316406, + "logps/ref_rejected": -91.060791015625, + "logps/rejected": -106.82664489746094, + "loss": 0.7921, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2991790771484375, + "margin_dpo/beta_margin_grad_std": 0.1847190260887146, + "margin_dpo/beta_margin_mean": 1.123962163925171, + "margin_dpo/loss_margin_mean": 11.239620208740234, + "margin_dpo/margin_mean": 11.239620208740234, + "margin_dpo/margin_std": 11.950462341308594, + "step": 70 + }, + { + "epoch": 0.10425844346549193, + "grad_norm": 50.17360305786133, + "learning_rate": 4.999967061337492e-07, + "logits/chosen": -0.6788771152496338, + "logits/rejected": -0.6398866772651672, + "logps/chosen": -65.67703247070312, + "logps/ref_chosen": -61.750343322753906, + "logps/ref_rejected": -97.33662414550781, + "logps/rejected": -114.21321105957031, + "loss": 0.6993, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2723275125026703, + "margin_dpo/beta_margin_grad_std": 0.1644226610660553, + "margin_dpo/beta_margin_mean": 1.2949903011322021, + "margin_dpo/loss_margin_mean": 12.949902534484863, + "margin_dpo/margin_mean": 12.949902534484863, + "margin_dpo/margin_std": 12.493947982788086, + "step": 71 + }, + { + "epoch": 0.10572687224669604, + "grad_norm": 59.9242057800293, + "learning_rate": 4.999868246217933e-07, + "logits/chosen": -0.6571969985961914, + "logits/rejected": -0.6217666864395142, + "logps/chosen": -70.40309143066406, + "logps/ref_chosen": -66.05341339111328, + "logps/ref_rejected": -95.2869873046875, + "logps/rejected": -113.08145141601562, + "loss": 0.7306, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.26945406198501587, + "margin_dpo/beta_margin_grad_std": 0.1959770768880844, + "margin_dpo/beta_margin_mean": 1.3444783687591553, + "margin_dpo/loss_margin_mean": 13.444782257080078, + "margin_dpo/margin_mean": 13.444782257080078, + "margin_dpo/margin_std": 13.743330955505371, + "step": 72 + }, + { + "epoch": 0.10719530102790015, + "grad_norm": 75.93876647949219, + "learning_rate": 4.999703557245192e-07, + "logits/chosen": -0.6721267104148865, + "logits/rejected": -0.6297430992126465, + "logps/chosen": -72.05320739746094, + "logps/ref_chosen": -66.25627136230469, + "logps/ref_rejected": -90.45613861083984, + "logps/rejected": -109.47367858886719, + "loss": 0.9481, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.3085705637931824, + "margin_dpo/beta_margin_grad_std": 0.24887485802173615, + "margin_dpo/beta_margin_mean": 1.3220614194869995, + "margin_dpo/loss_margin_mean": 13.220613479614258, + "margin_dpo/margin_mean": 13.220613479614258, + "margin_dpo/margin_std": 18.805517196655273, + "step": 73 + }, + { + "epoch": 0.10866372980910426, + "grad_norm": 73.18781280517578, + "learning_rate": 4.999472998758977e-07, + "logits/chosen": -0.6080462336540222, + "logits/rejected": -0.5960662364959717, + "logps/chosen": -59.563087463378906, + "logps/ref_chosen": -53.42488098144531, + "logps/ref_rejected": -95.94693756103516, + "logps/rejected": -115.85839080810547, + "loss": 0.8756, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2891170084476471, + "margin_dpo/beta_margin_grad_std": 0.21858373284339905, + "margin_dpo/beta_margin_mean": 1.3773247003555298, + "margin_dpo/loss_margin_mean": 13.773246765136719, + "margin_dpo/margin_mean": 13.773246765136719, + "margin_dpo/margin_std": 20.172929763793945, + "step": 74 + }, + { + "epoch": 0.11013215859030837, + "grad_norm": 50.57677459716797, + "learning_rate": 4.999176576834721e-07, + "logits/chosen": -0.6782846450805664, + "logits/rejected": -0.6683961153030396, + "logps/chosen": -57.562652587890625, + "logps/ref_chosen": -51.861663818359375, + "logps/ref_rejected": -111.25397491455078, + "logps/rejected": -136.24032592773438, + "loss": 0.6095, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2280743420124054, + "margin_dpo/beta_margin_grad_std": 0.19971585273742676, + "margin_dpo/beta_margin_mean": 1.9285348653793335, + "margin_dpo/loss_margin_mean": 19.285348892211914, + "margin_dpo/margin_mean": 19.28534698486328, + "margin_dpo/margin_std": 18.741535186767578, + "step": 75 + }, + { + "epoch": 0.11160058737151249, + "grad_norm": 64.94268035888672, + "learning_rate": 4.998814299283415e-07, + "logits/chosen": -0.7133210301399231, + "logits/rejected": -0.6718661785125732, + "logps/chosen": -59.98701095581055, + "logps/ref_chosen": -53.26604080200195, + "logps/ref_rejected": -78.21662139892578, + "logps/rejected": -97.29232788085938, + "loss": 0.8158, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.28044480085372925, + "margin_dpo/beta_margin_grad_std": 0.20121756196022034, + "margin_dpo/beta_margin_mean": 1.235473871231079, + "margin_dpo/loss_margin_mean": 12.354738235473633, + "margin_dpo/margin_mean": 12.354738235473633, + "margin_dpo/margin_std": 14.27847671508789, + "step": 76 + }, + { + "epoch": 0.1130690161527166, + "grad_norm": 78.29557037353516, + "learning_rate": 4.998386175651409e-07, + "logits/chosen": -0.6657835245132446, + "logits/rejected": -0.623427152633667, + "logps/chosen": -63.632198333740234, + "logps/ref_chosen": -58.0966796875, + "logps/ref_rejected": -93.77361297607422, + "logps/rejected": -118.64299774169922, + "loss": 0.6806, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2214127629995346, + "margin_dpo/beta_margin_grad_std": 0.2203107476234436, + "margin_dpo/beta_margin_mean": 1.9333863258361816, + "margin_dpo/loss_margin_mean": 19.3338623046875, + "margin_dpo/margin_mean": 19.333864212036133, + "margin_dpo/margin_std": 19.132383346557617, + "step": 77 + }, + { + "epoch": 0.1145374449339207, + "grad_norm": 66.0775146484375, + "learning_rate": 4.997892217220159e-07, + "logits/chosen": -0.6555283069610596, + "logits/rejected": -0.6280935406684875, + "logps/chosen": -60.86896896362305, + "logps/ref_chosen": -55.61378479003906, + "logps/ref_rejected": -84.93436431884766, + "logps/rejected": -105.0614013671875, + "loss": 0.7256, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2643897533416748, + "margin_dpo/beta_margin_grad_std": 0.20472703874111176, + "margin_dpo/beta_margin_mean": 1.4871852397918701, + "margin_dpo/loss_margin_mean": 14.871850967407227, + "margin_dpo/margin_mean": 14.871851921081543, + "margin_dpo/margin_std": 15.568973541259766, + "step": 78 + }, + { + "epoch": 0.11600587371512482, + "grad_norm": 58.39738845825195, + "learning_rate": 4.997332437005931e-07, + "logits/chosen": -0.6440068483352661, + "logits/rejected": -0.6116843819618225, + "logps/chosen": -60.54931640625, + "logps/ref_chosen": -55.45048522949219, + "logps/ref_rejected": -87.64756774902344, + "logps/rejected": -108.8857192993164, + "loss": 0.777, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.27868783473968506, + "margin_dpo/beta_margin_grad_std": 0.22643005847930908, + "margin_dpo/beta_margin_mean": 1.6139320135116577, + "margin_dpo/loss_margin_mean": 16.139320373535156, + "margin_dpo/margin_mean": 16.139320373535156, + "margin_dpo/margin_std": 18.9587459564209, + "step": 79 + }, + { + "epoch": 0.11747430249632893, + "grad_norm": 62.89072036743164, + "learning_rate": 4.996706849759452e-07, + "logits/chosen": -0.7126628160476685, + "logits/rejected": -0.6654119491577148, + "logps/chosen": -65.43215942382812, + "logps/ref_chosen": -58.519290924072266, + "logps/ref_rejected": -87.54750061035156, + "logps/rejected": -108.79297637939453, + "loss": 0.8315, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.29197126626968384, + "margin_dpo/beta_margin_grad_std": 0.22850532829761505, + "margin_dpo/beta_margin_mean": 1.4332611560821533, + "margin_dpo/loss_margin_mean": 14.332611083984375, + "margin_dpo/margin_mean": 14.332611083984375, + "margin_dpo/margin_std": 17.499080657958984, + "step": 80 + }, + { + "epoch": 0.11894273127753303, + "grad_norm": 72.54817199707031, + "learning_rate": 4.996015471965529e-07, + "logits/chosen": -0.7246617674827576, + "logits/rejected": -0.6918442249298096, + "logps/chosen": -72.08871459960938, + "logps/ref_chosen": -66.44886779785156, + "logps/ref_rejected": -129.66270446777344, + "logps/rejected": -154.00892639160156, + "loss": 0.6947, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.24446864426136017, + "margin_dpo/beta_margin_grad_std": 0.22534911334514618, + "margin_dpo/beta_margin_mean": 1.8706377744674683, + "margin_dpo/loss_margin_mean": 18.706378936767578, + "margin_dpo/margin_mean": 18.706378936767578, + "margin_dpo/margin_std": 20.739093780517578, + "step": 81 + }, + { + "epoch": 0.12041116005873716, + "grad_norm": 88.24505615234375, + "learning_rate": 4.995258321842611e-07, + "logits/chosen": -0.6451495885848999, + "logits/rejected": -0.6281242370605469, + "logps/chosen": -59.3546142578125, + "logps/ref_chosen": -52.232383728027344, + "logps/ref_rejected": -90.74325561523438, + "logps/rejected": -113.15169525146484, + "loss": 0.9502, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.28069868683815, + "margin_dpo/beta_margin_grad_std": 0.24664762616157532, + "margin_dpo/beta_margin_mean": 1.5286219120025635, + "margin_dpo/loss_margin_mean": 15.286218643188477, + "margin_dpo/margin_mean": 15.286218643188477, + "margin_dpo/margin_std": 21.404075622558594, + "step": 82 + }, + { + "epoch": 0.12187958883994127, + "grad_norm": 70.73540496826172, + "learning_rate": 4.994435419342304e-07, + "logits/chosen": -0.6840830445289612, + "logits/rejected": -0.6387213468551636, + "logps/chosen": -62.84056854248047, + "logps/ref_chosen": -55.82738494873047, + "logps/ref_rejected": -103.71590423583984, + "logps/rejected": -127.5250015258789, + "loss": 0.7455, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.25954824686050415, + "margin_dpo/beta_margin_grad_std": 0.22985972464084625, + "margin_dpo/beta_margin_mean": 1.679591417312622, + "margin_dpo/loss_margin_mean": 16.795913696289062, + "margin_dpo/margin_mean": 16.795913696289062, + "margin_dpo/margin_std": 18.532222747802734, + "step": 83 + }, + { + "epoch": 0.12334801762114538, + "grad_norm": 58.62339401245117, + "learning_rate": 4.993546786148857e-07, + "logits/chosen": -0.6582399606704712, + "logits/rejected": -0.6212340593338013, + "logps/chosen": -72.36793518066406, + "logps/ref_chosen": -67.1761703491211, + "logps/ref_rejected": -87.29859924316406, + "logps/rejected": -107.6661605834961, + "loss": 0.6762, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.24515578150749207, + "margin_dpo/beta_margin_grad_std": 0.18955430388450623, + "margin_dpo/beta_margin_mean": 1.5175797939300537, + "margin_dpo/loss_margin_mean": 15.175796508789062, + "margin_dpo/margin_mean": 15.175797462463379, + "margin_dpo/margin_std": 13.974632263183594, + "step": 84 + }, + { + "epoch": 0.12481644640234948, + "grad_norm": 65.68191528320312, + "learning_rate": 4.992592445678582e-07, + "logits/chosen": -0.6145851016044617, + "logits/rejected": -0.5817907452583313, + "logps/chosen": -64.20103454589844, + "logps/ref_chosen": -58.406620025634766, + "logps/ref_rejected": -78.63880157470703, + "logps/rejected": -99.02200317382812, + "loss": 0.7679, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.27928727865219116, + "margin_dpo/beta_margin_grad_std": 0.21024659276008606, + "margin_dpo/beta_margin_mean": 1.458878993988037, + "margin_dpo/loss_margin_mean": 14.588789939880371, + "margin_dpo/margin_mean": 14.588790893554688, + "margin_dpo/margin_std": 15.866073608398438, + "step": 85 + }, + { + "epoch": 0.1262848751835536, + "grad_norm": 85.21753692626953, + "learning_rate": 4.991572423079235e-07, + "logits/chosen": -0.6728634238243103, + "logits/rejected": -0.6558930277824402, + "logps/chosen": -63.18061828613281, + "logps/ref_chosen": -56.13746643066406, + "logps/ref_rejected": -88.12165069580078, + "logps/rejected": -110.33665466308594, + "loss": 0.9179, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.29970669746398926, + "margin_dpo/beta_margin_grad_std": 0.24453628063201904, + "margin_dpo/beta_margin_mean": 1.5171852111816406, + "margin_dpo/loss_margin_mean": 15.17185115814209, + "margin_dpo/margin_mean": 15.171852111816406, + "margin_dpo/margin_std": 21.81802749633789, + "step": 86 + }, + { + "epoch": 0.1277533039647577, + "grad_norm": 66.58555603027344, + "learning_rate": 4.990486745229364e-07, + "logits/chosen": -0.7240000367164612, + "logits/rejected": -0.6876901984214783, + "logps/chosen": -62.49974060058594, + "logps/ref_chosen": -55.63609313964844, + "logps/ref_rejected": -95.46757507324219, + "logps/rejected": -118.70195007324219, + "loss": 0.7934, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2582399249076843, + "margin_dpo/beta_margin_grad_std": 0.22514671087265015, + "margin_dpo/beta_margin_mean": 1.6370728015899658, + "margin_dpo/loss_margin_mean": 16.3707275390625, + "margin_dpo/margin_mean": 16.3707275390625, + "margin_dpo/margin_std": 19.043777465820312, + "step": 87 + }, + { + "epoch": 0.12922173274596183, + "grad_norm": 75.37992095947266, + "learning_rate": 4.989335440737586e-07, + "logits/chosen": -0.6701527237892151, + "logits/rejected": -0.6537374258041382, + "logps/chosen": -82.11605072021484, + "logps/ref_chosen": -73.67115020751953, + "logps/ref_rejected": -106.70849609375, + "logps/rejected": -127.71624755859375, + "loss": 0.9174, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.30198073387145996, + "margin_dpo/beta_margin_grad_std": 0.23300787806510925, + "margin_dpo/beta_margin_mean": 1.2562841176986694, + "margin_dpo/loss_margin_mean": 12.562840461730957, + "margin_dpo/margin_mean": 12.562841415405273, + "margin_dpo/margin_std": 15.86634635925293, + "step": 88 + }, + { + "epoch": 0.13069016152716592, + "grad_norm": 54.054622650146484, + "learning_rate": 4.988118539941847e-07, + "logits/chosen": -0.7244502902030945, + "logits/rejected": -0.6862339973449707, + "logps/chosen": -65.05143737792969, + "logps/ref_chosen": -60.624916076660156, + "logps/ref_rejected": -82.08354949951172, + "logps/rejected": -99.46173095703125, + "loss": 0.7405, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.27611032128334045, + "margin_dpo/beta_margin_grad_std": 0.17834323644638062, + "margin_dpo/beta_margin_mean": 1.295165777206421, + "margin_dpo/loss_margin_mean": 12.951656341552734, + "margin_dpo/margin_mean": 12.95165729522705, + "margin_dpo/margin_std": 13.88388442993164, + "step": 89 + }, + { + "epoch": 0.13215859030837004, + "grad_norm": 66.94837188720703, + "learning_rate": 4.986836074908615e-07, + "logits/chosen": -0.6321258544921875, + "logits/rejected": -0.6237634420394897, + "logps/chosen": -59.42333221435547, + "logps/ref_chosen": -53.285308837890625, + "logps/ref_rejected": -111.54470825195312, + "logps/rejected": -133.43154907226562, + "loss": 0.8384, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.285112202167511, + "margin_dpo/beta_margin_grad_std": 0.226267009973526, + "margin_dpo/beta_margin_mean": 1.5748803615570068, + "margin_dpo/loss_margin_mean": 15.748802185058594, + "margin_dpo/margin_mean": 15.748802185058594, + "margin_dpo/margin_std": 20.31298065185547, + "step": 90 + }, + { + "epoch": 0.13362701908957417, + "grad_norm": 65.86888122558594, + "learning_rate": 4.985488079432037e-07, + "logits/chosen": -0.6855983734130859, + "logits/rejected": -0.6458035707473755, + "logps/chosen": -67.0127944946289, + "logps/ref_chosen": -61.80295944213867, + "logps/ref_rejected": -87.87395477294922, + "logps/rejected": -108.96083068847656, + "loss": 0.7585, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2720244526863098, + "margin_dpo/beta_margin_grad_std": 0.22684738039970398, + "margin_dpo/beta_margin_mean": 1.5877044200897217, + "margin_dpo/loss_margin_mean": 15.877042770385742, + "margin_dpo/margin_mean": 15.877042770385742, + "margin_dpo/margin_std": 17.475290298461914, + "step": 91 + }, + { + "epoch": 0.13509544787077826, + "grad_norm": 60.52584457397461, + "learning_rate": 4.984074589033043e-07, + "logits/chosen": -0.7122005224227905, + "logits/rejected": -0.6839097738265991, + "logps/chosen": -56.672237396240234, + "logps/ref_chosen": -51.640769958496094, + "logps/ref_rejected": -77.88117980957031, + "logps/rejected": -97.52497100830078, + "loss": 0.8096, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.28509509563446045, + "margin_dpo/beta_margin_grad_std": 0.22501453757286072, + "margin_dpo/beta_margin_mean": 1.4612317085266113, + "margin_dpo/loss_margin_mean": 14.61231803894043, + "margin_dpo/margin_mean": 14.61231803894043, + "margin_dpo/margin_std": 17.27523422241211, + "step": 92 + }, + { + "epoch": 0.13656387665198239, + "grad_norm": 47.008575439453125, + "learning_rate": 4.982595640958425e-07, + "logits/chosen": -0.7324954271316528, + "logits/rejected": -0.671492874622345, + "logps/chosen": -57.973655700683594, + "logps/ref_chosen": -52.529239654541016, + "logps/ref_rejected": -77.1607437133789, + "logps/rejected": -97.39739990234375, + "loss": 0.6889, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2600102424621582, + "margin_dpo/beta_margin_grad_std": 0.18848371505737305, + "margin_dpo/beta_margin_mean": 1.479223608970642, + "margin_dpo/loss_margin_mean": 14.792236328125, + "margin_dpo/margin_mean": 14.792236328125, + "margin_dpo/margin_std": 15.35598087310791, + "step": 93 + }, + { + "epoch": 0.13803230543318648, + "grad_norm": 51.4643669128418, + "learning_rate": 4.98105127417984e-07, + "logits/chosen": -0.6778910756111145, + "logits/rejected": -0.649002730846405, + "logps/chosen": -67.1570053100586, + "logps/ref_chosen": -61.22261047363281, + "logps/ref_rejected": -99.59902954101562, + "logps/rejected": -121.3552474975586, + "loss": 0.6464, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.24860258400440216, + "margin_dpo/beta_margin_grad_std": 0.19004377722740173, + "margin_dpo/beta_margin_mean": 1.5821821689605713, + "margin_dpo/loss_margin_mean": 15.821820259094238, + "margin_dpo/margin_mean": 15.821819305419922, + "margin_dpo/margin_std": 14.735492706298828, + "step": 94 + }, + { + "epoch": 0.1395007342143906, + "grad_norm": 50.75383758544922, + "learning_rate": 4.979441529392784e-07, + "logits/chosen": -0.6933159828186035, + "logits/rejected": -0.655129075050354, + "logps/chosen": -57.09678649902344, + "logps/ref_chosen": -52.52364730834961, + "logps/ref_rejected": -75.88035583496094, + "logps/rejected": -93.311767578125, + "loss": 0.7209, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2710234224796295, + "margin_dpo/beta_margin_grad_std": 0.1795656383037567, + "margin_dpo/beta_margin_mean": 1.2858270406723022, + "margin_dpo/loss_margin_mean": 12.858270645141602, + "margin_dpo/margin_mean": 12.858270645141602, + "margin_dpo/margin_std": 12.511711120605469, + "step": 95 + }, + { + "epoch": 0.14096916299559473, + "grad_norm": 50.98220443725586, + "learning_rate": 4.977766449015534e-07, + "logits/chosen": -0.6764161586761475, + "logits/rejected": -0.6342806816101074, + "logps/chosen": -65.936279296875, + "logps/ref_chosen": -62.15697479248047, + "logps/ref_rejected": -96.59601593017578, + "logps/rejected": -117.31523895263672, + "loss": 0.6236, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.23587027192115784, + "margin_dpo/beta_margin_grad_std": 0.18438121676445007, + "margin_dpo/beta_margin_mean": 1.6939918994903564, + "margin_dpo/loss_margin_mean": 16.939918518066406, + "margin_dpo/margin_mean": 16.939918518066406, + "margin_dpo/margin_std": 16.764862060546875, + "step": 96 + }, + { + "epoch": 0.14243759177679882, + "grad_norm": 52.73670959472656, + "learning_rate": 4.976026077188012e-07, + "logits/chosen": -0.6401921510696411, + "logits/rejected": -0.5834782123565674, + "logps/chosen": -59.18102264404297, + "logps/ref_chosen": -54.64636993408203, + "logps/ref_rejected": -76.96475219726562, + "logps/rejected": -95.2552490234375, + "loss": 0.6774, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.25927746295928955, + "margin_dpo/beta_margin_grad_std": 0.17485031485557556, + "margin_dpo/beta_margin_mean": 1.375584363937378, + "margin_dpo/loss_margin_mean": 13.755844116210938, + "margin_dpo/margin_mean": 13.755844116210938, + "margin_dpo/margin_std": 12.07811164855957, + "step": 97 + }, + { + "epoch": 0.14390602055800295, + "grad_norm": 58.02984619140625, + "learning_rate": 4.974220459770639e-07, + "logits/chosen": -0.6708123683929443, + "logits/rejected": -0.6468954086303711, + "logps/chosen": -71.02387237548828, + "logps/ref_chosen": -65.25862884521484, + "logps/ref_rejected": -96.5274887084961, + "logps/rejected": -117.00706481933594, + "loss": 0.7512, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.25962063670158386, + "margin_dpo/beta_margin_grad_std": 0.2160511016845703, + "margin_dpo/beta_margin_mean": 1.4714339971542358, + "margin_dpo/loss_margin_mean": 14.714340209960938, + "margin_dpo/margin_mean": 14.714340209960938, + "margin_dpo/margin_std": 15.175495147705078, + "step": 98 + }, + { + "epoch": 0.14537444933920704, + "grad_norm": 48.38506317138672, + "learning_rate": 4.972349644343108e-07, + "logits/chosen": -0.6916057467460632, + "logits/rejected": -0.6791607737541199, + "logps/chosen": -50.54969787597656, + "logps/ref_chosen": -45.63848114013672, + "logps/ref_rejected": -86.43792724609375, + "logps/rejected": -107.18087768554688, + "loss": 0.6459, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2525924742221832, + "margin_dpo/beta_margin_grad_std": 0.17562821507453918, + "margin_dpo/beta_margin_mean": 1.5831732749938965, + "margin_dpo/loss_margin_mean": 15.831732749938965, + "margin_dpo/margin_mean": 15.831733703613281, + "margin_dpo/margin_std": 16.313186645507812, + "step": 99 + }, + { + "epoch": 0.14684287812041116, + "grad_norm": 67.92232513427734, + "learning_rate": 4.970413680203148e-07, + "logits/chosen": -0.6955288648605347, + "logits/rejected": -0.6533514857292175, + "logps/chosen": -62.669090270996094, + "logps/ref_chosen": -57.5939826965332, + "logps/ref_rejected": -74.06021118164062, + "logps/rejected": -90.62651062011719, + "loss": 0.9037, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.30934613943099976, + "margin_dpo/beta_margin_grad_std": 0.21438318490982056, + "margin_dpo/beta_margin_mean": 1.1491191387176514, + "margin_dpo/loss_margin_mean": 11.491190910339355, + "margin_dpo/margin_mean": 11.491189956665039, + "margin_dpo/margin_std": 15.003036499023438, + "step": 100 + }, + { + "epoch": 0.14684287812041116, + "eval_logits/chosen": -0.6628317832946777, + "eval_logits/rejected": -0.636573851108551, + "eval_logps/chosen": -87.1427230834961, + "eval_logps/ref_chosen": -79.05104064941406, + "eval_logps/ref_rejected": -86.79793548583984, + "eval_logps/rejected": -103.3295669555664, + "eval_loss": 0.5592836737632751, + "eval_margin_dpo/beta": 0.10000000149011612, + "eval_margin_dpo/beta_margin_grad_mean": -0.36682140827178955, + "eval_margin_dpo/beta_margin_grad_std": 0.23032358288764954, + "eval_margin_dpo/beta_margin_mean": 0.8439961671829224, + "eval_margin_dpo/loss_margin_mean": 8.439962387084961, + "eval_margin_dpo/margin_mean": 8.439962387084961, + "eval_margin_dpo/margin_std": 15.342604637145996, + "eval_runtime": 39.9749, + "eval_samples_per_second": 58.512, + "eval_steps_per_second": 1.851, + "step": 100 + }, + { + "epoch": 0.14831130690161526, + "grad_norm": 54.49692153930664, + "learning_rate": 4.968412618365215e-07, + "logits/chosen": -0.7001588344573975, + "logits/rejected": -0.6611640453338623, + "logps/chosen": -67.25942993164062, + "logps/ref_chosen": -61.64884948730469, + "logps/ref_rejected": -83.18968963623047, + "logps/rejected": -102.17335510253906, + "loss": 0.7842, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.288244366645813, + "margin_dpo/beta_margin_grad_std": 0.20376016199588776, + "margin_dpo/beta_margin_mean": 1.3373081684112549, + "margin_dpo/loss_margin_mean": 13.373082160949707, + "margin_dpo/margin_mean": 13.37308120727539, + "margin_dpo/margin_std": 15.715073585510254, + "step": 101 + }, + { + "epoch": 0.14977973568281938, + "grad_norm": 69.74262237548828, + "learning_rate": 4.966346511559149e-07, + "logits/chosen": -0.7373714447021484, + "logits/rejected": -0.6927535533905029, + "logps/chosen": -70.96074676513672, + "logps/ref_chosen": -64.0788803100586, + "logps/ref_rejected": -68.18707275390625, + "logps/rejected": -85.39456176757812, + "loss": 0.9365, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.3263900876045227, + "margin_dpo/beta_margin_grad_std": 0.2226821929216385, + "margin_dpo/beta_margin_mean": 1.0325615406036377, + "margin_dpo/loss_margin_mean": 10.325615882873535, + "margin_dpo/margin_mean": 10.325615882873535, + "margin_dpo/margin_std": 14.067426681518555, + "step": 102 + }, + { + "epoch": 0.1512481644640235, + "grad_norm": 46.19940185546875, + "learning_rate": 4.964215414228785e-07, + "logits/chosen": -0.6903908252716064, + "logits/rejected": -0.6522761583328247, + "logps/chosen": -64.90095520019531, + "logps/ref_chosen": -61.299278259277344, + "logps/ref_rejected": -93.57271575927734, + "logps/rejected": -115.02561950683594, + "loss": 0.5573, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.21845993399620056, + "margin_dpo/beta_margin_grad_std": 0.17656126618385315, + "margin_dpo/beta_margin_mean": 1.7851228713989258, + "margin_dpo/loss_margin_mean": 17.851226806640625, + "margin_dpo/margin_mean": 17.851226806640625, + "margin_dpo/margin_std": 15.277688026428223, + "step": 103 + }, + { + "epoch": 0.1527165932452276, + "grad_norm": 52.796669006347656, + "learning_rate": 4.96201938253052e-07, + "logits/chosen": -0.7016171813011169, + "logits/rejected": -0.6555418968200684, + "logps/chosen": -59.226173400878906, + "logps/ref_chosen": -54.37277603149414, + "logps/ref_rejected": -89.5647201538086, + "logps/rejected": -111.29109191894531, + "loss": 0.6771, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2526766061782837, + "margin_dpo/beta_margin_grad_std": 0.20172113180160522, + "margin_dpo/beta_margin_mean": 1.6872971057891846, + "margin_dpo/loss_margin_mean": 16.87297248840332, + "margin_dpo/margin_mean": 16.872970581054688, + "margin_dpo/margin_std": 17.26502227783203, + "step": 104 + }, + { + "epoch": 0.15418502202643172, + "grad_norm": 39.37166976928711, + "learning_rate": 4.959758474331832e-07, + "logits/chosen": -0.7300401926040649, + "logits/rejected": -0.6914358139038086, + "logps/chosen": -58.295875549316406, + "logps/ref_chosen": -54.638946533203125, + "logps/ref_rejected": -97.97351837158203, + "logps/rejected": -124.21298217773438, + "loss": 0.4219, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16911230981349945, + "margin_dpo/beta_margin_grad_std": 0.16213884949684143, + "margin_dpo/beta_margin_mean": 2.2582526206970215, + "margin_dpo/loss_margin_mean": 22.58252716064453, + "margin_dpo/margin_mean": 22.58252716064453, + "margin_dpo/margin_std": 16.65502166748047, + "step": 105 + }, + { + "epoch": 0.15565345080763582, + "grad_norm": 49.93497848510742, + "learning_rate": 4.957432749209755e-07, + "logits/chosen": -0.6783395409584045, + "logits/rejected": -0.6233980059623718, + "logps/chosen": -59.64507293701172, + "logps/ref_chosen": -54.83289337158203, + "logps/ref_rejected": -85.22461700439453, + "logps/rejected": -104.86808013916016, + "loss": 0.6785, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.25617170333862305, + "margin_dpo/beta_margin_grad_std": 0.19661831855773926, + "margin_dpo/beta_margin_mean": 1.483128547668457, + "margin_dpo/loss_margin_mean": 14.83128547668457, + "margin_dpo/margin_mean": 14.83128547668457, + "margin_dpo/margin_std": 14.39011001586914, + "step": 106 + }, + { + "epoch": 0.15712187958883994, + "grad_norm": 50.40999984741211, + "learning_rate": 4.955042268449307e-07, + "logits/chosen": -0.7118107676506042, + "logits/rejected": -0.6568803787231445, + "logps/chosen": -75.61688995361328, + "logps/ref_chosen": -69.70780944824219, + "logps/ref_rejected": -94.73950958251953, + "logps/rejected": -117.43498229980469, + "loss": 0.6451, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.23818761110305786, + "margin_dpo/beta_margin_grad_std": 0.204330176115036, + "margin_dpo/beta_margin_mean": 1.6786396503448486, + "margin_dpo/loss_margin_mean": 16.786396026611328, + "margin_dpo/margin_mean": 16.786396026611328, + "margin_dpo/margin_std": 15.435192108154297, + "step": 107 + }, + { + "epoch": 0.15859030837004406, + "grad_norm": 58.914913177490234, + "learning_rate": 4.952587095041881e-07, + "logits/chosen": -0.7263258695602417, + "logits/rejected": -0.6777476668357849, + "logps/chosen": -61.758811950683594, + "logps/ref_chosen": -56.0098876953125, + "logps/ref_rejected": -95.79601287841797, + "logps/rejected": -118.6368408203125, + "loss": 0.7481, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.26686227321624756, + "margin_dpo/beta_margin_grad_std": 0.2259853482246399, + "margin_dpo/beta_margin_mean": 1.7091913223266602, + "margin_dpo/loss_margin_mean": 17.09191131591797, + "margin_dpo/margin_mean": 17.09191131591797, + "margin_dpo/margin_std": 19.03655433654785, + "step": 108 + }, + { + "epoch": 0.16005873715124816, + "grad_norm": 45.8420295715332, + "learning_rate": 4.95006729368358e-07, + "logits/chosen": -0.6235396862030029, + "logits/rejected": -0.5926010608673096, + "logps/chosen": -67.99076080322266, + "logps/ref_chosen": -62.88549041748047, + "logps/ref_rejected": -98.68573760986328, + "logps/rejected": -122.61830139160156, + "loss": 0.545, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2126576006412506, + "margin_dpo/beta_margin_grad_std": 0.18558171391487122, + "margin_dpo/beta_margin_mean": 1.8827290534973145, + "margin_dpo/loss_margin_mean": 18.827289581298828, + "margin_dpo/margin_mean": 18.82729148864746, + "margin_dpo/margin_std": 15.299284934997559, + "step": 109 + }, + { + "epoch": 0.16152716593245228, + "grad_norm": 50.39557647705078, + "learning_rate": 4.947482930773511e-07, + "logits/chosen": -0.6504217386245728, + "logits/rejected": -0.5953609347343445, + "logps/chosen": -63.09541320800781, + "logps/ref_chosen": -58.753684997558594, + "logps/ref_rejected": -79.75001525878906, + "logps/rejected": -101.90673828125, + "loss": 0.6773, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.23589923977851868, + "margin_dpo/beta_margin_grad_std": 0.20386064052581787, + "margin_dpo/beta_margin_mean": 1.7814992666244507, + "margin_dpo/loss_margin_mean": 17.814992904663086, + "margin_dpo/margin_mean": 17.814992904663086, + "margin_dpo/margin_std": 18.05242919921875, + "step": 110 + }, + { + "epoch": 0.16299559471365638, + "grad_norm": 53.93953323364258, + "learning_rate": 4.944834074412042e-07, + "logits/chosen": -0.6995693445205688, + "logits/rejected": -0.6706931591033936, + "logps/chosen": -74.98273468017578, + "logps/ref_chosen": -68.62410736083984, + "logps/ref_rejected": -98.42886352539062, + "logps/rejected": -123.05096435546875, + "loss": 0.663, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.23302116990089417, + "margin_dpo/beta_margin_grad_std": 0.2124572992324829, + "margin_dpo/beta_margin_mean": 1.826347827911377, + "margin_dpo/loss_margin_mean": 18.263477325439453, + "margin_dpo/margin_mean": 18.263477325439453, + "margin_dpo/margin_std": 17.97542953491211, + "step": 111 + }, + { + "epoch": 0.1644640234948605, + "grad_norm": 58.261051177978516, + "learning_rate": 4.942120794399002e-07, + "logits/chosen": -0.6929997205734253, + "logits/rejected": -0.6383606791496277, + "logps/chosen": -56.543792724609375, + "logps/ref_chosen": -50.24964141845703, + "logps/ref_rejected": -64.77442932128906, + "logps/rejected": -84.3316421508789, + "loss": 0.8086, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2888134717941284, + "margin_dpo/beta_margin_grad_std": 0.2119472473859787, + "margin_dpo/beta_margin_mean": 1.3263057470321655, + "margin_dpo/loss_margin_mean": 13.263057708740234, + "margin_dpo/margin_mean": 13.263057708740234, + "margin_dpo/margin_std": 15.071852684020996, + "step": 112 + }, + { + "epoch": 0.16593245227606462, + "grad_norm": 52.767189025878906, + "learning_rate": 4.939343162231841e-07, + "logits/chosen": -0.6641270518302917, + "logits/rejected": -0.6109206676483154, + "logps/chosen": -72.74588012695312, + "logps/ref_chosen": -66.71295166015625, + "logps/ref_rejected": -77.96870422363281, + "logps/rejected": -98.95388793945312, + "loss": 0.659, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.25107163190841675, + "margin_dpo/beta_margin_grad_std": 0.19331878423690796, + "margin_dpo/beta_margin_mean": 1.495226263999939, + "margin_dpo/loss_margin_mean": 14.952262878417969, + "margin_dpo/margin_mean": 14.952262878417969, + "margin_dpo/margin_std": 13.45613956451416, + "step": 113 + }, + { + "epoch": 0.16740088105726872, + "grad_norm": 48.202720642089844, + "learning_rate": 4.936501251103751e-07, + "logits/chosen": -0.6876777410507202, + "logits/rejected": -0.6400505304336548, + "logps/chosen": -63.42707824707031, + "logps/ref_chosen": -57.78507995605469, + "logps/ref_rejected": -87.10966491699219, + "logps/rejected": -112.90645599365234, + "loss": 0.5985, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.22679130733013153, + "margin_dpo/beta_margin_grad_std": 0.2024666965007782, + "margin_dpo/beta_margin_mean": 2.0154800415039062, + "margin_dpo/loss_margin_mean": 20.154800415039062, + "margin_dpo/margin_mean": 20.154800415039062, + "margin_dpo/margin_std": 20.26180076599121, + "step": 114 + }, + { + "epoch": 0.16886930983847284, + "grad_norm": 73.78862762451172, + "learning_rate": 4.933595135901732e-07, + "logits/chosen": -0.7042691111564636, + "logits/rejected": -0.6589173078536987, + "logps/chosen": -73.75160217285156, + "logps/ref_chosen": -65.5826416015625, + "logps/ref_rejected": -98.56552124023438, + "logps/rejected": -122.13522338867188, + "loss": 0.7918, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.27233636379241943, + "margin_dpo/beta_margin_grad_std": 0.21869003772735596, + "margin_dpo/beta_margin_mean": 1.5400748252868652, + "margin_dpo/loss_margin_mean": 15.400747299194336, + "margin_dpo/margin_mean": 15.400747299194336, + "margin_dpo/margin_std": 18.599172592163086, + "step": 115 + }, + { + "epoch": 0.17033773861967694, + "grad_norm": 47.02722930908203, + "learning_rate": 4.930624893204624e-07, + "logits/chosen": -0.7148517370223999, + "logits/rejected": -0.6804147958755493, + "logps/chosen": -57.35455322265625, + "logps/ref_chosen": -51.40031051635742, + "logps/ref_rejected": -80.5218505859375, + "logps/rejected": -101.68440246582031, + "loss": 0.6214, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.24444279074668884, + "margin_dpo/beta_margin_grad_std": 0.17096129059791565, + "margin_dpo/beta_margin_mean": 1.5208299160003662, + "margin_dpo/loss_margin_mean": 15.208297729492188, + "margin_dpo/margin_mean": 15.20829963684082, + "margin_dpo/margin_std": 13.959308624267578, + "step": 116 + }, + { + "epoch": 0.17180616740088106, + "grad_norm": 61.11030960083008, + "learning_rate": 4.927590601281083e-07, + "logits/chosen": -0.6608189344406128, + "logits/rejected": -0.6192047595977783, + "logps/chosen": -75.69219207763672, + "logps/ref_chosen": -69.29840850830078, + "logps/ref_rejected": -66.58399200439453, + "logps/rejected": -87.99634552001953, + "loss": 0.6968, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.26184725761413574, + "margin_dpo/beta_margin_grad_std": 0.19542691111564636, + "margin_dpo/beta_margin_mean": 1.501856803894043, + "margin_dpo/loss_margin_mean": 15.01856803894043, + "margin_dpo/margin_mean": 15.01856803894043, + "margin_dpo/margin_std": 15.984650611877441, + "step": 117 + }, + { + "epoch": 0.17327459618208516, + "grad_norm": 48.049564361572266, + "learning_rate": 4.924492340087524e-07, + "logits/chosen": -0.6910693645477295, + "logits/rejected": -0.6483018398284912, + "logps/chosen": -62.306884765625, + "logps/ref_chosen": -55.6409797668457, + "logps/ref_rejected": -75.66905212402344, + "logps/rejected": -96.35951232910156, + "loss": 0.6673, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2580760717391968, + "margin_dpo/beta_margin_grad_std": 0.17991520464420319, + "margin_dpo/beta_margin_mean": 1.4024548530578613, + "margin_dpo/loss_margin_mean": 14.024548530578613, + "margin_dpo/margin_mean": 14.024547576904297, + "margin_dpo/margin_std": 12.942065238952637, + "step": 118 + }, + { + "epoch": 0.17474302496328928, + "grad_norm": 57.746402740478516, + "learning_rate": 4.92133019126601e-07, + "logits/chosen": -0.6886883974075317, + "logits/rejected": -0.6644145250320435, + "logps/chosen": -80.7379379272461, + "logps/ref_chosen": -73.51019287109375, + "logps/ref_rejected": -102.97728729248047, + "logps/rejected": -125.08132934570312, + "loss": 0.7405, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2667841613292694, + "margin_dpo/beta_margin_grad_std": 0.21272984147071838, + "margin_dpo/beta_margin_mean": 1.4876298904418945, + "margin_dpo/loss_margin_mean": 14.876298904418945, + "margin_dpo/margin_mean": 14.876298904418945, + "margin_dpo/margin_std": 16.01374626159668, + "step": 119 + }, + { + "epoch": 0.1762114537444934, + "grad_norm": 52.1450080871582, + "learning_rate": 4.918104238142103e-07, + "logits/chosen": -0.7026511430740356, + "logits/rejected": -0.6608834266662598, + "logps/chosen": -84.82908630371094, + "logps/ref_chosen": -76.78083801269531, + "logps/ref_rejected": -108.02374267578125, + "logps/rejected": -134.74542236328125, + "loss": 0.6004, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.22407472133636475, + "margin_dpo/beta_margin_grad_std": 0.20310235023498535, + "margin_dpo/beta_margin_mean": 1.8673429489135742, + "margin_dpo/loss_margin_mean": 18.673429489135742, + "margin_dpo/margin_mean": 18.673429489135742, + "margin_dpo/margin_std": 17.22457504272461, + "step": 120 + }, + { + "epoch": 0.1776798825256975, + "grad_norm": 48.357093811035156, + "learning_rate": 4.91481456572267e-07, + "logits/chosen": -0.6549187898635864, + "logits/rejected": -0.6369335651397705, + "logps/chosen": -69.40840911865234, + "logps/ref_chosen": -61.789894104003906, + "logps/ref_rejected": -109.99456787109375, + "logps/rejected": -137.22264099121094, + "loss": 0.5936, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2275564819574356, + "margin_dpo/beta_margin_grad_std": 0.19740846753120422, + "margin_dpo/beta_margin_mean": 1.9609556198120117, + "margin_dpo/loss_margin_mean": 19.609556198120117, + "margin_dpo/margin_mean": 19.60955810546875, + "margin_dpo/margin_std": 18.554580688476562, + "step": 121 + }, + { + "epoch": 0.17914831130690162, + "grad_norm": 45.592864990234375, + "learning_rate": 4.911461260693638e-07, + "logits/chosen": -0.6858741044998169, + "logits/rejected": -0.6766628623008728, + "logps/chosen": -53.84559631347656, + "logps/ref_chosen": -46.90221405029297, + "logps/ref_rejected": -106.71418762207031, + "logps/rejected": -138.1607666015625, + "loss": 0.4337, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1684824675321579, + "margin_dpo/beta_margin_grad_std": 0.18330231308937073, + "margin_dpo/beta_margin_mean": 2.450319290161133, + "margin_dpo/loss_margin_mean": 24.503192901611328, + "margin_dpo/margin_mean": 24.503192901611328, + "margin_dpo/margin_std": 18.173328399658203, + "step": 122 + }, + { + "epoch": 0.18061674008810572, + "grad_norm": 66.03611755371094, + "learning_rate": 4.908044411417711e-07, + "logits/chosen": -0.6609284281730652, + "logits/rejected": -0.6286982297897339, + "logps/chosen": -68.30619812011719, + "logps/ref_chosen": -61.33863830566406, + "logps/ref_rejected": -87.77539825439453, + "logps/rejected": -111.76436614990234, + "loss": 0.7836, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2565336227416992, + "margin_dpo/beta_margin_grad_std": 0.23589524626731873, + "margin_dpo/beta_margin_mean": 1.7021416425704956, + "margin_dpo/loss_margin_mean": 17.02141571044922, + "margin_dpo/margin_mean": 17.02141571044922, + "margin_dpo/margin_std": 19.722978591918945, + "step": 123 + }, + { + "epoch": 0.18208516886930984, + "grad_norm": 62.63188934326172, + "learning_rate": 4.904564107932048e-07, + "logits/chosen": -0.664189338684082, + "logits/rejected": -0.6542805433273315, + "logps/chosen": -78.76295471191406, + "logps/ref_chosen": -71.44833374023438, + "logps/ref_rejected": -117.58056640625, + "logps/rejected": -146.3335723876953, + "loss": 0.6425, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.22931857407093048, + "margin_dpo/beta_margin_grad_std": 0.22194989025592804, + "margin_dpo/beta_margin_mean": 2.1438369750976562, + "margin_dpo/loss_margin_mean": 21.438369750976562, + "margin_dpo/margin_mean": 21.438369750976562, + "margin_dpo/margin_std": 23.726600646972656, + "step": 124 + }, + { + "epoch": 0.18355359765051396, + "grad_norm": 45.74631881713867, + "learning_rate": 4.90102044194588e-07, + "logits/chosen": -0.6452882289886475, + "logits/rejected": -0.6194664239883423, + "logps/chosen": -55.687599182128906, + "logps/ref_chosen": -50.136940002441406, + "logps/ref_rejected": -83.98861694335938, + "logps/rejected": -109.53338623046875, + "loss": 0.5347, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2084668129682541, + "margin_dpo/beta_margin_grad_std": 0.1783091127872467, + "margin_dpo/beta_margin_mean": 1.9994112253189087, + "margin_dpo/loss_margin_mean": 19.99411392211914, + "margin_dpo/margin_mean": 19.994110107421875, + "margin_dpo/margin_std": 17.385387420654297, + "step": 125 + }, + { + "epoch": 0.18502202643171806, + "grad_norm": 55.11186599731445, + "learning_rate": 4.897413506838102e-07, + "logits/chosen": -0.6568164825439453, + "logits/rejected": -0.6220812797546387, + "logps/chosen": -61.971824645996094, + "logps/ref_chosen": -55.66706848144531, + "logps/ref_rejected": -98.1297607421875, + "logps/rejected": -123.57440185546875, + "loss": 0.5552, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.21338176727294922, + "margin_dpo/beta_margin_grad_std": 0.18551576137542725, + "margin_dpo/beta_margin_mean": 1.9139891862869263, + "margin_dpo/loss_margin_mean": 19.139890670776367, + "margin_dpo/margin_mean": 19.139890670776367, + "margin_dpo/margin_std": 16.969818115234375, + "step": 126 + }, + { + "epoch": 0.18649045521292218, + "grad_norm": 46.06990432739258, + "learning_rate": 4.89374339765481e-07, + "logits/chosen": -0.638472318649292, + "logits/rejected": -0.6041021347045898, + "logps/chosen": -61.95406723022461, + "logps/ref_chosen": -56.55467987060547, + "logps/ref_rejected": -76.7957763671875, + "logps/rejected": -98.31398010253906, + "loss": 0.6303, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2392818182706833, + "margin_dpo/beta_margin_grad_std": 0.19149260222911835, + "margin_dpo/beta_margin_mean": 1.6118814945220947, + "margin_dpo/loss_margin_mean": 16.118816375732422, + "margin_dpo/margin_mean": 16.118816375732422, + "margin_dpo/margin_std": 13.991384506225586, + "step": 127 + }, + { + "epoch": 0.18795888399412627, + "grad_norm": 51.177642822265625, + "learning_rate": 4.890010211106795e-07, + "logits/chosen": -0.663079023361206, + "logits/rejected": -0.616753876209259, + "logps/chosen": -63.87889862060547, + "logps/ref_chosen": -58.12095642089844, + "logps/ref_rejected": -76.43896484375, + "logps/rejected": -99.25593566894531, + "loss": 0.6751, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.25181353092193604, + "margin_dpo/beta_margin_grad_std": 0.20399567484855652, + "margin_dpo/beta_margin_mean": 1.7059035301208496, + "margin_dpo/loss_margin_mean": 17.05903434753418, + "margin_dpo/margin_mean": 17.05903434753418, + "margin_dpo/margin_std": 17.72481346130371, + "step": 128 + }, + { + "epoch": 0.1894273127753304, + "grad_norm": 72.67992401123047, + "learning_rate": 4.88621404556699e-07, + "logits/chosen": -0.6873067617416382, + "logits/rejected": -0.6568499803543091, + "logps/chosen": -75.6619644165039, + "logps/ref_chosen": -66.91636657714844, + "logps/ref_rejected": -96.6422119140625, + "logps/rejected": -122.64834594726562, + "loss": 0.7959, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.26328974962234497, + "margin_dpo/beta_margin_grad_std": 0.24150995910167694, + "margin_dpo/beta_margin_mean": 1.7260537147521973, + "margin_dpo/loss_margin_mean": 17.260536193847656, + "margin_dpo/margin_mean": 17.26053810119629, + "margin_dpo/margin_std": 20.107585906982422, + "step": 129 + }, + { + "epoch": 0.19089574155653452, + "grad_norm": 50.73147964477539, + "learning_rate": 4.882355001067891e-07, + "logits/chosen": -0.6596213579177856, + "logits/rejected": -0.6461096405982971, + "logps/chosen": -51.04236602783203, + "logps/ref_chosen": -44.666847229003906, + "logps/ref_rejected": -82.78165435791016, + "logps/rejected": -112.08168029785156, + "loss": 0.5939, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20150384306907654, + "margin_dpo/beta_margin_grad_std": 0.21593783795833588, + "margin_dpo/beta_margin_mean": 2.2924509048461914, + "margin_dpo/loss_margin_mean": 22.924509048461914, + "margin_dpo/margin_mean": 22.924509048461914, + "margin_dpo/margin_std": 19.672473907470703, + "step": 130 + }, + { + "epoch": 0.19236417033773862, + "grad_norm": 43.14263916015625, + "learning_rate": 4.878433179298909e-07, + "logits/chosen": -0.6646705269813538, + "logits/rejected": -0.6489601135253906, + "logps/chosen": -49.25099182128906, + "logps/ref_chosen": -44.92458724975586, + "logps/ref_rejected": -88.44401550292969, + "logps/rejected": -113.0731201171875, + "loss": 0.5387, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20338965952396393, + "margin_dpo/beta_margin_grad_std": 0.19119322299957275, + "margin_dpo/beta_margin_mean": 2.0302700996398926, + "margin_dpo/loss_margin_mean": 20.302701950073242, + "margin_dpo/margin_mean": 20.302701950073242, + "margin_dpo/margin_std": 17.324234008789062, + "step": 131 + }, + { + "epoch": 0.19383259911894274, + "grad_norm": 48.75657272338867, + "learning_rate": 4.874448683603694e-07, + "logits/chosen": -0.6894493699073792, + "logits/rejected": -0.6632376909255981, + "logps/chosen": -65.58708953857422, + "logps/ref_chosen": -59.00108337402344, + "logps/ref_rejected": -87.89215087890625, + "logps/rejected": -113.75344848632812, + "loss": 0.539, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2136635035276413, + "margin_dpo/beta_margin_grad_std": 0.17412179708480835, + "margin_dpo/beta_margin_mean": 1.927529215812683, + "margin_dpo/loss_margin_mean": 19.275291442871094, + "margin_dpo/margin_mean": 19.275293350219727, + "margin_dpo/margin_std": 17.327112197875977, + "step": 132 + }, + { + "epoch": 0.19530102790014683, + "grad_norm": 57.14876937866211, + "learning_rate": 4.870401618977415e-07, + "logits/chosen": -0.6868765354156494, + "logits/rejected": -0.6663703918457031, + "logps/chosen": -74.35096740722656, + "logps/ref_chosen": -66.60449981689453, + "logps/ref_rejected": -96.33355712890625, + "logps/rejected": -121.88394165039062, + "loss": 0.711, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2557414174079895, + "margin_dpo/beta_margin_grad_std": 0.22053731977939606, + "margin_dpo/beta_margin_mean": 1.780390977859497, + "margin_dpo/loss_margin_mean": 17.803909301757812, + "margin_dpo/margin_mean": 17.803909301757812, + "margin_dpo/margin_std": 19.555706024169922, + "step": 133 + }, + { + "epoch": 0.19676945668135096, + "grad_norm": 48.30363845825195, + "learning_rate": 4.866292092063986e-07, + "logits/chosen": -0.6847056150436401, + "logits/rejected": -0.6499172449111938, + "logps/chosen": -57.004554748535156, + "logps/ref_chosen": -52.06925582885742, + "logps/ref_rejected": -87.6545181274414, + "logps/rejected": -112.0121841430664, + "loss": 0.4899, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20193609595298767, + "margin_dpo/beta_margin_grad_std": 0.15019506216049194, + "margin_dpo/beta_margin_mean": 1.942237377166748, + "margin_dpo/loss_margin_mean": 19.422372817993164, + "margin_dpo/margin_mean": 19.42237091064453, + "margin_dpo/margin_std": 15.808595657348633, + "step": 134 + }, + { + "epoch": 0.19823788546255505, + "grad_norm": 56.833290100097656, + "learning_rate": 4.862120211153265e-07, + "logits/chosen": -0.6657185554504395, + "logits/rejected": -0.6646615862846375, + "logps/chosen": -58.18457794189453, + "logps/ref_chosen": -50.353858947753906, + "logps/ref_rejected": -115.97975158691406, + "logps/rejected": -144.62242126464844, + "loss": 0.5819, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.21536532044410706, + "margin_dpo/beta_margin_grad_std": 0.20012225210666656, + "margin_dpo/beta_margin_mean": 2.0811963081359863, + "margin_dpo/loss_margin_mean": 20.811962127685547, + "margin_dpo/margin_mean": 20.811962127685547, + "margin_dpo/margin_std": 19.506851196289062, + "step": 135 + }, + { + "epoch": 0.19970631424375918, + "grad_norm": 60.99176788330078, + "learning_rate": 4.857886086178193e-07, + "logits/chosen": -0.6759936809539795, + "logits/rejected": -0.6444242596626282, + "logps/chosen": -73.07025146484375, + "logps/ref_chosen": -65.072509765625, + "logps/ref_rejected": -96.32122802734375, + "logps/rejected": -120.97657775878906, + "loss": 0.6585, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.252665251493454, + "margin_dpo/beta_margin_grad_std": 0.18508610129356384, + "margin_dpo/beta_margin_mean": 1.6657602787017822, + "margin_dpo/loss_margin_mean": 16.657604217529297, + "margin_dpo/margin_mean": 16.657604217529297, + "margin_dpo/margin_std": 18.530437469482422, + "step": 136 + }, + { + "epoch": 0.2011747430249633, + "grad_norm": 61.030094146728516, + "learning_rate": 4.853589828711902e-07, + "logits/chosen": -0.6564372181892395, + "logits/rejected": -0.6498109102249146, + "logps/chosen": -58.350120544433594, + "logps/ref_chosen": -48.759117126464844, + "logps/ref_rejected": -113.86377716064453, + "logps/rejected": -146.03843688964844, + "loss": 0.6181, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.218036487698555, + "margin_dpo/beta_margin_grad_std": 0.2112565040588379, + "margin_dpo/beta_margin_mean": 2.258366346359253, + "margin_dpo/loss_margin_mean": 22.583663940429688, + "margin_dpo/margin_mean": 22.583663940429688, + "margin_dpo/margin_std": 22.754310607910156, + "step": 137 + }, + { + "epoch": 0.2026431718061674, + "grad_norm": 70.91644287109375, + "learning_rate": 4.849231551964771e-07, + "logits/chosen": -0.6515681147575378, + "logits/rejected": -0.6243264675140381, + "logps/chosen": -69.78646850585938, + "logps/ref_chosen": -60.519649505615234, + "logps/ref_rejected": -93.19694519042969, + "logps/rejected": -121.13736724853516, + "loss": 0.6843, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2272961288690567, + "margin_dpo/beta_margin_grad_std": 0.22245639562606812, + "margin_dpo/beta_margin_mean": 1.8673601150512695, + "margin_dpo/loss_margin_mean": 18.673601150512695, + "margin_dpo/margin_mean": 18.673603057861328, + "margin_dpo/margin_std": 18.434284210205078, + "step": 138 + }, + { + "epoch": 0.20411160058737152, + "grad_norm": 50.293697357177734, + "learning_rate": 4.844811370781446e-07, + "logits/chosen": -0.6459161639213562, + "logits/rejected": -0.6150977611541748, + "logps/chosen": -53.843475341796875, + "logps/ref_chosen": -46.89138412475586, + "logps/ref_rejected": -79.72798156738281, + "logps/rejected": -107.27476501464844, + "loss": 0.548, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.21206048130989075, + "margin_dpo/beta_margin_grad_std": 0.19148895144462585, + "margin_dpo/beta_margin_mean": 2.059469699859619, + "margin_dpo/loss_margin_mean": 20.594696044921875, + "margin_dpo/margin_mean": 20.594696044921875, + "margin_dpo/margin_std": 18.52047348022461, + "step": 139 + }, + { + "epoch": 0.2055800293685756, + "grad_norm": 53.57754898071289, + "learning_rate": 4.840329401637809e-07, + "logits/chosen": -0.6656177639961243, + "logits/rejected": -0.6380197405815125, + "logps/chosen": -66.53479766845703, + "logps/ref_chosen": -58.97471618652344, + "logps/ref_rejected": -83.28411102294922, + "logps/rejected": -110.21284484863281, + "loss": 0.676, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.23967662453651428, + "margin_dpo/beta_margin_grad_std": 0.2281067818403244, + "margin_dpo/beta_margin_mean": 1.9368653297424316, + "margin_dpo/loss_margin_mean": 19.368654251098633, + "margin_dpo/margin_mean": 19.36865234375, + "margin_dpo/margin_std": 19.57999038696289, + "step": 140 + }, + { + "epoch": 0.20704845814977973, + "grad_norm": 61.85297393798828, + "learning_rate": 4.83578576263792e-07, + "logits/chosen": -0.6568824052810669, + "logits/rejected": -0.6319071650505066, + "logps/chosen": -81.36563110351562, + "logps/ref_chosen": -75.0756607055664, + "logps/ref_rejected": -98.1922607421875, + "logps/rejected": -123.78886413574219, + "loss": 0.6184, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.21810100972652435, + "margin_dpo/beta_margin_grad_std": 0.2185245007276535, + "margin_dpo/beta_margin_mean": 1.9306633472442627, + "margin_dpo/loss_margin_mean": 19.30663299560547, + "margin_dpo/margin_mean": 19.30663299560547, + "margin_dpo/margin_std": 17.307022094726562, + "step": 141 + }, + { + "epoch": 0.20851688693098386, + "grad_norm": 72.42768096923828, + "learning_rate": 4.83118057351089e-07, + "logits/chosen": -0.6577416658401489, + "logits/rejected": -0.6425771117210388, + "logps/chosen": -67.6053695678711, + "logps/ref_chosen": -58.027931213378906, + "logps/ref_rejected": -94.58222198486328, + "logps/rejected": -124.15780639648438, + "loss": 0.7595, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.24029812216758728, + "margin_dpo/beta_margin_grad_std": 0.23216235637664795, + "margin_dpo/beta_margin_mean": 1.999813437461853, + "margin_dpo/loss_margin_mean": 19.99813461303711, + "margin_dpo/margin_mean": 19.99813461303711, + "margin_dpo/margin_std": 21.703876495361328, + "step": 142 + }, + { + "epoch": 0.20998531571218795, + "grad_norm": 73.55904388427734, + "learning_rate": 4.826513955607734e-07, + "logits/chosen": -0.6666814088821411, + "logits/rejected": -0.6278376579284668, + "logps/chosen": -66.27539825439453, + "logps/ref_chosen": -57.59645080566406, + "logps/ref_rejected": -78.99957275390625, + "logps/rejected": -103.03237915039062, + "loss": 0.8432, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.275440514087677, + "margin_dpo/beta_margin_grad_std": 0.24454638361930847, + "margin_dpo/beta_margin_mean": 1.535386323928833, + "margin_dpo/loss_margin_mean": 15.353862762451172, + "margin_dpo/margin_mean": 15.353862762451172, + "margin_dpo/margin_std": 18.465240478515625, + "step": 143 + }, + { + "epoch": 0.21145374449339208, + "grad_norm": 43.79653549194336, + "learning_rate": 4.821786031898176e-07, + "logits/chosen": -0.6671550869941711, + "logits/rejected": -0.6211960315704346, + "logps/chosen": -66.0051498413086, + "logps/ref_chosen": -59.90636444091797, + "logps/ref_rejected": -82.00025939941406, + "logps/rejected": -107.91677856445312, + "loss": 0.532, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20202696323394775, + "margin_dpo/beta_margin_grad_std": 0.19462409615516663, + "margin_dpo/beta_margin_mean": 1.9817728996276855, + "margin_dpo/loss_margin_mean": 19.817729949951172, + "margin_dpo/margin_mean": 19.817729949951172, + "margin_dpo/margin_std": 16.256122589111328, + "step": 144 + }, + { + "epoch": 0.21292217327459617, + "grad_norm": 47.29103469848633, + "learning_rate": 4.816996926967401e-07, + "logits/chosen": -0.6466660499572754, + "logits/rejected": -0.6030235290527344, + "logps/chosen": -64.20927429199219, + "logps/ref_chosen": -56.60066604614258, + "logps/ref_rejected": -77.86631774902344, + "logps/rejected": -105.54521179199219, + "loss": 0.5566, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2146737277507782, + "margin_dpo/beta_margin_grad_std": 0.18967363238334656, + "margin_dpo/beta_margin_mean": 2.0070290565490723, + "margin_dpo/loss_margin_mean": 20.07029151916504, + "margin_dpo/margin_mean": 20.070289611816406, + "margin_dpo/margin_std": 18.223201751708984, + "step": 145 + }, + { + "epoch": 0.2143906020558003, + "grad_norm": 70.13373565673828, + "learning_rate": 4.812146767012779e-07, + "logits/chosen": -0.680939793586731, + "logits/rejected": -0.6254955530166626, + "logps/chosen": -76.54412078857422, + "logps/ref_chosen": -66.00045776367188, + "logps/ref_rejected": -81.70278930664062, + "logps/rejected": -108.8254623413086, + "loss": 0.7131, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2562163472175598, + "margin_dpo/beta_margin_grad_std": 0.22194227576255798, + "margin_dpo/beta_margin_mean": 1.657900333404541, + "margin_dpo/loss_margin_mean": 16.579002380371094, + "margin_dpo/margin_mean": 16.579002380371094, + "margin_dpo/margin_std": 17.49138641357422, + "step": 146 + }, + { + "epoch": 0.21585903083700442, + "grad_norm": 57.6500244140625, + "learning_rate": 4.807235679840536e-07, + "logits/chosen": -0.6382741928100586, + "logits/rejected": -0.5938813090324402, + "logps/chosen": -61.75067138671875, + "logps/ref_chosen": -53.405487060546875, + "logps/ref_rejected": -71.39061737060547, + "logps/rejected": -100.0536880493164, + "loss": 0.5682, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.21429036557674408, + "margin_dpo/beta_margin_grad_std": 0.19528846442699432, + "margin_dpo/beta_margin_mean": 2.0317890644073486, + "margin_dpo/loss_margin_mean": 20.317890167236328, + "margin_dpo/margin_mean": 20.317890167236328, + "margin_dpo/margin_std": 19.069602966308594, + "step": 147 + }, + { + "epoch": 0.2173274596182085, + "grad_norm": 50.89360809326172, + "learning_rate": 4.802263794862384e-07, + "logits/chosen": -0.6731536388397217, + "logits/rejected": -0.6413577795028687, + "logps/chosen": -71.63116455078125, + "logps/ref_chosen": -64.93708038330078, + "logps/ref_rejected": -103.09384155273438, + "logps/rejected": -125.92637634277344, + "loss": 0.6758, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.25298935174942017, + "margin_dpo/beta_margin_grad_std": 0.19248813390731812, + "margin_dpo/beta_margin_mean": 1.6138441562652588, + "margin_dpo/loss_margin_mean": 16.13844108581543, + "margin_dpo/margin_mean": 16.13844108581543, + "margin_dpo/margin_std": 15.57655143737793, + "step": 148 + }, + { + "epoch": 0.21879588839941264, + "grad_norm": 43.59981155395508, + "learning_rate": 4.797231243092118e-07, + "logits/chosen": -0.7026668787002563, + "logits/rejected": -0.6738122701644897, + "logps/chosen": -65.29115295410156, + "logps/ref_chosen": -58.47376251220703, + "logps/ref_rejected": -99.31474304199219, + "logps/rejected": -126.91746520996094, + "loss": 0.5037, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19538308680057526, + "margin_dpo/beta_margin_grad_std": 0.18354260921478271, + "margin_dpo/beta_margin_mean": 2.078533887863159, + "margin_dpo/loss_margin_mean": 20.78533935546875, + "margin_dpo/margin_mean": 20.78533935546875, + "margin_dpo/margin_std": 16.75721549987793, + "step": 149 + }, + { + "epoch": 0.22026431718061673, + "grad_norm": 55.560611724853516, + "learning_rate": 4.792138157142157e-07, + "logits/chosen": -0.6683057546615601, + "logits/rejected": -0.6466302871704102, + "logps/chosen": -52.58869934082031, + "logps/ref_chosen": -45.705810546875, + "logps/ref_rejected": -83.34759521484375, + "logps/rejected": -109.74415588378906, + "loss": 0.6181, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2259724736213684, + "margin_dpo/beta_margin_grad_std": 0.19235166907310486, + "margin_dpo/beta_margin_mean": 1.951366662979126, + "margin_dpo/loss_margin_mean": 19.5136661529541, + "margin_dpo/margin_mean": 19.5136661529541, + "margin_dpo/margin_std": 19.547821044921875, + "step": 150 + }, + { + "epoch": 0.22173274596182085, + "grad_norm": 53.323848724365234, + "learning_rate": 4.786984671220053e-07, + "logits/chosen": -0.6942344903945923, + "logits/rejected": -0.6533582210540771, + "logps/chosen": -78.0422592163086, + "logps/ref_chosen": -70.57083129882812, + "logps/ref_rejected": -100.46382141113281, + "logps/rejected": -129.36167907714844, + "loss": 0.5187, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19543182849884033, + "margin_dpo/beta_margin_grad_std": 0.19356155395507812, + "margin_dpo/beta_margin_mean": 2.142643690109253, + "margin_dpo/loss_margin_mean": 21.426437377929688, + "margin_dpo/margin_mean": 21.426435470581055, + "margin_dpo/margin_std": 18.174488067626953, + "step": 151 + }, + { + "epoch": 0.22320117474302498, + "grad_norm": 58.04679870605469, + "learning_rate": 4.78177092112495e-07, + "logits/chosen": -0.6981043815612793, + "logits/rejected": -0.6723449230194092, + "logps/chosen": -65.85396575927734, + "logps/ref_chosen": -60.164390563964844, + "logps/ref_rejected": -106.14045715332031, + "logps/rejected": -133.99301147460938, + "loss": 0.5073, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18981137871742249, + "margin_dpo/beta_margin_grad_std": 0.18638314306735992, + "margin_dpo/beta_margin_mean": 2.2162981033325195, + "margin_dpo/loss_margin_mean": 22.162979125976562, + "margin_dpo/margin_mean": 22.162979125976562, + "margin_dpo/margin_std": 18.572938919067383, + "step": 152 + }, + { + "epoch": 0.22466960352422907, + "grad_norm": 45.65426254272461, + "learning_rate": 4.776497044244016e-07, + "logits/chosen": -0.6792968511581421, + "logits/rejected": -0.6601795554161072, + "logps/chosen": -62.912200927734375, + "logps/ref_chosen": -56.315277099609375, + "logps/ref_rejected": -85.65583801269531, + "logps/rejected": -111.13700866699219, + "loss": 0.6482, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.23670101165771484, + "margin_dpo/beta_margin_grad_std": 0.2141384482383728, + "margin_dpo/beta_margin_mean": 1.888425350189209, + "margin_dpo/loss_margin_mean": 18.884254455566406, + "margin_dpo/margin_mean": 18.884254455566406, + "margin_dpo/margin_std": 19.149822235107422, + "step": 153 + }, + { + "epoch": 0.2261380323054332, + "grad_norm": 70.82756805419922, + "learning_rate": 4.771163179548808e-07, + "logits/chosen": -0.6859003305435181, + "logits/rejected": -0.6607710123062134, + "logps/chosen": -71.26567077636719, + "logps/ref_chosen": -62.74256896972656, + "logps/ref_rejected": -104.24420166015625, + "logps/rejected": -131.44509887695312, + "loss": 0.7219, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2435159683227539, + "margin_dpo/beta_margin_grad_std": 0.2322094589471817, + "margin_dpo/beta_margin_mean": 1.8677783012390137, + "margin_dpo/loss_margin_mean": 18.677783966064453, + "margin_dpo/margin_mean": 18.677783966064453, + "margin_dpo/margin_std": 18.939533233642578, + "step": 154 + }, + { + "epoch": 0.2276064610866373, + "grad_norm": 54.700984954833984, + "learning_rate": 4.7657694675916247e-07, + "logits/chosen": -0.677458643913269, + "logits/rejected": -0.650254487991333, + "logps/chosen": -66.76405334472656, + "logps/ref_chosen": -60.65318298339844, + "logps/ref_rejected": -77.49220275878906, + "logps/rejected": -103.33089447021484, + "loss": 0.5788, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20778781175613403, + "margin_dpo/beta_margin_grad_std": 0.1974395513534546, + "margin_dpo/beta_margin_mean": 1.972782015800476, + "margin_dpo/loss_margin_mean": 19.727819442749023, + "margin_dpo/margin_mean": 19.72781753540039, + "margin_dpo/margin_std": 17.581571578979492, + "step": 155 + }, + { + "epoch": 0.2290748898678414, + "grad_norm": 88.04608154296875, + "learning_rate": 4.7603160505017893e-07, + "logits/chosen": -0.653121829032898, + "logits/rejected": -0.622460126876831, + "logps/chosen": -79.57420349121094, + "logps/ref_chosen": -69.49188232421875, + "logps/ref_rejected": -77.1692886352539, + "logps/rejected": -102.35071563720703, + "loss": 0.9578, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2811535894870758, + "margin_dpo/beta_margin_grad_std": 0.2631846070289612, + "margin_dpo/beta_margin_mean": 1.5099093914031982, + "margin_dpo/loss_margin_mean": 15.09909439086914, + "margin_dpo/margin_mean": 15.09909439086914, + "margin_dpo/margin_std": 20.09097671508789, + "step": 156 + }, + { + "epoch": 0.2305433186490455, + "grad_norm": 60.531410217285156, + "learning_rate": 4.7548030719819154e-07, + "logits/chosen": -0.7271685600280762, + "logits/rejected": -0.6973283290863037, + "logps/chosen": -71.76556396484375, + "logps/ref_chosen": -61.368438720703125, + "logps/ref_rejected": -107.64636993408203, + "logps/rejected": -139.60733032226562, + "loss": 0.5432, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20604455471038818, + "margin_dpo/beta_margin_grad_std": 0.1988464891910553, + "margin_dpo/beta_margin_mean": 2.1563832759857178, + "margin_dpo/loss_margin_mean": 21.563831329345703, + "margin_dpo/margin_mean": 21.563831329345703, + "margin_dpo/margin_std": 19.046764373779297, + "step": 157 + }, + { + "epoch": 0.23201174743024963, + "grad_norm": 46.93947219848633, + "learning_rate": 4.7492306773041136e-07, + "logits/chosen": -0.6574522256851196, + "logits/rejected": -0.6339297294616699, + "logps/chosen": -65.11067962646484, + "logps/ref_chosen": -57.61292266845703, + "logps/ref_rejected": -113.6946792602539, + "logps/rejected": -143.18002319335938, + "loss": 0.5655, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.21865713596343994, + "margin_dpo/beta_margin_grad_std": 0.1967364400625229, + "margin_dpo/beta_margin_mean": 2.198759078979492, + "margin_dpo/loss_margin_mean": 21.987590789794922, + "margin_dpo/margin_mean": 21.987590789794922, + "margin_dpo/margin_std": 21.580215454101562, + "step": 158 + }, + { + "epoch": 0.23348017621145375, + "grad_norm": 52.955257415771484, + "learning_rate": 4.743599013306165e-07, + "logits/chosen": -0.6695908308029175, + "logits/rejected": -0.628494143486023, + "logps/chosen": -89.92948150634766, + "logps/ref_chosen": -81.56034088134766, + "logps/ref_rejected": -88.8987045288086, + "logps/rejected": -116.74146270751953, + "loss": 0.6625, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.23920385539531708, + "margin_dpo/beta_margin_grad_std": 0.21742284297943115, + "margin_dpo/beta_margin_mean": 1.947361707687378, + "margin_dpo/loss_margin_mean": 19.473617553710938, + "margin_dpo/margin_mean": 19.473617553710938, + "margin_dpo/margin_std": 20.372608184814453, + "step": 159 + }, + { + "epoch": 0.23494860499265785, + "grad_norm": 58.06504440307617, + "learning_rate": 4.737908228387656e-07, + "logits/chosen": -0.6885409355163574, + "logits/rejected": -0.6468052864074707, + "logps/chosen": -74.30818939208984, + "logps/ref_chosen": -65.73088073730469, + "logps/ref_rejected": -97.21781921386719, + "logps/rejected": -125.66719818115234, + "loss": 0.6855, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.24562396109104156, + "margin_dpo/beta_margin_grad_std": 0.22771283984184265, + "margin_dpo/beta_margin_mean": 1.9872074127197266, + "margin_dpo/loss_margin_mean": 19.872074127197266, + "margin_dpo/margin_mean": 19.872072219848633, + "margin_dpo/margin_std": 21.958354949951172, + "step": 160 + }, + { + "epoch": 0.23641703377386197, + "grad_norm": 53.40824508666992, + "learning_rate": 4.7321584725060594e-07, + "logits/chosen": -0.6996691226959229, + "logits/rejected": -0.6711582541465759, + "logps/chosen": -60.577247619628906, + "logps/ref_chosen": -52.43647766113281, + "logps/ref_rejected": -83.43095397949219, + "logps/rejected": -111.64784240722656, + "loss": 0.6506, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.23106649518013, + "margin_dpo/beta_margin_grad_std": 0.22650553286075592, + "margin_dpo/beta_margin_mean": 2.0076117515563965, + "margin_dpo/loss_margin_mean": 20.07611656188965, + "margin_dpo/margin_mean": 20.07611846923828, + "margin_dpo/margin_std": 19.994857788085938, + "step": 161 + }, + { + "epoch": 0.23788546255506607, + "grad_norm": 46.01493835449219, + "learning_rate": 4.7263498971727905e-07, + "logits/chosen": -0.6546447277069092, + "logits/rejected": -0.6173849105834961, + "logps/chosen": -70.60856628417969, + "logps/ref_chosen": -62.61058807373047, + "logps/ref_rejected": -89.39057922363281, + "logps/rejected": -116.71580505371094, + "loss": 0.6419, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2332153171300888, + "margin_dpo/beta_margin_grad_std": 0.20986613631248474, + "margin_dpo/beta_margin_mean": 1.932724952697754, + "margin_dpo/loss_margin_mean": 19.32724952697754, + "margin_dpo/margin_mean": 19.32724952697754, + "margin_dpo/margin_std": 19.705549240112305, + "step": 162 + }, + { + "epoch": 0.2393538913362702, + "grad_norm": 42.86497116088867, + "learning_rate": 4.720482655449212e-07, + "logits/chosen": -0.65444415807724, + "logits/rejected": -0.6160274744033813, + "logps/chosen": -62.261444091796875, + "logps/ref_chosen": -55.021629333496094, + "logps/ref_rejected": -75.41822052001953, + "logps/rejected": -101.32667541503906, + "loss": 0.5951, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.21980655193328857, + "margin_dpo/beta_margin_grad_std": 0.20289334654808044, + "margin_dpo/beta_margin_mean": 1.8668642044067383, + "margin_dpo/loss_margin_mean": 18.668642044067383, + "margin_dpo/margin_mean": 18.668642044067383, + "margin_dpo/margin_std": 16.874954223632812, + "step": 163 + }, + { + "epoch": 0.24082232011747431, + "grad_norm": 37.281150817871094, + "learning_rate": 4.714556901942599e-07, + "logits/chosen": -0.6877849102020264, + "logits/rejected": -0.6435602903366089, + "logps/chosen": -61.4527702331543, + "logps/ref_chosen": -55.64066696166992, + "logps/ref_rejected": -79.66463470458984, + "logps/rejected": -106.46293640136719, + "loss": 0.464, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18166950345039368, + "margin_dpo/beta_margin_grad_std": 0.16736756265163422, + "margin_dpo/beta_margin_mean": 2.0986199378967285, + "margin_dpo/loss_margin_mean": 20.98619842529297, + "margin_dpo/margin_mean": 20.98619842529297, + "margin_dpo/margin_std": 15.089117050170898, + "step": 164 + }, + { + "epoch": 0.2422907488986784, + "grad_norm": 65.39411163330078, + "learning_rate": 4.708572792802069e-07, + "logits/chosen": -0.7008275985717773, + "logits/rejected": -0.6510541439056396, + "logps/chosen": -69.8692398071289, + "logps/ref_chosen": -61.310691833496094, + "logps/ref_rejected": -73.67060852050781, + "logps/rejected": -96.39982604980469, + "loss": 0.7393, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.26950976252555847, + "margin_dpo/beta_margin_grad_std": 0.200277179479599, + "margin_dpo/beta_margin_mean": 1.4170664548873901, + "margin_dpo/loss_margin_mean": 14.170663833618164, + "margin_dpo/margin_mean": 14.17066478729248, + "margin_dpo/margin_std": 14.783781051635742, + "step": 165 + }, + { + "epoch": 0.24375917767988253, + "grad_norm": 48.58378982543945, + "learning_rate": 4.702530485714461e-07, + "logits/chosen": -0.6584955453872681, + "logits/rejected": -0.650241494178772, + "logps/chosen": -59.29880905151367, + "logps/ref_chosen": -50.98360061645508, + "logps/ref_rejected": -98.09512329101562, + "logps/rejected": -129.55075073242188, + "loss": 0.5363, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2068173885345459, + "margin_dpo/beta_margin_grad_std": 0.18966558575630188, + "margin_dpo/beta_margin_mean": 2.3140416145324707, + "margin_dpo/loss_margin_mean": 23.14041519165039, + "margin_dpo/margin_mean": 23.14041519165039, + "margin_dpo/margin_std": 23.364418029785156, + "step": 166 + }, + { + "epoch": 0.24522760646108663, + "grad_norm": 52.58329391479492, + "learning_rate": 4.6964301399001877e-07, + "logits/chosen": -0.6375908851623535, + "logits/rejected": -0.6227909922599792, + "logps/chosen": -58.83246612548828, + "logps/ref_chosen": -50.42409133911133, + "logps/ref_rejected": -96.03042602539062, + "logps/rejected": -128.48484802246094, + "loss": 0.4969, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17307597398757935, + "margin_dpo/beta_margin_grad_std": 0.1926582306623459, + "margin_dpo/beta_margin_mean": 2.404604911804199, + "margin_dpo/loss_margin_mean": 24.046049118041992, + "margin_dpo/margin_mean": 24.04604721069336, + "margin_dpo/margin_std": 18.615556716918945, + "step": 167 + }, + { + "epoch": 0.24669603524229075, + "grad_norm": 46.499046325683594, + "learning_rate": 4.690271916109034e-07, + "logits/chosen": -0.7022169232368469, + "logits/rejected": -0.6692053079605103, + "logps/chosen": -57.07928466796875, + "logps/ref_chosen": -49.46282196044922, + "logps/ref_rejected": -75.30854797363281, + "logps/rejected": -101.93223571777344, + "loss": 0.5398, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20751315355300903, + "margin_dpo/beta_margin_grad_std": 0.18526721000671387, + "margin_dpo/beta_margin_mean": 1.9007223844528198, + "margin_dpo/loss_margin_mean": 19.00722312927246, + "margin_dpo/margin_mean": 19.007225036621094, + "margin_dpo/margin_std": 15.336655616760254, + "step": 168 + }, + { + "epoch": 0.24816446402349487, + "grad_norm": 55.35097122192383, + "learning_rate": 4.6840559766159235e-07, + "logits/chosen": -0.6735790967941284, + "logits/rejected": -0.644471287727356, + "logps/chosen": -67.24217224121094, + "logps/ref_chosen": -59.803443908691406, + "logps/ref_rejected": -83.34574890136719, + "logps/rejected": -108.11927795410156, + "loss": 0.7689, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2712000906467438, + "margin_dpo/beta_margin_grad_std": 0.2246740311384201, + "margin_dpo/beta_margin_mean": 1.73348069190979, + "margin_dpo/loss_margin_mean": 17.334806442260742, + "margin_dpo/margin_mean": 17.334806442260742, + "margin_dpo/margin_std": 21.838268280029297, + "step": 169 + }, + { + "epoch": 0.24963289280469897, + "grad_norm": 44.05381774902344, + "learning_rate": 4.6777824852166437e-07, + "logits/chosen": -0.6359131336212158, + "logits/rejected": -0.6122620105743408, + "logps/chosen": -55.612205505371094, + "logps/ref_chosen": -49.471771240234375, + "logps/ref_rejected": -75.91734313964844, + "logps/rejected": -103.55111694335938, + "loss": 0.5703, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2044505774974823, + "margin_dpo/beta_margin_grad_std": 0.19848260283470154, + "margin_dpo/beta_margin_mean": 2.149333953857422, + "margin_dpo/loss_margin_mean": 21.49333953857422, + "margin_dpo/margin_mean": 21.49333953857422, + "margin_dpo/margin_std": 18.57598876953125, + "step": 170 + }, + { + "epoch": 0.2511013215859031, + "grad_norm": 62.48033142089844, + "learning_rate": 4.6714516072235273e-07, + "logits/chosen": -0.6769453883171082, + "logits/rejected": -0.6254656314849854, + "logps/chosen": -92.27412414550781, + "logps/ref_chosen": -84.49931335449219, + "logps/ref_rejected": -109.38209533691406, + "logps/rejected": -135.86390686035156, + "loss": 0.6447, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.23198726773262024, + "margin_dpo/beta_margin_grad_std": 0.21067272126674652, + "margin_dpo/beta_margin_mean": 1.8707005977630615, + "margin_dpo/loss_margin_mean": 18.70700454711914, + "margin_dpo/margin_mean": 18.70700454711914, + "margin_dpo/margin_std": 19.6763916015625, + "step": 171 + }, + { + "epoch": 0.2525697503671072, + "grad_norm": 65.7638168334961, + "learning_rate": 4.6650635094610966e-07, + "logits/chosen": -0.7173855304718018, + "logits/rejected": -0.6745563745498657, + "logps/chosen": -78.89846801757812, + "logps/ref_chosen": -68.65391540527344, + "logps/ref_rejected": -85.43667602539062, + "logps/rejected": -113.78555297851562, + "loss": 0.7006, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.23992997407913208, + "margin_dpo/beta_margin_grad_std": 0.2309492528438568, + "margin_dpo/beta_margin_mean": 1.8104331493377686, + "margin_dpo/loss_margin_mean": 18.104331970214844, + "margin_dpo/margin_mean": 18.104331970214844, + "margin_dpo/margin_std": 18.842220306396484, + "step": 172 + }, + { + "epoch": 0.2540381791483113, + "grad_norm": 49.16233444213867, + "learning_rate": 4.6586183602616687e-07, + "logits/chosen": -0.7287572026252747, + "logits/rejected": -0.673369288444519, + "logps/chosen": -70.7244873046875, + "logps/ref_chosen": -63.050872802734375, + "logps/ref_rejected": -78.68392944335938, + "logps/rejected": -104.60391998291016, + "loss": 0.6261, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.22973671555519104, + "margin_dpo/beta_margin_grad_std": 0.2065761685371399, + "margin_dpo/beta_margin_mean": 1.824638843536377, + "margin_dpo/loss_margin_mean": 18.246387481689453, + "margin_dpo/margin_mean": 18.246387481689453, + "margin_dpo/margin_std": 17.423324584960938, + "step": 173 + }, + { + "epoch": 0.2555066079295154, + "grad_norm": 53.320858001708984, + "learning_rate": 4.652116329460919e-07, + "logits/chosen": -0.6692589521408081, + "logits/rejected": -0.669571042060852, + "logps/chosen": -61.95579528808594, + "logps/ref_chosen": -53.36296844482422, + "logps/ref_rejected": -101.91120910644531, + "logps/rejected": -128.92747497558594, + "loss": 0.6537, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2317759096622467, + "margin_dpo/beta_margin_grad_std": 0.21807681024074554, + "margin_dpo/beta_margin_mean": 1.842343807220459, + "margin_dpo/loss_margin_mean": 18.423437118530273, + "margin_dpo/margin_mean": 18.423437118530273, + "margin_dpo/margin_std": 17.788700103759766, + "step": 174 + }, + { + "epoch": 0.25697503671071953, + "grad_norm": 47.415931701660156, + "learning_rate": 4.645557588393406e-07, + "logits/chosen": -0.6318604946136475, + "logits/rejected": -0.6075109243392944, + "logps/chosen": -52.48356628417969, + "logps/ref_chosen": -45.417762756347656, + "logps/ref_rejected": -89.50579833984375, + "logps/rejected": -120.28120422363281, + "loss": 0.4447, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1811106652021408, + "margin_dpo/beta_margin_grad_std": 0.15868915617465973, + "margin_dpo/beta_margin_mean": 2.370960235595703, + "margin_dpo/loss_margin_mean": 23.7096004486084, + "margin_dpo/margin_mean": 23.7096004486084, + "margin_dpo/margin_std": 19.452625274658203, + "step": 175 + }, + { + "epoch": 0.25844346549192365, + "grad_norm": 43.29652786254883, + "learning_rate": 4.638942309888058e-07, + "logits/chosen": -0.7066097259521484, + "logits/rejected": -0.6949463486671448, + "logps/chosen": -57.918304443359375, + "logps/ref_chosen": -50.45283889770508, + "logps/ref_rejected": -95.55896759033203, + "logps/rejected": -124.98609924316406, + "loss": 0.4807, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18598869442939758, + "margin_dpo/beta_margin_grad_std": 0.1866185963153839, + "margin_dpo/beta_margin_mean": 2.196166515350342, + "margin_dpo/loss_margin_mean": 21.96166229248047, + "margin_dpo/margin_mean": 21.9616641998291, + "margin_dpo/margin_std": 17.302711486816406, + "step": 176 + }, + { + "epoch": 0.2599118942731278, + "grad_norm": 41.06880187988281, + "learning_rate": 4.6322706682636137e-07, + "logits/chosen": -0.6657878160476685, + "logits/rejected": -0.6222826242446899, + "logps/chosen": -70.47785186767578, + "logps/ref_chosen": -61.21646499633789, + "logps/ref_rejected": -95.89378356933594, + "logps/rejected": -127.02268981933594, + "loss": 0.5421, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2077367603778839, + "margin_dpo/beta_margin_grad_std": 0.19527243077754974, + "margin_dpo/beta_margin_mean": 2.1867523193359375, + "margin_dpo/loss_margin_mean": 21.867523193359375, + "margin_dpo/margin_mean": 21.867523193359375, + "margin_dpo/margin_std": 20.43021011352539, + "step": 177 + }, + { + "epoch": 0.26138032305433184, + "grad_norm": 56.99783706665039, + "learning_rate": 4.6255428393240354e-07, + "logits/chosen": -0.6359624862670898, + "logits/rejected": -0.6415029764175415, + "logps/chosen": -70.54104614257812, + "logps/ref_chosen": -58.26478958129883, + "logps/ref_rejected": -105.36532592773438, + "logps/rejected": -141.9470672607422, + "loss": 0.5463, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1983056217432022, + "margin_dpo/beta_margin_grad_std": 0.21389643847942352, + "margin_dpo/beta_margin_mean": 2.4305472373962402, + "margin_dpo/loss_margin_mean": 24.30547332763672, + "margin_dpo/margin_mean": 24.30547332763672, + "margin_dpo/margin_std": 22.445331573486328, + "step": 178 + }, + { + "epoch": 0.26284875183553597, + "grad_norm": 71.98530578613281, + "learning_rate": 4.6187590003538724e-07, + "logits/chosen": -0.6545614004135132, + "logits/rejected": -0.629997730255127, + "logps/chosen": -72.10971069335938, + "logps/ref_chosen": -61.05832290649414, + "logps/ref_rejected": -90.52782440185547, + "logps/rejected": -126.18659973144531, + "loss": 0.672, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.21123456954956055, + "margin_dpo/beta_margin_grad_std": 0.24784591794013977, + "margin_dpo/beta_margin_mean": 2.460737705230713, + "margin_dpo/loss_margin_mean": 24.607376098632812, + "margin_dpo/margin_mean": 24.607376098632812, + "margin_dpo/margin_std": 23.464365005493164, + "step": 179 + }, + { + "epoch": 0.2643171806167401, + "grad_norm": 45.19013214111328, + "learning_rate": 4.611919330113591e-07, + "logits/chosen": -0.646220862865448, + "logits/rejected": -0.6155471801757812, + "logps/chosen": -62.99604034423828, + "logps/ref_chosen": -54.34272003173828, + "logps/ref_rejected": -98.21183776855469, + "logps/rejected": -131.40283203125, + "loss": 0.4572, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17428556084632874, + "margin_dpo/beta_margin_grad_std": 0.19251887500286102, + "margin_dpo/beta_margin_mean": 2.453767776489258, + "margin_dpo/loss_margin_mean": 24.53767967224121, + "margin_dpo/margin_mean": 24.53767967224121, + "margin_dpo/margin_std": 19.19011688232422, + "step": 180 + }, + { + "epoch": 0.2657856093979442, + "grad_norm": 53.57444381713867, + "learning_rate": 4.605024008834863e-07, + "logits/chosen": -0.684493899345398, + "logits/rejected": -0.6450868844985962, + "logps/chosen": -63.24270248413086, + "logps/ref_chosen": -55.000457763671875, + "logps/ref_rejected": -61.656166076660156, + "logps/rejected": -87.48631286621094, + "loss": 0.6668, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.24293041229248047, + "margin_dpo/beta_margin_grad_std": 0.21182817220687866, + "margin_dpo/beta_margin_mean": 1.7587906122207642, + "margin_dpo/loss_margin_mean": 17.587905883789062, + "margin_dpo/margin_mean": 17.587905883789062, + "margin_dpo/margin_std": 18.199848175048828, + "step": 181 + }, + { + "epoch": 0.26725403817914833, + "grad_norm": 58.0034065246582, + "learning_rate": 4.598073218215817e-07, + "logits/chosen": -0.6252259016036987, + "logits/rejected": -0.6054234504699707, + "logps/chosen": -50.210166931152344, + "logps/ref_chosen": -41.10784912109375, + "logps/ref_rejected": -89.5215835571289, + "logps/rejected": -125.56619262695312, + "loss": 0.5722, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18403324484825134, + "margin_dpo/beta_margin_grad_std": 0.22924652695655823, + "margin_dpo/beta_margin_mean": 2.6942296028137207, + "margin_dpo/loss_margin_mean": 26.94229507446289, + "margin_dpo/margin_mean": 26.942296981811523, + "margin_dpo/margin_std": 23.49362564086914, + "step": 182 + }, + { + "epoch": 0.2687224669603524, + "grad_norm": 90.1989974975586, + "learning_rate": 4.5910671414162484e-07, + "logits/chosen": -0.664011538028717, + "logits/rejected": -0.612758994102478, + "logps/chosen": -69.1800765991211, + "logps/ref_chosen": -57.524559020996094, + "logps/ref_rejected": -75.97572326660156, + "logps/rejected": -108.55187225341797, + "loss": 0.5191, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2063201367855072, + "margin_dpo/beta_margin_grad_std": 0.16726571321487427, + "margin_dpo/beta_margin_mean": 2.0920636653900146, + "margin_dpo/loss_margin_mean": 20.920635223388672, + "margin_dpo/margin_mean": 20.920637130737305, + "margin_dpo/margin_std": 16.628860473632812, + "step": 183 + }, + { + "epoch": 0.2701908957415565, + "grad_norm": 63.455596923828125, + "learning_rate": 4.5840059630527985e-07, + "logits/chosen": -0.6686521768569946, + "logits/rejected": -0.658584713935852, + "logps/chosen": -67.59618377685547, + "logps/ref_chosen": -58.544952392578125, + "logps/ref_rejected": -76.63406372070312, + "logps/rejected": -101.26787567138672, + "loss": 0.7089, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2659500241279602, + "margin_dpo/beta_margin_grad_std": 0.19632551074028015, + "margin_dpo/beta_margin_mean": 1.5582584142684937, + "margin_dpo/loss_margin_mean": 15.582584381103516, + "margin_dpo/margin_mean": 15.582584381103516, + "margin_dpo/margin_std": 17.532541275024414, + "step": 184 + }, + { + "epoch": 0.27165932452276065, + "grad_norm": 59.450496673583984, + "learning_rate": 4.5768898691940836e-07, + "logits/chosen": -0.6913318634033203, + "logits/rejected": -0.6381030678749084, + "logps/chosen": -71.63302612304688, + "logps/ref_chosen": -62.02584457397461, + "logps/ref_rejected": -73.76260375976562, + "logps/rejected": -98.8121337890625, + "loss": 0.8254, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2810859680175781, + "margin_dpo/beta_margin_grad_std": 0.23385247588157654, + "margin_dpo/beta_margin_mean": 1.5442359447479248, + "margin_dpo/loss_margin_mean": 15.442358016967773, + "margin_dpo/margin_mean": 15.442358016967773, + "margin_dpo/margin_std": 18.860673904418945, + "step": 185 + }, + { + "epoch": 0.27312775330396477, + "grad_norm": 42.19928741455078, + "learning_rate": 4.5697190473557947e-07, + "logits/chosen": -0.6911958456039429, + "logits/rejected": -0.635596752166748, + "logps/chosen": -79.12220764160156, + "logps/ref_chosen": -69.35346984863281, + "logps/ref_rejected": -88.07244873046875, + "logps/rejected": -123.28886413574219, + "loss": 0.3942, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15830203890800476, + "margin_dpo/beta_margin_grad_std": 0.16447928547859192, + "margin_dpo/beta_margin_mean": 2.544766902923584, + "margin_dpo/loss_margin_mean": 25.447669982910156, + "margin_dpo/margin_mean": 25.447669982910156, + "margin_dpo/margin_std": 18.903629302978516, + "step": 186 + }, + { + "epoch": 0.2745961820851689, + "grad_norm": 66.2356185913086, + "learning_rate": 4.5624936864957555e-07, + "logits/chosen": -0.6642763614654541, + "logits/rejected": -0.6538236141204834, + "logps/chosen": -64.20773315429688, + "logps/ref_chosen": -52.75646209716797, + "logps/ref_rejected": -81.96910095214844, + "logps/rejected": -112.33505249023438, + "loss": 0.7275, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.24081790447235107, + "margin_dpo/beta_margin_grad_std": 0.22335243225097656, + "margin_dpo/beta_margin_mean": 1.8914674520492554, + "margin_dpo/loss_margin_mean": 18.914674758911133, + "margin_dpo/margin_mean": 18.914674758911133, + "margin_dpo/margin_std": 20.249263763427734, + "step": 187 + }, + { + "epoch": 0.27606461086637296, + "grad_norm": 57.380775451660156, + "learning_rate": 4.5552139770089454e-07, + "logits/chosen": -0.6732475757598877, + "logits/rejected": -0.655211329460144, + "logps/chosen": -58.014007568359375, + "logps/ref_chosen": -49.415489196777344, + "logps/ref_rejected": -89.54043579101562, + "logps/rejected": -119.85971069335938, + "loss": 0.5978, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20978838205337524, + "margin_dpo/beta_margin_grad_std": 0.21329578757286072, + "margin_dpo/beta_margin_mean": 2.1720757484436035, + "margin_dpo/loss_margin_mean": 21.72075653076172, + "margin_dpo/margin_mean": 21.72075653076172, + "margin_dpo/margin_std": 20.222156524658203, + "step": 188 + }, + { + "epoch": 0.2775330396475771, + "grad_norm": 59.92390441894531, + "learning_rate": 4.5478801107224794e-07, + "logits/chosen": -0.6710000038146973, + "logits/rejected": -0.6149500012397766, + "logps/chosen": -60.63805389404297, + "logps/ref_chosen": -52.39896011352539, + "logps/ref_rejected": -72.16735076904297, + "logps/rejected": -102.7850341796875, + "loss": 0.5266, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19314193725585938, + "margin_dpo/beta_margin_grad_std": 0.18688946962356567, + "margin_dpo/beta_margin_mean": 2.2378592491149902, + "margin_dpo/loss_margin_mean": 22.378591537475586, + "margin_dpo/margin_mean": 22.378589630126953, + "margin_dpo/margin_std": 19.83676528930664, + "step": 189 + }, + { + "epoch": 0.2790014684287812, + "grad_norm": 49.65851974487305, + "learning_rate": 4.5404922808905543e-07, + "logits/chosen": -0.6853049993515015, + "logits/rejected": -0.6321940422058105, + "logps/chosen": -73.16799926757812, + "logps/ref_chosen": -64.68305969238281, + "logps/ref_rejected": -102.55052185058594, + "logps/rejected": -134.60731506347656, + "loss": 0.5028, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19292610883712769, + "margin_dpo/beta_margin_grad_std": 0.1859111189842224, + "margin_dpo/beta_margin_mean": 2.357185125350952, + "margin_dpo/loss_margin_mean": 23.57185173034668, + "margin_dpo/margin_mean": 23.571849822998047, + "margin_dpo/margin_std": 20.523746490478516, + "step": 190 + }, + { + "epoch": 0.28046989720998533, + "grad_norm": 34.40500259399414, + "learning_rate": 4.5330506821893565e-07, + "logits/chosen": -0.6436042785644531, + "logits/rejected": -0.6251427531242371, + "logps/chosen": -75.3275146484375, + "logps/ref_chosen": -68.65887451171875, + "logps/ref_rejected": -110.1396713256836, + "logps/rejected": -144.67477416992188, + "loss": 0.4349, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16417238116264343, + "margin_dpo/beta_margin_grad_std": 0.18410329520702362, + "margin_dpo/beta_margin_mean": 2.7866461277008057, + "margin_dpo/loss_margin_mean": 27.866458892822266, + "margin_dpo/margin_mean": 27.866458892822266, + "margin_dpo/margin_std": 24.24535369873047, + "step": 191 + }, + { + "epoch": 0.28193832599118945, + "grad_norm": 54.248565673828125, + "learning_rate": 4.5255555107119336e-07, + "logits/chosen": -0.6639034748077393, + "logits/rejected": -0.6402075290679932, + "logps/chosen": -80.83169555664062, + "logps/ref_chosen": -69.72691345214844, + "logps/ref_rejected": -103.32135009765625, + "logps/rejected": -138.1024627685547, + "loss": 0.5458, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20180875062942505, + "margin_dpo/beta_margin_grad_std": 0.21217647194862366, + "margin_dpo/beta_margin_mean": 2.367633819580078, + "margin_dpo/loss_margin_mean": 23.67633628845215, + "margin_dpo/margin_mean": 23.67633819580078, + "margin_dpo/margin_std": 21.766090393066406, + "step": 192 + }, + { + "epoch": 0.2834067547723935, + "grad_norm": 72.17058563232422, + "learning_rate": 4.5180069639630236e-07, + "logits/chosen": -0.7117282152175903, + "logits/rejected": -0.6623973846435547, + "logps/chosen": -71.19960021972656, + "logps/ref_chosen": -60.19049835205078, + "logps/ref_rejected": -76.40755462646484, + "logps/rejected": -104.30625915527344, + "loss": 0.7828, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.23411771655082703, + "margin_dpo/beta_margin_grad_std": 0.23166634142398834, + "margin_dpo/beta_margin_mean": 1.6889591217041016, + "margin_dpo/loss_margin_mean": 16.889591217041016, + "margin_dpo/margin_mean": 16.889591217041016, + "margin_dpo/margin_std": 17.94813346862793, + "step": 193 + }, + { + "epoch": 0.28487518355359764, + "grad_norm": 36.26988983154297, + "learning_rate": 4.510405240853854e-07, + "logits/chosen": -0.6696387529373169, + "logits/rejected": -0.6456412076950073, + "logps/chosen": -45.34961700439453, + "logps/ref_chosen": -37.84037399291992, + "logps/ref_rejected": -60.684783935546875, + "logps/rejected": -90.98521423339844, + "loss": 0.5049, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1981307864189148, + "margin_dpo/beta_margin_grad_std": 0.1864275336265564, + "margin_dpo/beta_margin_mean": 2.279118776321411, + "margin_dpo/loss_margin_mean": 22.791187286376953, + "margin_dpo/margin_mean": 22.791187286376953, + "margin_dpo/margin_std": 19.600643157958984, + "step": 194 + }, + { + "epoch": 0.28634361233480177, + "grad_norm": 52.49971008300781, + "learning_rate": 4.5027505416968985e-07, + "logits/chosen": -0.6493013501167297, + "logits/rejected": -0.646949291229248, + "logps/chosen": -66.55081939697266, + "logps/ref_chosen": -54.891571044921875, + "logps/ref_rejected": -96.77095794677734, + "logps/rejected": -130.78152465820312, + "loss": 0.4842, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18146619200706482, + "margin_dpo/beta_margin_grad_std": 0.19031144678592682, + "margin_dpo/beta_margin_mean": 2.2351322174072266, + "margin_dpo/loss_margin_mean": 22.351322174072266, + "margin_dpo/margin_mean": 22.351322174072266, + "margin_dpo/margin_std": 18.570903778076172, + "step": 195 + }, + { + "epoch": 0.2878120411160059, + "grad_norm": 42.67164993286133, + "learning_rate": 4.495043068200599e-07, + "logits/chosen": -0.6235789060592651, + "logits/rejected": -0.5861127972602844, + "logps/chosen": -61.65058135986328, + "logps/ref_chosen": -53.245243072509766, + "logps/ref_rejected": -76.05294799804688, + "logps/rejected": -111.39912414550781, + "loss": 0.442, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16935831308364868, + "margin_dpo/beta_margin_grad_std": 0.18017151951789856, + "margin_dpo/beta_margin_mean": 2.6940836906433105, + "margin_dpo/loss_margin_mean": 26.940837860107422, + "margin_dpo/margin_mean": 26.940837860107422, + "margin_dpo/margin_std": 22.081172943115234, + "step": 196 + }, + { + "epoch": 0.28928046989721, + "grad_norm": 47.60453414916992, + "learning_rate": 4.4872830234640493e-07, + "logits/chosen": -0.6239949464797974, + "logits/rejected": -0.6122224926948547, + "logps/chosen": -69.72303009033203, + "logps/ref_chosen": -60.42033767700195, + "logps/ref_rejected": -77.20890808105469, + "logps/rejected": -107.96568298339844, + "loss": 0.5113, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19921091198921204, + "margin_dpo/beta_margin_grad_std": 0.17500488460063934, + "margin_dpo/beta_margin_mean": 2.1454074382781982, + "margin_dpo/loss_margin_mean": 21.45407485961914, + "margin_dpo/margin_mean": 21.45407485961914, + "margin_dpo/margin_std": 18.467487335205078, + "step": 197 + }, + { + "epoch": 0.2907488986784141, + "grad_norm": 52.930660247802734, + "learning_rate": 4.479470611971645e-07, + "logits/chosen": -0.651822566986084, + "logits/rejected": -0.6399349570274353, + "logps/chosen": -64.81780242919922, + "logps/ref_chosen": -55.03618621826172, + "logps/ref_rejected": -97.24325561523438, + "logps/rejected": -130.20297241210938, + "loss": 0.5069, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19629308581352234, + "margin_dpo/beta_margin_grad_std": 0.18192753195762634, + "margin_dpo/beta_margin_mean": 2.3178110122680664, + "margin_dpo/loss_margin_mean": 23.17810821533203, + "margin_dpo/margin_mean": 23.17810821533203, + "margin_dpo/margin_std": 22.717308044433594, + "step": 198 + }, + { + "epoch": 0.2922173274596182, + "grad_norm": 54.65925216674805, + "learning_rate": 4.471606039587695e-07, + "logits/chosen": -0.6595567464828491, + "logits/rejected": -0.6411717534065247, + "logps/chosen": -66.6902847290039, + "logps/ref_chosen": -56.828826904296875, + "logps/ref_rejected": -84.64820861816406, + "logps/rejected": -117.4836654663086, + "loss": 0.5925, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.21100641787052155, + "margin_dpo/beta_margin_grad_std": 0.21913698315620422, + "margin_dpo/beta_margin_mean": 2.2973995208740234, + "margin_dpo/loss_margin_mean": 22.973995208740234, + "margin_dpo/margin_mean": 22.973995208740234, + "margin_dpo/margin_std": 22.114442825317383, + "step": 199 + }, + { + "epoch": 0.2936857562408223, + "grad_norm": 69.58685302734375, + "learning_rate": 4.4636895135509966e-07, + "logits/chosen": -0.6506587862968445, + "logits/rejected": -0.6131795644760132, + "logps/chosen": -64.16134643554688, + "logps/ref_chosen": -53.06706237792969, + "logps/ref_rejected": -80.60843658447266, + "logps/rejected": -116.44496154785156, + "loss": 0.6607, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.21007482707500458, + "margin_dpo/beta_margin_grad_std": 0.22120529413223267, + "margin_dpo/beta_margin_mean": 2.4742238521575928, + "margin_dpo/loss_margin_mean": 24.742237091064453, + "margin_dpo/margin_mean": 24.742237091064453, + "margin_dpo/margin_std": 25.760879516601562, + "step": 200 + }, + { + "epoch": 0.2936857562408223, + "eval_logits/chosen": -0.6426228284835815, + "eval_logits/rejected": -0.6196746826171875, + "eval_logps/chosen": -92.99790954589844, + "eval_logps/ref_chosen": -79.05104064941406, + "eval_logps/ref_rejected": -86.79793548583984, + "eval_logps/rejected": -115.42741394042969, + "eval_loss": 0.4791145920753479, + "eval_margin_dpo/beta": 0.10000000149011612, + "eval_margin_dpo/beta_margin_grad_mean": -0.31089797616004944, + "eval_margin_dpo/beta_margin_grad_std": 0.24731215834617615, + "eval_margin_dpo/beta_margin_mean": 1.4682612419128418, + "eval_margin_dpo/loss_margin_mean": 14.682612419128418, + "eval_margin_dpo/margin_mean": 14.682612419128418, + "eval_margin_dpo/margin_std": 21.162776947021484, + "eval_runtime": 39.8807, + "eval_samples_per_second": 58.65, + "eval_steps_per_second": 1.856, + "step": 200 + }, + { + "epoch": 0.29515418502202645, + "grad_norm": 53.58722686767578, + "learning_rate": 4.455721242469372e-07, + "logits/chosen": -0.6264992356300354, + "logits/rejected": -0.5962362289428711, + "logps/chosen": -83.7801513671875, + "logps/ref_chosen": -75.4022216796875, + "logps/ref_rejected": -114.80821990966797, + "logps/rejected": -148.85873413085938, + "loss": 0.5285, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1883956640958786, + "margin_dpo/beta_margin_grad_std": 0.21361252665519714, + "margin_dpo/beta_margin_mean": 2.567258834838867, + "margin_dpo/loss_margin_mean": 25.672588348388672, + "margin_dpo/margin_mean": 25.67258644104004, + "margin_dpo/margin_std": 23.231678009033203, + "step": 201 + }, + { + "epoch": 0.2966226138032305, + "grad_norm": 72.63829803466797, + "learning_rate": 4.4477014363141755e-07, + "logits/chosen": -0.6617487668991089, + "logits/rejected": -0.6499176025390625, + "logps/chosen": -60.97098922729492, + "logps/ref_chosen": -50.101318359375, + "logps/ref_rejected": -86.98503112792969, + "logps/rejected": -116.73014831542969, + "loss": 0.7735, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2517499327659607, + "margin_dpo/beta_margin_grad_std": 0.24596986174583435, + "margin_dpo/beta_margin_mean": 1.8875447511672974, + "margin_dpo/loss_margin_mean": 18.875446319580078, + "margin_dpo/margin_mean": 18.875446319580078, + "margin_dpo/margin_std": 21.153244018554688, + "step": 202 + }, + { + "epoch": 0.29809104258443464, + "grad_norm": 44.909637451171875, + "learning_rate": 4.439630306414758e-07, + "logits/chosen": -0.6598186492919922, + "logits/rejected": -0.6150715351104736, + "logps/chosen": -68.689453125, + "logps/ref_chosen": -60.60969543457031, + "logps/ref_rejected": -85.89596557617188, + "logps/rejected": -114.92884826660156, + "loss": 0.5325, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20222759246826172, + "margin_dpo/beta_margin_grad_std": 0.1946883499622345, + "margin_dpo/beta_margin_mean": 2.0953121185302734, + "margin_dpo/loss_margin_mean": 20.953121185302734, + "margin_dpo/margin_mean": 20.953121185302734, + "margin_dpo/margin_std": 18.274738311767578, + "step": 203 + }, + { + "epoch": 0.29955947136563876, + "grad_norm": 47.8790283203125, + "learning_rate": 4.431508065452897e-07, + "logits/chosen": -0.6964120864868164, + "logits/rejected": -0.643916130065918, + "logps/chosen": -89.8682861328125, + "logps/ref_chosen": -80.16496276855469, + "logps/ref_rejected": -87.69590759277344, + "logps/rejected": -119.29830932617188, + "loss": 0.522, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19900324940681458, + "margin_dpo/beta_margin_grad_std": 0.1881289929151535, + "margin_dpo/beta_margin_mean": 2.1899077892303467, + "margin_dpo/loss_margin_mean": 21.899078369140625, + "margin_dpo/margin_mean": 21.899078369140625, + "margin_dpo/margin_std": 19.764657974243164, + "step": 204 + }, + { + "epoch": 0.3010279001468429, + "grad_norm": 66.0072021484375, + "learning_rate": 4.4233349274571974e-07, + "logits/chosen": -0.7408896684646606, + "logits/rejected": -0.7029110193252563, + "logps/chosen": -70.63681030273438, + "logps/ref_chosen": -59.384735107421875, + "logps/ref_rejected": -85.12505340576172, + "logps/rejected": -120.17679595947266, + "loss": 0.636, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.21140334010124207, + "margin_dpo/beta_margin_grad_std": 0.22996652126312256, + "margin_dpo/beta_margin_mean": 2.3799662590026855, + "margin_dpo/loss_margin_mean": 23.799659729003906, + "margin_dpo/margin_mean": 23.79966163635254, + "margin_dpo/margin_std": 23.010438919067383, + "step": 205 + }, + { + "epoch": 0.302496328928047, + "grad_norm": 45.65394592285156, + "learning_rate": 4.415111107797445e-07, + "logits/chosen": -0.650726318359375, + "logits/rejected": -0.6484321355819702, + "logps/chosen": -57.730751037597656, + "logps/ref_chosen": -46.964500427246094, + "logps/ref_rejected": -98.9534912109375, + "logps/rejected": -136.9589080810547, + "loss": 0.4357, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16246598958969116, + "margin_dpo/beta_margin_grad_std": 0.19966889917850494, + "margin_dpo/beta_margin_mean": 2.723916530609131, + "margin_dpo/loss_margin_mean": 27.239166259765625, + "margin_dpo/margin_mean": 27.239164352416992, + "margin_dpo/margin_std": 21.334924697875977, + "step": 206 + }, + { + "epoch": 0.3039647577092511, + "grad_norm": 52.66937255859375, + "learning_rate": 4.4068368231789365e-07, + "logits/chosen": -0.726109504699707, + "logits/rejected": -0.70166015625, + "logps/chosen": -64.216064453125, + "logps/ref_chosen": -56.05625915527344, + "logps/ref_rejected": -84.44779968261719, + "logps/rejected": -120.97300720214844, + "loss": 0.4566, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17621780931949615, + "margin_dpo/beta_margin_grad_std": 0.18753397464752197, + "margin_dpo/beta_margin_mean": 2.8365397453308105, + "margin_dpo/loss_margin_mean": 28.36539649963379, + "margin_dpo/margin_mean": 28.365394592285156, + "margin_dpo/margin_std": 25.557231903076172, + "step": 207 + }, + { + "epoch": 0.3054331864904552, + "grad_norm": 49.823787689208984, + "learning_rate": 4.398512291636768e-07, + "logits/chosen": -0.6845159530639648, + "logits/rejected": -0.6554762125015259, + "logps/chosen": -79.41966247558594, + "logps/ref_chosen": -67.06761169433594, + "logps/ref_rejected": -94.28689575195312, + "logps/rejected": -129.97105407714844, + "loss": 0.4757, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18872755765914917, + "margin_dpo/beta_margin_grad_std": 0.1753954291343689, + "margin_dpo/beta_margin_mean": 2.333209991455078, + "margin_dpo/loss_margin_mean": 23.33209991455078, + "margin_dpo/margin_mean": 23.33209991455078, + "margin_dpo/margin_std": 20.38248062133789, + "step": 208 + }, + { + "epoch": 0.3069016152716593, + "grad_norm": 56.87575912475586, + "learning_rate": 4.3901377325300857e-07, + "logits/chosen": -0.6603403687477112, + "logits/rejected": -0.634404182434082, + "logps/chosen": -65.81916809082031, + "logps/ref_chosen": -56.18169403076172, + "logps/ref_rejected": -80.94152069091797, + "logps/rejected": -114.23938751220703, + "loss": 0.6082, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2075938880443573, + "margin_dpo/beta_margin_grad_std": 0.23247569799423218, + "margin_dpo/beta_margin_mean": 2.366039752960205, + "margin_dpo/loss_margin_mean": 23.660396575927734, + "margin_dpo/margin_mean": 23.660396575927734, + "margin_dpo/margin_std": 21.86727523803711, + "step": 209 + }, + { + "epoch": 0.30837004405286345, + "grad_norm": 47.36671447753906, + "learning_rate": 4.381713366536311e-07, + "logits/chosen": -0.6590738296508789, + "logits/rejected": -0.6278238296508789, + "logps/chosen": -56.26042175292969, + "logps/ref_chosen": -46.371822357177734, + "logps/ref_rejected": -76.68162536621094, + "logps/rejected": -108.72216796875, + "loss": 0.5044, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1963951587677002, + "margin_dpo/beta_margin_grad_std": 0.17726612091064453, + "margin_dpo/beta_margin_mean": 2.2151942253112793, + "margin_dpo/loss_margin_mean": 22.15194320678711, + "margin_dpo/margin_mean": 22.15194320678711, + "margin_dpo/margin_std": 18.77596664428711, + "step": 210 + }, + { + "epoch": 0.30983847283406757, + "grad_norm": 60.94392776489258, + "learning_rate": 4.373239415645323e-07, + "logits/chosen": -0.6643211841583252, + "logits/rejected": -0.6076558232307434, + "logps/chosen": -91.72789764404297, + "logps/ref_chosen": -78.93235778808594, + "logps/ref_rejected": -86.82098388671875, + "logps/rejected": -122.42787170410156, + "loss": 0.5933, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2022809386253357, + "margin_dpo/beta_margin_grad_std": 0.2218504548072815, + "margin_dpo/beta_margin_mean": 2.2811341285705566, + "margin_dpo/loss_margin_mean": 22.81134033203125, + "margin_dpo/margin_mean": 22.81134033203125, + "margin_dpo/margin_std": 20.594642639160156, + "step": 211 + }, + { + "epoch": 0.31130690161527164, + "grad_norm": 55.531829833984375, + "learning_rate": 4.3647161031536086e-07, + "logits/chosen": -0.6789622902870178, + "logits/rejected": -0.6477820873260498, + "logps/chosen": -69.9522705078125, + "logps/ref_chosen": -58.19701385498047, + "logps/ref_rejected": -103.05784606933594, + "logps/rejected": -143.61941528320312, + "loss": 0.4546, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17285102605819702, + "margin_dpo/beta_margin_grad_std": 0.19285747408866882, + "margin_dpo/beta_margin_mean": 2.8806304931640625, + "margin_dpo/loss_margin_mean": 28.806303024291992, + "margin_dpo/margin_mean": 28.806303024291992, + "margin_dpo/margin_std": 24.59879493713379, + "step": 212 + }, + { + "epoch": 0.31277533039647576, + "grad_norm": 53.27421951293945, + "learning_rate": 4.3561436536583774e-07, + "logits/chosen": -0.6767191886901855, + "logits/rejected": -0.6345890760421753, + "logps/chosen": -77.456298828125, + "logps/ref_chosen": -67.51271057128906, + "logps/ref_rejected": -93.91471862792969, + "logps/rejected": -132.91636657714844, + "loss": 0.4835, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1712796837091446, + "margin_dpo/beta_margin_grad_std": 0.19696003198623657, + "margin_dpo/beta_margin_mean": 2.905806303024292, + "margin_dpo/loss_margin_mean": 29.058063507080078, + "margin_dpo/margin_mean": 29.058061599731445, + "margin_dpo/margin_std": 25.737838745117188, + "step": 213 + }, + { + "epoch": 0.3142437591776799, + "grad_norm": 63.7830696105957, + "learning_rate": 4.3475222930516473e-07, + "logits/chosen": -0.6705787181854248, + "logits/rejected": -0.6549390554428101, + "logps/chosen": -52.265167236328125, + "logps/ref_chosen": -41.604888916015625, + "logps/ref_rejected": -77.51741027832031, + "logps/rejected": -111.19293212890625, + "loss": 0.6303, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.21721556782722473, + "margin_dpo/beta_margin_grad_std": 0.21066680550575256, + "margin_dpo/beta_margin_mean": 2.3015246391296387, + "margin_dpo/loss_margin_mean": 23.015247344970703, + "margin_dpo/margin_mean": 23.015247344970703, + "margin_dpo/margin_std": 24.095165252685547, + "step": 214 + }, + { + "epoch": 0.315712187958884, + "grad_norm": 53.305728912353516, + "learning_rate": 4.3388522485142885e-07, + "logits/chosen": -0.6503559350967407, + "logits/rejected": -0.6214190721511841, + "logps/chosen": -63.870384216308594, + "logps/ref_chosen": -53.279266357421875, + "logps/ref_rejected": -89.96464538574219, + "logps/rejected": -125.34566497802734, + "loss": 0.5335, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1932601034641266, + "margin_dpo/beta_margin_grad_std": 0.2013830840587616, + "margin_dpo/beta_margin_mean": 2.478990077972412, + "margin_dpo/loss_margin_mean": 24.789897918701172, + "margin_dpo/margin_mean": 24.789899826049805, + "margin_dpo/margin_std": 23.55270767211914, + "step": 215 + }, + { + "epoch": 0.31718061674008813, + "grad_norm": 62.44280242919922, + "learning_rate": 4.330133748510036e-07, + "logits/chosen": -0.6717164516448975, + "logits/rejected": -0.639351487159729, + "logps/chosen": -61.949493408203125, + "logps/ref_chosen": -48.887794494628906, + "logps/ref_rejected": -77.19892883300781, + "logps/rejected": -117.35563659667969, + "loss": 0.5894, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20755264163017273, + "margin_dpo/beta_margin_grad_std": 0.22565214335918427, + "margin_dpo/beta_margin_mean": 2.709501266479492, + "margin_dpo/loss_margin_mean": 27.095012664794922, + "margin_dpo/margin_mean": 27.095012664794922, + "margin_dpo/margin_std": 26.17487335205078, + "step": 216 + }, + { + "epoch": 0.3186490455212922, + "grad_norm": 42.385009765625, + "learning_rate": 4.3213670227794757e-07, + "logits/chosen": -0.6868363618850708, + "logits/rejected": -0.6442810893058777, + "logps/chosen": -60.769012451171875, + "logps/ref_chosen": -49.845306396484375, + "logps/ref_rejected": -100.07832336425781, + "logps/rejected": -138.9866943359375, + "loss": 0.4008, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1490134596824646, + "margin_dpo/beta_margin_grad_std": 0.18446922302246094, + "margin_dpo/beta_margin_mean": 2.798466205596924, + "margin_dpo/loss_margin_mean": 27.984663009643555, + "margin_dpo/margin_mean": 27.984663009643555, + "margin_dpo/margin_std": 21.431766510009766, + "step": 217 + }, + { + "epoch": 0.3201174743024963, + "grad_norm": 60.58324432373047, + "learning_rate": 4.3125523023339815e-07, + "logits/chosen": -0.659305214881897, + "logits/rejected": -0.6280574798583984, + "logps/chosen": -69.96195983886719, + "logps/ref_chosen": -58.576683044433594, + "logps/ref_rejected": -87.84639739990234, + "logps/rejected": -124.12518310546875, + "loss": 0.5344, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1986551582813263, + "margin_dpo/beta_margin_grad_std": 0.19974969327449799, + "margin_dpo/beta_margin_mean": 2.4893507957458496, + "margin_dpo/loss_margin_mean": 24.893508911132812, + "margin_dpo/margin_mean": 24.893508911132812, + "margin_dpo/margin_std": 23.56732177734375, + "step": 218 + }, + { + "epoch": 0.32158590308370044, + "grad_norm": 59.97367858886719, + "learning_rate": 4.303689819449636e-07, + "logits/chosen": -0.6612948775291443, + "logits/rejected": -0.6297075152397156, + "logps/chosen": -72.49765014648438, + "logps/ref_chosen": -61.083858489990234, + "logps/ref_rejected": -85.83042907714844, + "logps/rejected": -119.2388687133789, + "loss": 0.5264, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19783681631088257, + "margin_dpo/beta_margin_grad_std": 0.18227900564670563, + "margin_dpo/beta_margin_mean": 2.1994645595550537, + "margin_dpo/loss_margin_mean": 21.994646072387695, + "margin_dpo/margin_mean": 21.994644165039062, + "margin_dpo/margin_std": 19.54847526550293, + "step": 219 + }, + { + "epoch": 0.32305433186490456, + "grad_norm": 47.776611328125, + "learning_rate": 4.2947798076611047e-07, + "logits/chosen": -0.646489679813385, + "logits/rejected": -0.605070948600769, + "logps/chosen": -81.14366912841797, + "logps/ref_chosen": -70.03128051757812, + "logps/ref_rejected": -87.68551635742188, + "logps/rejected": -119.57649230957031, + "loss": 0.5065, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19959807395935059, + "margin_dpo/beta_margin_grad_std": 0.17812803387641907, + "margin_dpo/beta_margin_mean": 2.077859401702881, + "margin_dpo/loss_margin_mean": 20.77859115600586, + "margin_dpo/margin_mean": 20.778593063354492, + "margin_dpo/margin_std": 16.526872634887695, + "step": 220 + }, + { + "epoch": 0.3245227606461087, + "grad_norm": 52.991641998291016, + "learning_rate": 4.285822501755485e-07, + "logits/chosen": -0.6369597315788269, + "logits/rejected": -0.6250277757644653, + "logps/chosen": -64.38204193115234, + "logps/ref_chosen": -52.15470886230469, + "logps/ref_rejected": -106.46768188476562, + "logps/rejected": -151.52133178710938, + "loss": 0.3403, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12470635771751404, + "margin_dpo/beta_margin_grad_std": 0.18095463514328003, + "margin_dpo/beta_margin_mean": 3.2826321125030518, + "margin_dpo/loss_margin_mean": 32.82632064819336, + "margin_dpo/margin_mean": 32.82632064819336, + "margin_dpo/margin_std": 22.725797653198242, + "step": 221 + }, + { + "epoch": 0.32599118942731276, + "grad_norm": 72.23493957519531, + "learning_rate": 4.276818137766118e-07, + "logits/chosen": -0.7019423842430115, + "logits/rejected": -0.6659786701202393, + "logps/chosen": -74.65852355957031, + "logps/ref_chosen": -60.971099853515625, + "logps/ref_rejected": -100.00115203857422, + "logps/rejected": -139.9058837890625, + "loss": 0.5722, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19258952140808105, + "margin_dpo/beta_margin_grad_std": 0.22169330716133118, + "margin_dpo/beta_margin_mean": 2.621731758117676, + "margin_dpo/loss_margin_mean": 26.217315673828125, + "margin_dpo/margin_mean": 26.217315673828125, + "margin_dpo/margin_std": 24.92258071899414, + "step": 222 + }, + { + "epoch": 0.3274596182085169, + "grad_norm": 85.80632781982422, + "learning_rate": 4.2677669529663686e-07, + "logits/chosen": -0.6935982704162598, + "logits/rejected": -0.6445531249046326, + "logps/chosen": -68.51580047607422, + "logps/ref_chosen": -52.64057922363281, + "logps/ref_rejected": -82.82502746582031, + "logps/rejected": -120.95681762695312, + "loss": 0.7341, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.22147008776664734, + "margin_dpo/beta_margin_grad_std": 0.2591605484485626, + "margin_dpo/beta_margin_mean": 2.225656509399414, + "margin_dpo/loss_margin_mean": 22.25656509399414, + "margin_dpo/margin_mean": 22.25656509399414, + "margin_dpo/margin_std": 22.70517349243164, + "step": 223 + }, + { + "epoch": 0.328928046989721, + "grad_norm": 72.05899810791016, + "learning_rate": 4.2586691858633747e-07, + "logits/chosen": -0.6770994663238525, + "logits/rejected": -0.6360162496566772, + "logps/chosen": -61.67028045654297, + "logps/ref_chosen": -48.59540939331055, + "logps/ref_rejected": -77.11648559570312, + "logps/rejected": -116.93687438964844, + "loss": 0.5671, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19068878889083862, + "margin_dpo/beta_margin_grad_std": 0.20871135592460632, + "margin_dpo/beta_margin_mean": 2.674551486968994, + "margin_dpo/loss_margin_mean": 26.745513916015625, + "margin_dpo/margin_mean": 26.745513916015625, + "margin_dpo/margin_std": 24.828662872314453, + "step": 224 + }, + { + "epoch": 0.3303964757709251, + "grad_norm": 44.1650390625, + "learning_rate": 4.249525076191759e-07, + "logits/chosen": -0.6742178201675415, + "logits/rejected": -0.6422642469406128, + "logps/chosen": -72.62501525878906, + "logps/ref_chosen": -58.000465393066406, + "logps/ref_rejected": -99.90290832519531, + "logps/rejected": -147.4707489013672, + "loss": 0.4084, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14956694841384888, + "margin_dpo/beta_margin_grad_std": 0.20272037386894226, + "margin_dpo/beta_margin_mean": 3.2943289279937744, + "margin_dpo/loss_margin_mean": 32.94328689575195, + "margin_dpo/margin_mean": 32.94329071044922, + "margin_dpo/margin_std": 26.7680606842041, + "step": 225 + }, + { + "epoch": 0.33186490455212925, + "grad_norm": 52.37092208862305, + "learning_rate": 4.2403348649073167e-07, + "logits/chosen": -0.6877872943878174, + "logits/rejected": -0.6356024146080017, + "logps/chosen": -69.44877624511719, + "logps/ref_chosen": -58.898799896240234, + "logps/ref_rejected": -78.68775939941406, + "logps/rejected": -114.89201354980469, + "loss": 0.4828, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17930053174495697, + "margin_dpo/beta_margin_grad_std": 0.18374590575695038, + "margin_dpo/beta_margin_mean": 2.565427780151367, + "margin_dpo/loss_margin_mean": 25.654277801513672, + "margin_dpo/margin_mean": 25.654277801513672, + "margin_dpo/margin_std": 21.256011962890625, + "step": 226 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 47.082557678222656, + "learning_rate": 4.2310987941806615e-07, + "logits/chosen": -0.6890594959259033, + "logits/rejected": -0.6593263745307922, + "logps/chosen": -70.82875061035156, + "logps/ref_chosen": -59.072181701660156, + "logps/ref_rejected": -99.41236877441406, + "logps/rejected": -142.82723999023438, + "loss": 0.4196, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15451833605766296, + "margin_dpo/beta_margin_grad_std": 0.19809511303901672, + "margin_dpo/beta_margin_mean": 3.1658291816711426, + "margin_dpo/loss_margin_mean": 31.65829086303711, + "margin_dpo/margin_mean": 31.65829086303711, + "margin_dpo/margin_std": 26.29955291748047, + "step": 227 + }, + { + "epoch": 0.33480176211453744, + "grad_norm": 56.605804443359375, + "learning_rate": 4.2218171073908463e-07, + "logits/chosen": -0.7011622786521912, + "logits/rejected": -0.6703910231590271, + "logps/chosen": -78.96180725097656, + "logps/ref_chosen": -65.89129638671875, + "logps/ref_rejected": -91.04875183105469, + "logps/rejected": -128.32534790039062, + "loss": 0.5385, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18866901099681854, + "margin_dpo/beta_margin_grad_std": 0.21488142013549805, + "margin_dpo/beta_margin_mean": 2.4206089973449707, + "margin_dpo/loss_margin_mean": 24.20608901977539, + "margin_dpo/margin_mean": 24.20608901977539, + "margin_dpo/margin_std": 20.969837188720703, + "step": 228 + }, + { + "epoch": 0.33627019089574156, + "grad_norm": 63.40804672241211, + "learning_rate": 4.212490049118951e-07, + "logits/chosen": -0.7051106691360474, + "logits/rejected": -0.6546026468276978, + "logps/chosen": -84.97560119628906, + "logps/ref_chosen": -70.70636749267578, + "logps/ref_rejected": -84.52740478515625, + "logps/rejected": -125.85427856445312, + "loss": 0.6019, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19034332036972046, + "margin_dpo/beta_margin_grad_std": 0.2341303527355194, + "margin_dpo/beta_margin_mean": 2.7057645320892334, + "margin_dpo/loss_margin_mean": 27.05764389038086, + "margin_dpo/margin_mean": 27.05764389038086, + "margin_dpo/margin_std": 25.40169906616211, + "step": 229 + }, + { + "epoch": 0.3377386196769457, + "grad_norm": 53.56421661376953, + "learning_rate": 4.203117865141635e-07, + "logits/chosen": -0.6524708271026611, + "logits/rejected": -0.6426960229873657, + "logps/chosen": -51.435630798339844, + "logps/ref_chosen": -39.282005310058594, + "logps/ref_rejected": -85.62191009521484, + "logps/rejected": -128.23431396484375, + "loss": 0.5106, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16138073801994324, + "margin_dpo/beta_margin_grad_std": 0.21171000599861145, + "margin_dpo/beta_margin_mean": 3.0458788871765137, + "margin_dpo/loss_margin_mean": 30.45878791809082, + "margin_dpo/margin_mean": 30.45878791809082, + "margin_dpo/margin_std": 27.84360694885254, + "step": 230 + }, + { + "epoch": 0.3392070484581498, + "grad_norm": 42.17887878417969, + "learning_rate": 4.1937008024246625e-07, + "logits/chosen": -0.6823098063468933, + "logits/rejected": -0.6389471292495728, + "logps/chosen": -74.67344665527344, + "logps/ref_chosen": -63.27644348144531, + "logps/ref_rejected": -74.1239013671875, + "logps/rejected": -111.6684799194336, + "loss": 0.4691, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18743924796581268, + "margin_dpo/beta_margin_grad_std": 0.17160117626190186, + "margin_dpo/beta_margin_mean": 2.614757537841797, + "margin_dpo/loss_margin_mean": 26.14757537841797, + "margin_dpo/margin_mean": 26.14757537841797, + "margin_dpo/margin_std": 25.1517333984375, + "step": 231 + }, + { + "epoch": 0.3406754772393539, + "grad_norm": 70.57960510253906, + "learning_rate": 4.1842391091163933e-07, + "logits/chosen": -0.6195404529571533, + "logits/rejected": -0.5588107109069824, + "logps/chosen": -84.39373779296875, + "logps/ref_chosen": -70.74876403808594, + "logps/ref_rejected": -83.97706604003906, + "logps/rejected": -118.90908813476562, + "loss": 0.6901, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.23599335551261902, + "margin_dpo/beta_margin_grad_std": 0.24028193950653076, + "margin_dpo/beta_margin_mean": 2.1287035942077637, + "margin_dpo/loss_margin_mean": 21.287036895751953, + "margin_dpo/margin_mean": 21.287036895751953, + "margin_dpo/margin_std": 22.360965728759766, + "step": 232 + }, + { + "epoch": 0.342143906020558, + "grad_norm": 63.01465606689453, + "learning_rate": 4.174733034541245e-07, + "logits/chosen": -0.6951059103012085, + "logits/rejected": -0.6703442931175232, + "logps/chosen": -67.92855834960938, + "logps/ref_chosen": -54.8829345703125, + "logps/ref_rejected": -107.48007202148438, + "logps/rejected": -148.419921875, + "loss": 0.5636, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18834185600280762, + "margin_dpo/beta_margin_grad_std": 0.22877255082130432, + "margin_dpo/beta_margin_mean": 2.7894234657287598, + "margin_dpo/loss_margin_mean": 27.894235610961914, + "margin_dpo/margin_mean": 27.89423370361328, + "margin_dpo/margin_std": 26.236351013183594, + "step": 233 + }, + { + "epoch": 0.3436123348017621, + "grad_norm": 59.044742584228516, + "learning_rate": 4.165182829193126e-07, + "logits/chosen": -0.6353030204772949, + "logits/rejected": -0.6363640427589417, + "logps/chosen": -55.01062774658203, + "logps/ref_chosen": -44.09451675415039, + "logps/ref_rejected": -100.00663757324219, + "logps/rejected": -139.08529663085938, + "loss": 0.4541, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15148046612739563, + "margin_dpo/beta_margin_grad_std": 0.18851938843727112, + "margin_dpo/beta_margin_mean": 2.8162550926208496, + "margin_dpo/loss_margin_mean": 28.162551879882812, + "margin_dpo/margin_mean": 28.16254997253418, + "margin_dpo/margin_std": 21.774639129638672, + "step": 234 + }, + { + "epoch": 0.34508076358296624, + "grad_norm": 64.13523864746094, + "learning_rate": 4.1555887447288255e-07, + "logits/chosen": -0.6782118082046509, + "logits/rejected": -0.6378265619277954, + "logps/chosen": -77.53021240234375, + "logps/ref_chosen": -62.237911224365234, + "logps/ref_rejected": -90.39505767822266, + "logps/rejected": -128.7196502685547, + "loss": 0.5952, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.21459338068962097, + "margin_dpo/beta_margin_grad_std": 0.21603041887283325, + "margin_dpo/beta_margin_mean": 2.303229808807373, + "margin_dpo/loss_margin_mean": 23.03229522705078, + "margin_dpo/margin_mean": 23.03229522705078, + "margin_dpo/margin_std": 23.028390884399414, + "step": 235 + }, + { + "epoch": 0.3465491923641703, + "grad_norm": 64.90979766845703, + "learning_rate": 4.1459510339613946e-07, + "logits/chosen": -0.6209807395935059, + "logits/rejected": -0.6189430356025696, + "logps/chosen": -60.39289855957031, + "logps/ref_chosen": -49.34136199951172, + "logps/ref_rejected": -103.51162719726562, + "logps/rejected": -139.95445251464844, + "loss": 0.5672, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19896173477172852, + "margin_dpo/beta_margin_grad_std": 0.22020728886127472, + "margin_dpo/beta_margin_mean": 2.5391287803649902, + "margin_dpo/loss_margin_mean": 25.391286849975586, + "margin_dpo/margin_mean": 25.391284942626953, + "margin_dpo/margin_std": 23.486440658569336, + "step": 236 + }, + { + "epoch": 0.34801762114537443, + "grad_norm": 48.42905807495117, + "learning_rate": 4.136269950853473e-07, + "logits/chosen": -0.6719942092895508, + "logits/rejected": -0.6377314329147339, + "logps/chosen": -65.90644836425781, + "logps/ref_chosen": -54.168121337890625, + "logps/ref_rejected": -94.78036499023438, + "logps/rejected": -133.96646118164062, + "loss": 0.5145, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17442235350608826, + "margin_dpo/beta_margin_grad_std": 0.21036501228809357, + "margin_dpo/beta_margin_mean": 2.744776487350464, + "margin_dpo/loss_margin_mean": 27.447765350341797, + "margin_dpo/margin_mean": 27.447765350341797, + "margin_dpo/margin_std": 24.072509765625, + "step": 237 + }, + { + "epoch": 0.34948604992657856, + "grad_norm": 39.447776794433594, + "learning_rate": 4.126545750510605e-07, + "logits/chosen": -0.62393718957901, + "logits/rejected": -0.6167235970497131, + "logps/chosen": -64.80150604248047, + "logps/ref_chosen": -53.973121643066406, + "logps/ref_rejected": -89.41795349121094, + "logps/rejected": -124.90143585205078, + "loss": 0.4407, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17134535312652588, + "margin_dpo/beta_margin_grad_std": 0.17862460017204285, + "margin_dpo/beta_margin_mean": 2.465510368347168, + "margin_dpo/loss_margin_mean": 24.65510368347168, + "margin_dpo/margin_mean": 24.655105590820312, + "margin_dpo/margin_std": 20.156238555908203, + "step": 238 + }, + { + "epoch": 0.3509544787077827, + "grad_norm": 49.10955810546875, + "learning_rate": 4.116778689174514e-07, + "logits/chosen": -0.6934037208557129, + "logits/rejected": -0.6648428440093994, + "logps/chosen": -70.5438232421875, + "logps/ref_chosen": -58.09782409667969, + "logps/ref_rejected": -93.59294128417969, + "logps/rejected": -131.54547119140625, + "loss": 0.4402, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16609887778759003, + "margin_dpo/beta_margin_grad_std": 0.17937105894088745, + "margin_dpo/beta_margin_mean": 2.550652503967285, + "margin_dpo/loss_margin_mean": 25.50652313232422, + "margin_dpo/margin_mean": 25.50652313232422, + "margin_dpo/margin_std": 19.751544952392578, + "step": 239 + }, + { + "epoch": 0.3524229074889868, + "grad_norm": 59.94078063964844, + "learning_rate": 4.106969024216348e-07, + "logits/chosen": -0.7201815843582153, + "logits/rejected": -0.6908845901489258, + "logps/chosen": -73.39276885986328, + "logps/ref_chosen": -60.6144905090332, + "logps/ref_rejected": -74.1185302734375, + "logps/rejected": -109.40141296386719, + "loss": 0.6221, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20475083589553833, + "margin_dpo/beta_margin_grad_std": 0.22877436876296997, + "margin_dpo/beta_margin_mean": 2.250460624694824, + "margin_dpo/loss_margin_mean": 22.50460433959961, + "margin_dpo/margin_mean": 22.50460433959961, + "margin_dpo/margin_std": 20.584453582763672, + "step": 240 + }, + { + "epoch": 0.35389133627019087, + "grad_norm": 58.87882614135742, + "learning_rate": 4.097117014129903e-07, + "logits/chosen": -0.6401950120925903, + "logits/rejected": -0.5843400955200195, + "logps/chosen": -76.44609069824219, + "logps/ref_chosen": -66.091064453125, + "logps/ref_rejected": -88.06088256835938, + "logps/rejected": -130.1419677734375, + "loss": 0.5073, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16559036076068878, + "margin_dpo/beta_margin_grad_std": 0.2104012668132782, + "margin_dpo/beta_margin_mean": 3.172605514526367, + "margin_dpo/loss_margin_mean": 31.726055145263672, + "margin_dpo/margin_mean": 31.726055145263672, + "margin_dpo/margin_std": 29.52655029296875, + "step": 241 + }, + { + "epoch": 0.355359765051395, + "grad_norm": 53.561256408691406, + "learning_rate": 4.087222918524807e-07, + "logits/chosen": -0.6564736366271973, + "logits/rejected": -0.6259176135063171, + "logps/chosen": -79.38307189941406, + "logps/ref_chosen": -67.86392211914062, + "logps/ref_rejected": -83.36033630371094, + "logps/rejected": -119.49470520019531, + "loss": 0.4948, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18211215734481812, + "margin_dpo/beta_margin_grad_std": 0.19208890199661255, + "margin_dpo/beta_margin_mean": 2.4615235328674316, + "margin_dpo/loss_margin_mean": 24.615234375, + "margin_dpo/margin_mean": 24.615234375, + "margin_dpo/margin_std": 21.698657989501953, + "step": 242 + }, + { + "epoch": 0.3568281938325991, + "grad_norm": 33.901893615722656, + "learning_rate": 4.07728699811968e-07, + "logits/chosen": -0.6499842405319214, + "logits/rejected": -0.5824156999588013, + "logps/chosen": -73.99522399902344, + "logps/ref_chosen": -63.08424377441406, + "logps/ref_rejected": -76.33563232421875, + "logps/rejected": -116.67811584472656, + "loss": 0.3266, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13336455821990967, + "margin_dpo/beta_margin_grad_std": 0.15137195587158203, + "margin_dpo/beta_margin_mean": 2.943150043487549, + "margin_dpo/loss_margin_mean": 29.43149757385254, + "margin_dpo/margin_mean": 29.43149757385254, + "margin_dpo/margin_std": 21.736125946044922, + "step": 243 + }, + { + "epoch": 0.35829662261380324, + "grad_norm": 43.502723693847656, + "learning_rate": 4.067309514735267e-07, + "logits/chosen": -0.6875163316726685, + "logits/rejected": -0.6771037578582764, + "logps/chosen": -71.17987060546875, + "logps/ref_chosen": -61.14069366455078, + "logps/ref_rejected": -94.89193725585938, + "logps/rejected": -130.1956787109375, + "loss": 0.4962, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18511684238910675, + "margin_dpo/beta_margin_grad_std": 0.20264488458633423, + "margin_dpo/beta_margin_mean": 2.526456832885742, + "margin_dpo/loss_margin_mean": 25.264570236206055, + "margin_dpo/margin_mean": 25.264570236206055, + "margin_dpo/margin_std": 21.366226196289062, + "step": 244 + }, + { + "epoch": 0.35976505139500736, + "grad_norm": 75.58712768554688, + "learning_rate": 4.057290731287531e-07, + "logits/chosen": -0.6512210369110107, + "logits/rejected": -0.5914992094039917, + "logps/chosen": -78.87323760986328, + "logps/ref_chosen": -67.26228332519531, + "logps/ref_rejected": -87.64010620117188, + "logps/rejected": -126.08024597167969, + "loss": 0.5409, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1954166740179062, + "margin_dpo/beta_margin_grad_std": 0.20244669914245605, + "margin_dpo/beta_margin_mean": 2.68291974067688, + "margin_dpo/loss_margin_mean": 26.82919692993164, + "margin_dpo/margin_mean": 26.82919692993164, + "margin_dpo/margin_std": 25.66305923461914, + "step": 245 + }, + { + "epoch": 0.36123348017621143, + "grad_norm": 55.81852722167969, + "learning_rate": 4.047230911780736e-07, + "logits/chosen": -0.6650277972221375, + "logits/rejected": -0.6220686435699463, + "logps/chosen": -77.91656494140625, + "logps/ref_chosen": -66.69696807861328, + "logps/ref_rejected": -84.34634399414062, + "logps/rejected": -118.67218017578125, + "loss": 0.5257, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19744564592838287, + "margin_dpo/beta_margin_grad_std": 0.1971253752708435, + "margin_dpo/beta_margin_mean": 2.3106236457824707, + "margin_dpo/loss_margin_mean": 23.10623550415039, + "margin_dpo/margin_mean": 23.10623550415039, + "margin_dpo/margin_std": 20.974063873291016, + "step": 246 + }, + { + "epoch": 0.36270190895741555, + "grad_norm": 39.22549057006836, + "learning_rate": 4.0371303213004814e-07, + "logits/chosen": -0.7125513553619385, + "logits/rejected": -0.6908072233200073, + "logps/chosen": -68.05924224853516, + "logps/ref_chosen": -56.6053466796875, + "logps/ref_rejected": -106.29327392578125, + "logps/rejected": -150.0380401611328, + "loss": 0.4087, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1458946168422699, + "margin_dpo/beta_margin_grad_std": 0.1995469033718109, + "margin_dpo/beta_margin_mean": 3.229086399078369, + "margin_dpo/loss_margin_mean": 32.29086685180664, + "margin_dpo/margin_mean": 32.29086685180664, + "margin_dpo/margin_std": 25.363780975341797, + "step": 247 + }, + { + "epoch": 0.3641703377386197, + "grad_norm": 44.850181579589844, + "learning_rate": 4.0269892260067197e-07, + "logits/chosen": -0.65459144115448, + "logits/rejected": -0.6384952068328857, + "logps/chosen": -54.36640930175781, + "logps/ref_chosen": -44.043216705322266, + "logps/ref_rejected": -91.85687255859375, + "logps/rejected": -126.57852172851562, + "loss": 0.4079, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16769659519195557, + "margin_dpo/beta_margin_grad_std": 0.15377435088157654, + "margin_dpo/beta_margin_mean": 2.439844846725464, + "margin_dpo/loss_margin_mean": 24.398448944091797, + "margin_dpo/margin_mean": 24.398447036743164, + "margin_dpo/margin_std": 19.091136932373047, + "step": 248 + }, + { + "epoch": 0.3656387665198238, + "grad_norm": 64.64720916748047, + "learning_rate": 4.0168078931267426e-07, + "logits/chosen": -0.7210872173309326, + "logits/rejected": -0.6843761205673218, + "logps/chosen": -74.97351837158203, + "logps/ref_chosen": -62.442352294921875, + "logps/ref_rejected": -80.46806335449219, + "logps/rejected": -113.79952239990234, + "loss": 0.6559, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.23226860165596008, + "margin_dpo/beta_margin_grad_std": 0.22308656573295593, + "margin_dpo/beta_margin_mean": 2.080028533935547, + "margin_dpo/loss_margin_mean": 20.80028533935547, + "margin_dpo/margin_mean": 20.80028533935547, + "margin_dpo/margin_std": 20.590898513793945, + "step": 249 + }, + { + "epoch": 0.3671071953010279, + "grad_norm": 32.35546875, + "learning_rate": 4.006586590948141e-07, + "logits/chosen": -0.6919997334480286, + "logits/rejected": -0.621533215045929, + "logps/chosen": -74.43826293945312, + "logps/ref_chosen": -65.6366958618164, + "logps/ref_rejected": -73.87183380126953, + "logps/rejected": -108.39453125, + "loss": 0.4355, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15557865798473358, + "margin_dpo/beta_margin_grad_std": 0.1853388100862503, + "margin_dpo/beta_margin_mean": 2.572113037109375, + "margin_dpo/loss_margin_mean": 25.72113037109375, + "margin_dpo/margin_mean": 25.72113037109375, + "margin_dpo/margin_std": 18.065841674804688, + "step": 250 + }, + { + "epoch": 0.368575624082232, + "grad_norm": 45.22731018066406, + "learning_rate": 3.9963255888117325e-07, + "logits/chosen": -0.7006521224975586, + "logits/rejected": -0.645630955696106, + "logps/chosen": -70.0533676147461, + "logps/ref_chosen": -57.182716369628906, + "logps/ref_rejected": -77.66343688964844, + "logps/rejected": -116.09004211425781, + "loss": 0.4648, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.179362490773201, + "margin_dpo/beta_margin_grad_std": 0.19022953510284424, + "margin_dpo/beta_margin_mean": 2.5555951595306396, + "margin_dpo/loss_margin_mean": 25.555952072143555, + "margin_dpo/margin_mean": 25.555952072143555, + "margin_dpo/margin_std": 20.537967681884766, + "step": 251 + }, + { + "epoch": 0.3700440528634361, + "grad_norm": 52.23085403442383, + "learning_rate": 3.9860251571044666e-07, + "logits/chosen": -0.68607497215271, + "logits/rejected": -0.6413745880126953, + "logps/chosen": -83.42814636230469, + "logps/ref_chosen": -71.68563842773438, + "logps/ref_rejected": -84.75798797607422, + "logps/rejected": -122.11627197265625, + "loss": 0.4332, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15921342372894287, + "margin_dpo/beta_margin_grad_std": 0.18207845091819763, + "margin_dpo/beta_margin_mean": 2.5615780353546143, + "margin_dpo/loss_margin_mean": 25.615779876708984, + "margin_dpo/margin_mean": 25.615779876708984, + "margin_dpo/margin_std": 19.359203338623047, + "step": 252 + }, + { + "epoch": 0.37151248164464024, + "grad_norm": 50.17756652832031, + "learning_rate": 3.9756855672522986e-07, + "logits/chosen": -0.7000366449356079, + "logits/rejected": -0.6709662675857544, + "logps/chosen": -79.15950012207031, + "logps/ref_chosen": -69.13392639160156, + "logps/ref_rejected": -98.70252990722656, + "logps/rejected": -132.74795532226562, + "loss": 0.6265, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2043215036392212, + "margin_dpo/beta_margin_grad_std": 0.22261486947536469, + "margin_dpo/beta_margin_mean": 2.401987075805664, + "margin_dpo/loss_margin_mean": 24.01987075805664, + "margin_dpo/margin_mean": 24.01987075805664, + "margin_dpo/margin_std": 22.792251586914062, + "step": 253 + }, + { + "epoch": 0.37298091042584436, + "grad_norm": 63.319210052490234, + "learning_rate": 3.965307091713037e-07, + "logits/chosen": -0.6818605065345764, + "logits/rejected": -0.6327254772186279, + "logps/chosen": -64.66595458984375, + "logps/ref_chosen": -54.154998779296875, + "logps/ref_rejected": -90.30764770507812, + "logps/rejected": -125.43360900878906, + "loss": 0.5555, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1997654289007187, + "margin_dpo/beta_margin_grad_std": 0.21750159561634064, + "margin_dpo/beta_margin_mean": 2.461501121520996, + "margin_dpo/loss_margin_mean": 24.615013122558594, + "margin_dpo/margin_mean": 24.615013122558594, + "margin_dpo/margin_std": 22.605167388916016, + "step": 254 + }, + { + "epoch": 0.3744493392070485, + "grad_norm": 67.16899871826172, + "learning_rate": 3.954890003969163e-07, + "logits/chosen": -0.6882792115211487, + "logits/rejected": -0.6564103364944458, + "logps/chosen": -70.3147201538086, + "logps/ref_chosen": -57.14167022705078, + "logps/ref_rejected": -90.2085952758789, + "logps/rejected": -130.73675537109375, + "loss": 0.6626, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19194266200065613, + "margin_dpo/beta_margin_grad_std": 0.2271135002374649, + "margin_dpo/beta_margin_mean": 2.7355103492736816, + "margin_dpo/loss_margin_mean": 27.355106353759766, + "margin_dpo/margin_mean": 27.355106353759766, + "margin_dpo/margin_std": 28.053775787353516, + "step": 255 + }, + { + "epoch": 0.37591776798825255, + "grad_norm": 58.15704345703125, + "learning_rate": 3.944434578520628e-07, + "logits/chosen": -0.6779786348342896, + "logits/rejected": -0.6487682461738586, + "logps/chosen": -68.2900619506836, + "logps/ref_chosen": -55.163490295410156, + "logps/ref_rejected": -92.56291961669922, + "logps/rejected": -132.92449951171875, + "loss": 0.5166, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17461168766021729, + "margin_dpo/beta_margin_grad_std": 0.20105499029159546, + "margin_dpo/beta_margin_mean": 2.7235007286071777, + "margin_dpo/loss_margin_mean": 27.235008239746094, + "margin_dpo/margin_mean": 27.235008239746094, + "margin_dpo/margin_std": 25.091623306274414, + "step": 256 + }, + { + "epoch": 0.37738619676945667, + "grad_norm": 45.471885681152344, + "learning_rate": 3.933941090877615e-07, + "logits/chosen": -0.647832453250885, + "logits/rejected": -0.6230664253234863, + "logps/chosen": -61.75330352783203, + "logps/ref_chosen": -49.4236946105957, + "logps/ref_rejected": -79.53791809082031, + "logps/rejected": -121.87123107910156, + "loss": 0.5048, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18101766705513, + "margin_dpo/beta_margin_grad_std": 0.21384158730506897, + "margin_dpo/beta_margin_mean": 3.0003700256347656, + "margin_dpo/loss_margin_mean": 30.003700256347656, + "margin_dpo/margin_mean": 30.003700256347656, + "margin_dpo/margin_std": 25.782047271728516, + "step": 257 + }, + { + "epoch": 0.3788546255506608, + "grad_norm": 87.32129669189453, + "learning_rate": 3.923409817553284e-07, + "logits/chosen": -0.6796102523803711, + "logits/rejected": -0.6486295461654663, + "logps/chosen": -75.19246673583984, + "logps/ref_chosen": -59.384124755859375, + "logps/ref_rejected": -95.9901123046875, + "logps/rejected": -138.2916717529297, + "loss": 0.7333, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20006248354911804, + "margin_dpo/beta_margin_grad_std": 0.2458367645740509, + "margin_dpo/beta_margin_mean": 2.649322748184204, + "margin_dpo/loss_margin_mean": 26.493227005004883, + "margin_dpo/margin_mean": 26.493227005004883, + "margin_dpo/margin_std": 27.26435089111328, + "step": 258 + }, + { + "epoch": 0.3803230543318649, + "grad_norm": 49.1059684753418, + "learning_rate": 3.9128410360564793e-07, + "logits/chosen": -0.6160672307014465, + "logits/rejected": -0.5899853110313416, + "logps/chosen": -67.10466003417969, + "logps/ref_chosen": -52.828346252441406, + "logps/ref_rejected": -89.19165802001953, + "logps/rejected": -127.45698547363281, + "loss": 0.5267, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1898474097251892, + "margin_dpo/beta_margin_grad_std": 0.19210869073867798, + "margin_dpo/beta_margin_mean": 2.3989009857177734, + "margin_dpo/loss_margin_mean": 23.9890079498291, + "margin_dpo/margin_mean": 23.9890079498291, + "margin_dpo/margin_std": 20.4078369140625, + "step": 259 + }, + { + "epoch": 0.38179148311306904, + "grad_norm": 57.559608459472656, + "learning_rate": 3.9022350248844246e-07, + "logits/chosen": -0.6179628372192383, + "logits/rejected": -0.6178984045982361, + "logps/chosen": -62.646522521972656, + "logps/ref_chosen": -47.41767501831055, + "logps/ref_rejected": -95.08979034423828, + "logps/rejected": -137.5649871826172, + "loss": 0.4996, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18264836072921753, + "margin_dpo/beta_margin_grad_std": 0.2074463963508606, + "margin_dpo/beta_margin_mean": 2.724635124206543, + "margin_dpo/loss_margin_mean": 27.24635124206543, + "margin_dpo/margin_mean": 27.24635124206543, + "margin_dpo/margin_std": 25.009780883789062, + "step": 260 + }, + { + "epoch": 0.3832599118942731, + "grad_norm": 45.6181755065918, + "learning_rate": 3.891592063515376e-07, + "logits/chosen": -0.6720625758171082, + "logits/rejected": -0.6372575759887695, + "logps/chosen": -65.10540008544922, + "logps/ref_chosen": -53.03137969970703, + "logps/ref_rejected": -88.51494598388672, + "logps/rejected": -129.3517608642578, + "loss": 0.4674, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1723802089691162, + "margin_dpo/beta_margin_grad_std": 0.2052065134048462, + "margin_dpo/beta_margin_mean": 2.876279354095459, + "margin_dpo/loss_margin_mean": 28.762794494628906, + "margin_dpo/margin_mean": 28.762794494628906, + "margin_dpo/margin_std": 26.346603393554688, + "step": 261 + }, + { + "epoch": 0.38472834067547723, + "grad_norm": 62.1253776550293, + "learning_rate": 3.880912432401264e-07, + "logits/chosen": -0.6411904096603394, + "logits/rejected": -0.5941104888916016, + "logps/chosen": -74.12861633300781, + "logps/ref_chosen": -59.620140075683594, + "logps/ref_rejected": -86.41853332519531, + "logps/rejected": -126.77371215820312, + "loss": 0.5264, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17031686007976532, + "margin_dpo/beta_margin_grad_std": 0.22105002403259277, + "margin_dpo/beta_margin_mean": 2.5846705436706543, + "margin_dpo/loss_margin_mean": 25.84670639038086, + "margin_dpo/margin_mean": 25.84670639038086, + "margin_dpo/margin_std": 21.955059051513672, + "step": 262 + }, + { + "epoch": 0.38619676945668135, + "grad_norm": 63.75021743774414, + "learning_rate": 3.870196412960302e-07, + "logits/chosen": -0.6712623834609985, + "logits/rejected": -0.6137841939926147, + "logps/chosen": -71.11343383789062, + "logps/ref_chosen": -59.42094421386719, + "logps/ref_rejected": -96.85720825195312, + "logps/rejected": -139.07449340820312, + "loss": 0.4316, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1605646014213562, + "margin_dpo/beta_margin_grad_std": 0.19631603360176086, + "margin_dpo/beta_margin_mean": 3.0524797439575195, + "margin_dpo/loss_margin_mean": 30.524797439575195, + "margin_dpo/margin_mean": 30.524799346923828, + "margin_dpo/margin_std": 26.109458923339844, + "step": 263 + }, + { + "epoch": 0.3876651982378855, + "grad_norm": 65.67262268066406, + "learning_rate": 3.8594442875695665e-07, + "logits/chosen": -0.664789080619812, + "logits/rejected": -0.6371362805366516, + "logps/chosen": -76.22811126708984, + "logps/ref_chosen": -62.722084045410156, + "logps/ref_rejected": -93.85621643066406, + "logps/rejected": -131.24859619140625, + "loss": 0.5618, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19636279344558716, + "margin_dpo/beta_margin_grad_std": 0.201200932264328, + "margin_dpo/beta_margin_mean": 2.3886351585388184, + "margin_dpo/loss_margin_mean": 23.8863525390625, + "margin_dpo/margin_mean": 23.8863525390625, + "margin_dpo/margin_std": 21.996925354003906, + "step": 264 + }, + { + "epoch": 0.3891336270190896, + "grad_norm": 68.70135498046875, + "learning_rate": 3.848656339557562e-07, + "logits/chosen": -0.6722284555435181, + "logits/rejected": -0.6433699131011963, + "logps/chosen": -76.23115539550781, + "logps/ref_chosen": -61.971466064453125, + "logps/ref_rejected": -88.02059936523438, + "logps/rejected": -127.41754150390625, + "loss": 0.5867, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20745858550071716, + "margin_dpo/beta_margin_grad_std": 0.22185558080673218, + "margin_dpo/beta_margin_mean": 2.5137252807617188, + "margin_dpo/loss_margin_mean": 25.137252807617188, + "margin_dpo/margin_mean": 25.137252807617188, + "margin_dpo/margin_std": 25.626853942871094, + "step": 265 + }, + { + "epoch": 0.39060205580029367, + "grad_norm": 54.78199768066406, + "learning_rate": 3.8378328531967507e-07, + "logits/chosen": -0.6929798126220703, + "logits/rejected": -0.6306042075157166, + "logps/chosen": -80.66532897949219, + "logps/ref_chosen": -67.09967041015625, + "logps/ref_rejected": -67.97122192382812, + "logps/rejected": -106.28300476074219, + "loss": 0.5605, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1999325454235077, + "margin_dpo/beta_margin_grad_std": 0.21910326182842255, + "margin_dpo/beta_margin_mean": 2.474611759185791, + "margin_dpo/loss_margin_mean": 24.746116638183594, + "margin_dpo/margin_mean": 24.746116638183594, + "margin_dpo/margin_std": 22.58720588684082, + "step": 266 + }, + { + "epoch": 0.3920704845814978, + "grad_norm": 50.09029769897461, + "learning_rate": 3.8269741136960646e-07, + "logits/chosen": -0.661482036113739, + "logits/rejected": -0.6170265078544617, + "logps/chosen": -82.08916473388672, + "logps/ref_chosen": -68.97074890136719, + "logps/ref_rejected": -90.16844940185547, + "logps/rejected": -130.65118408203125, + "loss": 0.4154, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16007937490940094, + "margin_dpo/beta_margin_grad_std": 0.18461111187934875, + "margin_dpo/beta_margin_mean": 2.7364323139190674, + "margin_dpo/loss_margin_mean": 27.364322662353516, + "margin_dpo/margin_mean": 27.364322662353516, + "margin_dpo/margin_std": 22.23681640625, + "step": 267 + }, + { + "epoch": 0.3935389133627019, + "grad_norm": 61.21752166748047, + "learning_rate": 3.8160804071933894e-07, + "logits/chosen": -0.6383650898933411, + "logits/rejected": -0.6218883991241455, + "logps/chosen": -68.4329833984375, + "logps/ref_chosen": -55.900306701660156, + "logps/ref_rejected": -101.64763641357422, + "logps/rejected": -139.42156982421875, + "loss": 0.4978, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18062824010849, + "margin_dpo/beta_margin_grad_std": 0.20679926872253418, + "margin_dpo/beta_margin_mean": 2.5241270065307617, + "margin_dpo/loss_margin_mean": 25.241270065307617, + "margin_dpo/margin_mean": 25.241270065307617, + "margin_dpo/margin_std": 21.52151870727539, + "step": 268 + }, + { + "epoch": 0.39500734214390604, + "grad_norm": 63.637428283691406, + "learning_rate": 3.8051520207480204e-07, + "logits/chosen": -0.6397280097007751, + "logits/rejected": -0.5932068228721619, + "logps/chosen": -82.90742492675781, + "logps/ref_chosen": -70.03955078125, + "logps/ref_rejected": -107.34937286376953, + "logps/rejected": -153.24514770507812, + "loss": 0.4033, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14418360590934753, + "margin_dpo/beta_margin_grad_std": 0.21151122450828552, + "margin_dpo/beta_margin_mean": 3.302790641784668, + "margin_dpo/loss_margin_mean": 33.02790832519531, + "margin_dpo/margin_mean": 33.02790832519531, + "margin_dpo/margin_std": 23.7076416015625, + "step": 269 + }, + { + "epoch": 0.3964757709251101, + "grad_norm": 42.040462493896484, + "learning_rate": 3.794189242333106e-07, + "logits/chosen": -0.6672676205635071, + "logits/rejected": -0.6468701362609863, + "logps/chosen": -80.33956146240234, + "logps/ref_chosen": -69.53347778320312, + "logps/ref_rejected": -109.92864990234375, + "logps/rejected": -145.84991455078125, + "loss": 0.506, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18614254891872406, + "margin_dpo/beta_margin_grad_std": 0.20391584932804108, + "margin_dpo/beta_margin_mean": 2.5115182399749756, + "margin_dpo/loss_margin_mean": 25.11518096923828, + "margin_dpo/margin_mean": 25.11518096923828, + "margin_dpo/margin_std": 22.150074005126953, + "step": 270 + }, + { + "epoch": 0.39794419970631423, + "grad_norm": 51.15155792236328, + "learning_rate": 3.7831923608280514e-07, + "logits/chosen": -0.6099239587783813, + "logits/rejected": -0.5680840611457825, + "logps/chosen": -70.89580535888672, + "logps/ref_chosen": -56.76457214355469, + "logps/ref_rejected": -92.51383209228516, + "logps/rejected": -132.5828399658203, + "loss": 0.5353, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1870347112417221, + "margin_dpo/beta_margin_grad_std": 0.21322524547576904, + "margin_dpo/beta_margin_mean": 2.5937767028808594, + "margin_dpo/loss_margin_mean": 25.937767028808594, + "margin_dpo/margin_mean": 25.937767028808594, + "margin_dpo/margin_std": 23.366622924804688, + "step": 271 + }, + { + "epoch": 0.39941262848751835, + "grad_norm": 51.133304595947266, + "learning_rate": 3.772161666010912e-07, + "logits/chosen": -0.6056419014930725, + "logits/rejected": -0.5937180519104004, + "logps/chosen": -62.401695251464844, + "logps/ref_chosen": -49.49715805053711, + "logps/ref_rejected": -105.54279327392578, + "logps/rejected": -150.09609985351562, + "loss": 0.5269, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.168976292014122, + "margin_dpo/beta_margin_grad_std": 0.23079171776771545, + "margin_dpo/beta_margin_mean": 3.1648764610290527, + "margin_dpo/loss_margin_mean": 31.648765563964844, + "margin_dpo/margin_mean": 31.64876365661621, + "margin_dpo/margin_std": 26.891244888305664, + "step": 272 + }, + { + "epoch": 0.4008810572687225, + "grad_norm": 58.39497375488281, + "learning_rate": 3.761097448550755e-07, + "logits/chosen": -0.5882784128189087, + "logits/rejected": -0.5531589984893799, + "logps/chosen": -77.80586242675781, + "logps/ref_chosen": -62.97539520263672, + "logps/ref_rejected": -92.49858093261719, + "logps/rejected": -137.8081817626953, + "loss": 0.4583, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16255009174346924, + "margin_dpo/beta_margin_grad_std": 0.20158691704273224, + "margin_dpo/beta_margin_mean": 3.0479135513305664, + "margin_dpo/loss_margin_mean": 30.479137420654297, + "margin_dpo/margin_mean": 30.479137420654297, + "margin_dpo/margin_std": 25.084999084472656, + "step": 273 + }, + { + "epoch": 0.4023494860499266, + "grad_norm": 53.60417938232422, + "learning_rate": 3.75e-07, + "logits/chosen": -0.6290228366851807, + "logits/rejected": -0.5923614501953125, + "logps/chosen": -71.82681274414062, + "logps/ref_chosen": -55.66770935058594, + "logps/ref_rejected": -77.33308410644531, + "logps/rejected": -119.93658447265625, + "loss": 0.5139, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.179681196808815, + "margin_dpo/beta_margin_grad_std": 0.21081170439720154, + "margin_dpo/beta_margin_mean": 2.6444411277770996, + "margin_dpo/loss_margin_mean": 26.44441032409668, + "margin_dpo/margin_mean": 26.44441032409668, + "margin_dpo/margin_std": 23.134292602539062, + "step": 274 + }, + { + "epoch": 0.40381791483113066, + "grad_norm": 63.069358825683594, + "learning_rate": 3.738869612786737e-07, + "logits/chosen": -0.6466611623764038, + "logits/rejected": -0.6279960870742798, + "logps/chosen": -59.85034942626953, + "logps/ref_chosen": -48.594703674316406, + "logps/ref_rejected": -93.30369567871094, + "logps/rejected": -132.1536407470703, + "loss": 0.4693, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17479778826236725, + "margin_dpo/beta_margin_grad_std": 0.19887307286262512, + "margin_dpo/beta_margin_mean": 2.759430408477783, + "margin_dpo/loss_margin_mean": 27.594303131103516, + "margin_dpo/margin_mean": 27.594303131103516, + "margin_dpo/margin_std": 24.204368591308594, + "step": 275 + }, + { + "epoch": 0.4052863436123348, + "grad_norm": 62.213233947753906, + "learning_rate": 3.7277065802070204e-07, + "logits/chosen": -0.6649171113967896, + "logits/rejected": -0.624710202217102, + "logps/chosen": -70.10076141357422, + "logps/ref_chosen": -56.57740783691406, + "logps/ref_rejected": -70.36566925048828, + "logps/rejected": -109.4853744506836, + "loss": 0.5922, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20924188196659088, + "margin_dpo/beta_margin_grad_std": 0.2318626046180725, + "margin_dpo/beta_margin_mean": 2.5596346855163574, + "margin_dpo/loss_margin_mean": 25.59634780883789, + "margin_dpo/margin_mean": 25.596345901489258, + "margin_dpo/margin_std": 24.71031951904297, + "step": 276 + }, + { + "epoch": 0.4067547723935389, + "grad_norm": 40.95648193359375, + "learning_rate": 3.71651119641714e-07, + "logits/chosen": -0.6606760025024414, + "logits/rejected": -0.6246751546859741, + "logps/chosen": -68.44947814941406, + "logps/ref_chosen": -56.27156066894531, + "logps/ref_rejected": -92.88127136230469, + "logps/rejected": -129.38565063476562, + "loss": 0.4261, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17114606499671936, + "margin_dpo/beta_margin_grad_std": 0.16686135530471802, + "margin_dpo/beta_margin_mean": 2.432648181915283, + "margin_dpo/loss_margin_mean": 24.326480865478516, + "margin_dpo/margin_mean": 24.326480865478516, + "margin_dpo/margin_std": 18.5596866607666, + "step": 277 + }, + { + "epoch": 0.40822320117474303, + "grad_norm": 47.080055236816406, + "learning_rate": 3.705283756425872e-07, + "logits/chosen": -0.6622641682624817, + "logits/rejected": -0.6517907381057739, + "logps/chosen": -64.23191833496094, + "logps/ref_chosen": -52.94194030761719, + "logps/ref_rejected": -91.25357818603516, + "logps/rejected": -132.509765625, + "loss": 0.4854, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17969730496406555, + "margin_dpo/beta_margin_grad_std": 0.21197958290576935, + "margin_dpo/beta_margin_mean": 2.9966213703155518, + "margin_dpo/loss_margin_mean": 29.96621322631836, + "margin_dpo/margin_mean": 29.96621322631836, + "margin_dpo/margin_std": 26.836585998535156, + "step": 278 + }, + { + "epoch": 0.40969162995594716, + "grad_norm": 50.956111907958984, + "learning_rate": 3.6940245560867e-07, + "logits/chosen": -0.6270808577537537, + "logits/rejected": -0.5984815955162048, + "logps/chosen": -60.80306625366211, + "logps/ref_chosen": -48.641319274902344, + "logps/ref_rejected": -87.8514404296875, + "logps/rejected": -129.2867889404297, + "loss": 0.4865, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17512354254722595, + "margin_dpo/beta_margin_grad_std": 0.21795479953289032, + "margin_dpo/beta_margin_mean": 2.9273602962493896, + "margin_dpo/loss_margin_mean": 29.273601531982422, + "margin_dpo/margin_mean": 29.273601531982422, + "margin_dpo/margin_std": 24.12653160095215, + "step": 279 + }, + { + "epoch": 0.4111600587371512, + "grad_norm": 37.822959899902344, + "learning_rate": 3.6827338920900253e-07, + "logits/chosen": -0.6415497064590454, + "logits/rejected": -0.6238715648651123, + "logps/chosen": -72.27760314941406, + "logps/ref_chosen": -58.797122955322266, + "logps/ref_rejected": -98.61885070800781, + "logps/rejected": -141.04861450195312, + "loss": 0.3466, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1335011124610901, + "margin_dpo/beta_margin_grad_std": 0.17966148257255554, + "margin_dpo/beta_margin_mean": 2.8949294090270996, + "margin_dpo/loss_margin_mean": 28.949295043945312, + "margin_dpo/margin_mean": 28.94929313659668, + "margin_dpo/margin_std": 18.588150024414062, + "step": 280 + }, + { + "epoch": 0.41262848751835535, + "grad_norm": 71.57465362548828, + "learning_rate": 3.6714120619553435e-07, + "logits/chosen": -0.6258310675621033, + "logits/rejected": -0.5813519358634949, + "logps/chosen": -67.82090759277344, + "logps/ref_chosen": -55.488521575927734, + "logps/ref_rejected": -80.88258361816406, + "logps/rejected": -118.60010528564453, + "loss": 0.4834, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1572715938091278, + "margin_dpo/beta_margin_grad_std": 0.1871940791606903, + "margin_dpo/beta_margin_mean": 2.5385141372680664, + "margin_dpo/loss_margin_mean": 25.38513946533203, + "margin_dpo/margin_mean": 25.38513946533203, + "margin_dpo/margin_std": 20.046871185302734, + "step": 281 + }, + { + "epoch": 0.41409691629955947, + "grad_norm": 49.311336517333984, + "learning_rate": 3.660059364023408e-07, + "logits/chosen": -0.6380579471588135, + "logits/rejected": -0.5905803442001343, + "logps/chosen": -85.72900390625, + "logps/ref_chosen": -73.07014465332031, + "logps/ref_rejected": -95.35098266601562, + "logps/rejected": -131.3958740234375, + "loss": 0.4738, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18345773220062256, + "margin_dpo/beta_margin_grad_std": 0.18459081649780273, + "margin_dpo/beta_margin_mean": 2.3386025428771973, + "margin_dpo/loss_margin_mean": 23.38602638244629, + "margin_dpo/margin_mean": 23.386028289794922, + "margin_dpo/margin_std": 20.714370727539062, + "step": 282 + }, + { + "epoch": 0.4155653450807636, + "grad_norm": 48.578853607177734, + "learning_rate": 3.6486760974483685e-07, + "logits/chosen": -0.6443203091621399, + "logits/rejected": -0.6160309314727783, + "logps/chosen": -74.32984161376953, + "logps/ref_chosen": -61.89844512939453, + "logps/ref_rejected": -96.98655700683594, + "logps/rejected": -137.47125244140625, + "loss": 0.478, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16398155689239502, + "margin_dpo/beta_margin_grad_std": 0.20954221487045288, + "margin_dpo/beta_margin_mean": 2.8053293228149414, + "margin_dpo/loss_margin_mean": 28.05329132080078, + "margin_dpo/margin_mean": 28.05329132080078, + "margin_dpo/margin_std": 23.433940887451172, + "step": 283 + }, + { + "epoch": 0.4170337738619677, + "grad_norm": 43.66304016113281, + "learning_rate": 3.6372625621898863e-07, + "logits/chosen": -0.6190305948257446, + "logits/rejected": -0.6051605939865112, + "logps/chosen": -72.00505828857422, + "logps/ref_chosen": -58.4355354309082, + "logps/ref_rejected": -93.46926879882812, + "logps/rejected": -136.72036743164062, + "loss": 0.4128, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1565876454114914, + "margin_dpo/beta_margin_grad_std": 0.18501752614974976, + "margin_dpo/beta_margin_mean": 2.9681572914123535, + "margin_dpo/loss_margin_mean": 29.68157196044922, + "margin_dpo/margin_mean": 29.68157196044922, + "margin_dpo/margin_std": 25.59400749206543, + "step": 284 + }, + { + "epoch": 0.4185022026431718, + "grad_norm": 55.42721176147461, + "learning_rate": 3.625819059005228e-07, + "logits/chosen": -0.6967588663101196, + "logits/rejected": -0.670876681804657, + "logps/chosen": -81.565185546875, + "logps/ref_chosen": -66.2322006225586, + "logps/ref_rejected": -99.1268310546875, + "logps/rejected": -141.13421630859375, + "loss": 0.4222, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16293823719024658, + "margin_dpo/beta_margin_grad_std": 0.1807018220424652, + "margin_dpo/beta_margin_mean": 2.667440414428711, + "margin_dpo/loss_margin_mean": 26.67440414428711, + "margin_dpo/margin_mean": 26.674402236938477, + "margin_dpo/margin_std": 20.944385528564453, + "step": 285 + }, + { + "epoch": 0.4199706314243759, + "grad_norm": 55.817623138427734, + "learning_rate": 3.614345889441346e-07, + "logits/chosen": -0.6651911735534668, + "logits/rejected": -0.6330760717391968, + "logps/chosen": -86.78990173339844, + "logps/ref_chosen": -72.95100402832031, + "logps/ref_rejected": -88.58845520019531, + "logps/rejected": -130.15069580078125, + "loss": 0.551, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18454879522323608, + "margin_dpo/beta_margin_grad_std": 0.22670768201351166, + "margin_dpo/beta_margin_mean": 2.772334575653076, + "margin_dpo/loss_margin_mean": 27.723342895507812, + "margin_dpo/margin_mean": 27.723342895507812, + "margin_dpo/margin_std": 25.246835708618164, + "step": 286 + }, + { + "epoch": 0.42143906020558003, + "grad_norm": 53.33695602416992, + "learning_rate": 3.6028433558269275e-07, + "logits/chosen": -0.6670126914978027, + "logits/rejected": -0.6226764917373657, + "logps/chosen": -75.57640075683594, + "logps/ref_chosen": -61.54115295410156, + "logps/ref_rejected": -77.6960678100586, + "logps/rejected": -118.70558166503906, + "loss": 0.5322, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19202715158462524, + "margin_dpo/beta_margin_grad_std": 0.21082551777362823, + "margin_dpo/beta_margin_mean": 2.697427272796631, + "margin_dpo/loss_margin_mean": 26.974271774291992, + "margin_dpo/margin_mean": 26.974271774291992, + "margin_dpo/margin_std": 26.020793914794922, + "step": 287 + }, + { + "epoch": 0.42290748898678415, + "grad_norm": 58.48661804199219, + "learning_rate": 3.5913117612644327e-07, + "logits/chosen": -0.6334189176559448, + "logits/rejected": -0.6017390489578247, + "logps/chosen": -72.49565124511719, + "logps/ref_chosen": -56.661224365234375, + "logps/ref_rejected": -87.335693359375, + "logps/rejected": -130.8236541748047, + "loss": 0.4353, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16223303973674774, + "margin_dpo/beta_margin_grad_std": 0.19457679986953735, + "margin_dpo/beta_margin_mean": 2.765352249145508, + "margin_dpo/loss_margin_mean": 27.653522491455078, + "margin_dpo/margin_mean": 27.653522491455078, + "margin_dpo/margin_std": 21.126564025878906, + "step": 288 + }, + { + "epoch": 0.4243759177679883, + "grad_norm": 51.02857208251953, + "learning_rate": 3.5797514096221024e-07, + "logits/chosen": -0.6474858522415161, + "logits/rejected": -0.6363035440444946, + "logps/chosen": -61.39606857299805, + "logps/ref_chosen": -45.23039245605469, + "logps/ref_rejected": -87.64266967773438, + "logps/rejected": -134.01449584960938, + "loss": 0.5024, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18502648174762726, + "margin_dpo/beta_margin_grad_std": 0.20525604486465454, + "margin_dpo/beta_margin_mean": 3.020615339279175, + "margin_dpo/loss_margin_mean": 30.206151962280273, + "margin_dpo/margin_mean": 30.206153869628906, + "margin_dpo/margin_std": 29.076404571533203, + "step": 289 + }, + { + "epoch": 0.42584434654919234, + "grad_norm": 61.92301559448242, + "learning_rate": 3.568162605525952e-07, + "logits/chosen": -0.6007183194160461, + "logits/rejected": -0.596926748752594, + "logps/chosen": -71.86837768554688, + "logps/ref_chosen": -55.47149658203125, + "logps/ref_rejected": -116.70857238769531, + "logps/rejected": -164.587646484375, + "loss": 0.51, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17327924072742462, + "margin_dpo/beta_margin_grad_std": 0.2234293818473816, + "margin_dpo/beta_margin_mean": 3.1482198238372803, + "margin_dpo/loss_margin_mean": 31.482196807861328, + "margin_dpo/margin_mean": 31.482196807861328, + "margin_dpo/margin_std": 29.341014862060547, + "step": 290 + }, + { + "epoch": 0.42731277533039647, + "grad_norm": 56.78514099121094, + "learning_rate": 3.5565456543517485e-07, + "logits/chosen": -0.66310715675354, + "logits/rejected": -0.6308495998382568, + "logps/chosen": -75.83061981201172, + "logps/ref_chosen": -63.26036834716797, + "logps/ref_rejected": -89.29708862304688, + "logps/rejected": -129.5008087158203, + "loss": 0.4818, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16443225741386414, + "margin_dpo/beta_margin_grad_std": 0.20096154510974884, + "margin_dpo/beta_margin_mean": 2.7633466720581055, + "margin_dpo/loss_margin_mean": 27.633464813232422, + "margin_dpo/margin_mean": 27.633464813232422, + "margin_dpo/margin_std": 22.616226196289062, + "step": 291 + }, + { + "epoch": 0.4287812041116006, + "grad_norm": 53.309532165527344, + "learning_rate": 3.5449008622169583e-07, + "logits/chosen": -0.644907534122467, + "logits/rejected": -0.6032723188400269, + "logps/chosen": -70.52488708496094, + "logps/ref_chosen": -53.91852951049805, + "logps/ref_rejected": -89.96138000488281, + "logps/rejected": -136.3544921875, + "loss": 0.3955, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1559363305568695, + "margin_dpo/beta_margin_grad_std": 0.17519932985305786, + "margin_dpo/beta_margin_mean": 2.978675127029419, + "margin_dpo/loss_margin_mean": 29.78675079345703, + "margin_dpo/margin_mean": 29.78675079345703, + "margin_dpo/margin_std": 24.420841217041016, + "step": 292 + }, + { + "epoch": 0.4302496328928047, + "grad_norm": 54.48039245605469, + "learning_rate": 3.5332285359726846e-07, + "logits/chosen": -0.6540181040763855, + "logits/rejected": -0.6253507137298584, + "logps/chosen": -76.41354370117188, + "logps/ref_chosen": -60.376033782958984, + "logps/ref_rejected": -77.8524398803711, + "logps/rejected": -118.03453063964844, + "loss": 0.5943, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2110597789287567, + "margin_dpo/beta_margin_grad_std": 0.21620804071426392, + "margin_dpo/beta_margin_mean": 2.4144577980041504, + "margin_dpo/loss_margin_mean": 24.144577026367188, + "margin_dpo/margin_mean": 24.144577026367188, + "margin_dpo/margin_std": 24.406749725341797, + "step": 293 + }, + { + "epoch": 0.43171806167400884, + "grad_norm": 42.810264587402344, + "learning_rate": 3.5215289831955786e-07, + "logits/chosen": -0.6317086219787598, + "logits/rejected": -0.6187810897827148, + "logps/chosen": -62.521305084228516, + "logps/ref_chosen": -48.0875358581543, + "logps/ref_rejected": -81.89698791503906, + "logps/rejected": -123.5872573852539, + "loss": 0.5175, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1887100636959076, + "margin_dpo/beta_margin_grad_std": 0.20870058238506317, + "margin_dpo/beta_margin_mean": 2.725649833679199, + "margin_dpo/loss_margin_mean": 27.256500244140625, + "margin_dpo/margin_mean": 27.256500244140625, + "margin_dpo/margin_std": 25.993709564208984, + "step": 294 + }, + { + "epoch": 0.4331864904552129, + "grad_norm": 61.524410247802734, + "learning_rate": 3.509802512179737e-07, + "logits/chosen": -0.59177565574646, + "logits/rejected": -0.5824375748634338, + "logps/chosen": -68.55509185791016, + "logps/ref_chosen": -49.92467498779297, + "logps/ref_rejected": -87.45632934570312, + "logps/rejected": -133.15512084960938, + "loss": 0.595, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18381169438362122, + "margin_dpo/beta_margin_grad_std": 0.22098243236541748, + "margin_dpo/beta_margin_mean": 2.706838607788086, + "margin_dpo/loss_margin_mean": 27.06838607788086, + "margin_dpo/margin_mean": 27.06838607788086, + "margin_dpo/margin_std": 24.90131378173828, + "step": 295 + }, + { + "epoch": 0.434654919236417, + "grad_norm": 85.08708953857422, + "learning_rate": 3.498049431928577e-07, + "logits/chosen": -0.6830310821533203, + "logits/rejected": -0.6430518627166748, + "logps/chosen": -83.78386688232422, + "logps/ref_chosen": -65.49124145507812, + "logps/ref_rejected": -93.08908081054688, + "logps/rejected": -134.91554260253906, + "loss": 0.7283, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.23585554957389832, + "margin_dpo/beta_margin_grad_std": 0.2520889639854431, + "margin_dpo/beta_margin_mean": 2.3533830642700195, + "margin_dpo/loss_margin_mean": 23.533828735351562, + "margin_dpo/margin_mean": 23.53382682800293, + "margin_dpo/margin_std": 26.529495239257812, + "step": 296 + }, + { + "epoch": 0.43612334801762115, + "grad_norm": 44.37043762207031, + "learning_rate": 3.486270052146694e-07, + "logits/chosen": -0.5455184578895569, + "logits/rejected": -0.5094451904296875, + "logps/chosen": -74.52252197265625, + "logps/ref_chosen": -56.47694778442383, + "logps/ref_rejected": -95.1385498046875, + "logps/rejected": -141.77182006835938, + "loss": 0.4232, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16087649762630463, + "margin_dpo/beta_margin_grad_std": 0.18778559565544128, + "margin_dpo/beta_margin_mean": 2.858769416809082, + "margin_dpo/loss_margin_mean": 28.587692260742188, + "margin_dpo/margin_mean": 28.587696075439453, + "margin_dpo/margin_std": 23.822004318237305, + "step": 297 + }, + { + "epoch": 0.43759177679882527, + "grad_norm": 43.25141143798828, + "learning_rate": 3.474464683231698e-07, + "logits/chosen": -0.642224907875061, + "logits/rejected": -0.6368216276168823, + "logps/chosen": -83.6908950805664, + "logps/ref_chosen": -67.32516479492188, + "logps/ref_rejected": -116.66217041015625, + "logps/rejected": -162.75466918945312, + "loss": 0.4123, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16109539568424225, + "margin_dpo/beta_margin_grad_std": 0.18005360662937164, + "margin_dpo/beta_margin_mean": 2.972676992416382, + "margin_dpo/loss_margin_mean": 29.726768493652344, + "margin_dpo/margin_mean": 29.726768493652344, + "margin_dpo/margin_std": 26.26955795288086, + "step": 298 + }, + { + "epoch": 0.4390602055800294, + "grad_norm": 59.36482238769531, + "learning_rate": 3.462633636266041e-07, + "logits/chosen": -0.5800520181655884, + "logits/rejected": -0.5592917799949646, + "logps/chosen": -64.02508544921875, + "logps/ref_chosen": -48.96209716796875, + "logps/ref_rejected": -84.32823944091797, + "logps/rejected": -130.45758056640625, + "loss": 0.5094, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1787450611591339, + "margin_dpo/beta_margin_grad_std": 0.2245379388332367, + "margin_dpo/beta_margin_mean": 3.1066365242004395, + "margin_dpo/loss_margin_mean": 31.066364288330078, + "margin_dpo/margin_mean": 31.066362380981445, + "margin_dpo/margin_std": 27.945383071899414, + "step": 299 + }, + { + "epoch": 0.44052863436123346, + "grad_norm": 81.1234359741211, + "learning_rate": 3.4507772230088147e-07, + "logits/chosen": -0.6008783578872681, + "logits/rejected": -0.5817815065383911, + "logps/chosen": -80.24883270263672, + "logps/ref_chosen": -59.073707580566406, + "logps/ref_rejected": -95.9664535522461, + "logps/rejected": -146.92205810546875, + "loss": 0.699, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20390301942825317, + "margin_dpo/beta_margin_grad_std": 0.26888635754585266, + "margin_dpo/beta_margin_mean": 2.9780476093292236, + "margin_dpo/loss_margin_mean": 29.780475616455078, + "margin_dpo/margin_mean": 29.780475616455078, + "margin_dpo/margin_std": 29.694149017333984, + "step": 300 + }, + { + "epoch": 0.44052863436123346, + "eval_logits/chosen": -0.6107151508331299, + "eval_logits/rejected": -0.5844902992248535, + "eval_logps/chosen": -99.96916198730469, + "eval_logps/ref_chosen": -79.05104064941406, + "eval_logps/ref_rejected": -86.79793548583984, + "eval_logps/rejected": -125.81926727294922, + "eval_loss": 0.4413561224937439, + "eval_margin_dpo/beta": 0.10000000149011612, + "eval_margin_dpo/beta_margin_grad_mean": -0.28278234601020813, + "eval_margin_dpo/beta_margin_grad_std": 0.2516450583934784, + "eval_margin_dpo/beta_margin_mean": 1.8103224039077759, + "eval_margin_dpo/loss_margin_mean": 18.10322380065918, + "eval_margin_dpo/margin_mean": 18.10322380065918, + "eval_margin_dpo/margin_std": 23.78249168395996, + "eval_runtime": 39.9127, + "eval_samples_per_second": 58.603, + "eval_steps_per_second": 1.854, + "step": 300 + }, + { + "epoch": 0.4419970631424376, + "grad_norm": 47.328758239746094, + "learning_rate": 3.4388957558875316e-07, + "logits/chosen": -0.6326008439064026, + "logits/rejected": -0.6015191078186035, + "logps/chosen": -75.48579406738281, + "logps/ref_chosen": -57.249366760253906, + "logps/ref_rejected": -92.35354614257812, + "logps/rejected": -141.67437744140625, + "loss": 0.398, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15365365147590637, + "margin_dpo/beta_margin_grad_std": 0.18743260204792023, + "margin_dpo/beta_margin_mean": 3.1084399223327637, + "margin_dpo/loss_margin_mean": 31.084396362304688, + "margin_dpo/margin_mean": 31.084396362304688, + "margin_dpo/margin_std": 25.64261245727539, + "step": 301 + }, + { + "epoch": 0.4434654919236417, + "grad_norm": 68.61962127685547, + "learning_rate": 3.426989547989902e-07, + "logits/chosen": -0.5673216581344604, + "logits/rejected": -0.5571401119232178, + "logps/chosen": -66.45941925048828, + "logps/ref_chosen": -51.19799041748047, + "logps/ref_rejected": -97.22636413574219, + "logps/rejected": -141.73390197753906, + "loss": 0.5652, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18492794036865234, + "margin_dpo/beta_margin_grad_std": 0.20831260085105896, + "margin_dpo/beta_margin_mean": 2.9246113300323486, + "margin_dpo/loss_margin_mean": 29.246112823486328, + "margin_dpo/margin_mean": 29.246112823486328, + "margin_dpo/margin_std": 29.20469093322754, + "step": 302 + }, + { + "epoch": 0.44493392070484583, + "grad_norm": 67.56126403808594, + "learning_rate": 3.4150589130555773e-07, + "logits/chosen": -0.6228208541870117, + "logits/rejected": -0.5857237577438354, + "logps/chosen": -83.40345764160156, + "logps/ref_chosen": -66.71394348144531, + "logps/ref_rejected": -86.94542694091797, + "logps/rejected": -131.71636962890625, + "loss": 0.6418, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20844492316246033, + "margin_dpo/beta_margin_grad_std": 0.25230517983436584, + "margin_dpo/beta_margin_mean": 2.80814266204834, + "margin_dpo/loss_margin_mean": 28.081424713134766, + "margin_dpo/margin_mean": 28.081424713134766, + "margin_dpo/margin_std": 28.703716278076172, + "step": 303 + }, + { + "epoch": 0.44640234948604995, + "grad_norm": 58.02699661254883, + "learning_rate": 3.403104165467883e-07, + "logits/chosen": -0.6468909382820129, + "logits/rejected": -0.6176923513412476, + "logps/chosen": -86.12702178955078, + "logps/ref_chosen": -71.95069885253906, + "logps/ref_rejected": -90.47203063964844, + "logps/rejected": -132.961181640625, + "loss": 0.4589, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15100273489952087, + "margin_dpo/beta_margin_grad_std": 0.2101380079984665, + "margin_dpo/beta_margin_mean": 2.831282377243042, + "margin_dpo/loss_margin_mean": 28.312822341918945, + "margin_dpo/margin_mean": 28.312822341918945, + "margin_dpo/margin_std": 20.519695281982422, + "step": 304 + }, + { + "epoch": 0.447870778267254, + "grad_norm": 51.01959991455078, + "learning_rate": 3.391125620245535e-07, + "logits/chosen": -0.6300150156021118, + "logits/rejected": -0.5907981395721436, + "logps/chosen": -84.66513061523438, + "logps/ref_chosen": -66.79523468017578, + "logps/ref_rejected": -92.75459289550781, + "logps/rejected": -139.4754638671875, + "loss": 0.4261, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16771967709064484, + "margin_dpo/beta_margin_grad_std": 0.1794486939907074, + "margin_dpo/beta_margin_mean": 2.885097026824951, + "margin_dpo/loss_margin_mean": 28.850971221923828, + "margin_dpo/margin_mean": 28.850971221923828, + "margin_dpo/margin_std": 26.399646759033203, + "step": 305 + }, + { + "epoch": 0.44933920704845814, + "grad_norm": 74.01148986816406, + "learning_rate": 3.3791235930343417e-07, + "logits/chosen": -0.6759564876556396, + "logits/rejected": -0.626822829246521, + "logps/chosen": -85.26339721679688, + "logps/ref_chosen": -69.68389892578125, + "logps/ref_rejected": -85.15919494628906, + "logps/rejected": -128.5599822998047, + "loss": 0.5096, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16733743250370026, + "margin_dpo/beta_margin_grad_std": 0.21074122190475464, + "margin_dpo/beta_margin_mean": 2.78212833404541, + "margin_dpo/loss_margin_mean": 27.821285247802734, + "margin_dpo/margin_mean": 27.8212833404541, + "margin_dpo/margin_std": 23.266021728515625, + "step": 306 + }, + { + "epoch": 0.45080763582966227, + "grad_norm": 54.2598762512207, + "learning_rate": 3.367098400098881e-07, + "logits/chosen": -0.6196017265319824, + "logits/rejected": -0.5918940305709839, + "logps/chosen": -86.1854476928711, + "logps/ref_chosen": -70.16542053222656, + "logps/ref_rejected": -86.97230529785156, + "logps/rejected": -128.44007873535156, + "loss": 0.5398, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1930994689464569, + "margin_dpo/beta_margin_grad_std": 0.21751633286476135, + "margin_dpo/beta_margin_mean": 2.544773578643799, + "margin_dpo/loss_margin_mean": 25.447734832763672, + "margin_dpo/margin_mean": 25.447734832763672, + "margin_dpo/margin_std": 24.255327224731445, + "step": 307 + }, + { + "epoch": 0.4522760646108664, + "grad_norm": 42.99238967895508, + "learning_rate": 3.355050358314172e-07, + "logits/chosen": -0.6084394454956055, + "logits/rejected": -0.581619381904602, + "logps/chosen": -70.17066955566406, + "logps/ref_chosen": -55.2449951171875, + "logps/ref_rejected": -79.37226104736328, + "logps/rejected": -123.38723754882812, + "loss": 0.4913, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1686662882566452, + "margin_dpo/beta_margin_grad_std": 0.1977521926164627, + "margin_dpo/beta_margin_mean": 2.908930778503418, + "margin_dpo/loss_margin_mean": 29.089309692382812, + "margin_dpo/margin_mean": 29.089309692382812, + "margin_dpo/margin_std": 26.31514549255371, + "step": 308 + }, + { + "epoch": 0.45374449339207046, + "grad_norm": 54.909400939941406, + "learning_rate": 3.3429797851573183e-07, + "logits/chosen": -0.6183820366859436, + "logits/rejected": -0.5834609866142273, + "logps/chosen": -66.57117462158203, + "logps/ref_chosen": -48.959083557128906, + "logps/ref_rejected": -82.34072875976562, + "logps/rejected": -128.1937713623047, + "loss": 0.5046, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17868635058403015, + "margin_dpo/beta_margin_grad_std": 0.21038393676280975, + "margin_dpo/beta_margin_mean": 2.824094295501709, + "margin_dpo/loss_margin_mean": 28.240943908691406, + "margin_dpo/margin_mean": 28.240943908691406, + "margin_dpo/margin_std": 24.293102264404297, + "step": 309 + }, + { + "epoch": 0.4552129221732746, + "grad_norm": 50.08707809448242, + "learning_rate": 3.3308869986991487e-07, + "logits/chosen": -0.6874780058860779, + "logits/rejected": -0.6412575244903564, + "logps/chosen": -78.43482971191406, + "logps/ref_chosen": -62.74177932739258, + "logps/ref_rejected": -79.9302978515625, + "logps/rejected": -120.06564331054688, + "loss": 0.4422, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17856904864311218, + "margin_dpo/beta_margin_grad_std": 0.16496190428733826, + "margin_dpo/beta_margin_mean": 2.4442286491394043, + "margin_dpo/loss_margin_mean": 24.44228744506836, + "margin_dpo/margin_mean": 24.44228744506836, + "margin_dpo/margin_std": 20.531139373779297, + "step": 310 + }, + { + "epoch": 0.4566813509544787, + "grad_norm": 63.15940475463867, + "learning_rate": 3.3187723175958346e-07, + "logits/chosen": -0.5884615182876587, + "logits/rejected": -0.54796302318573, + "logps/chosen": -73.1263656616211, + "logps/ref_chosen": -53.027976989746094, + "logps/ref_rejected": -77.43820190429688, + "logps/rejected": -131.83961486816406, + "loss": 0.3482, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12926018238067627, + "margin_dpo/beta_margin_grad_std": 0.1706952154636383, + "margin_dpo/beta_margin_mean": 3.4303030967712402, + "margin_dpo/loss_margin_mean": 34.30303192138672, + "margin_dpo/margin_mean": 34.30303192138672, + "margin_dpo/margin_std": 25.374624252319336, + "step": 311 + }, + { + "epoch": 0.4581497797356828, + "grad_norm": 56.578857421875, + "learning_rate": 3.306636061080487e-07, + "logits/chosen": -0.5995860695838928, + "logits/rejected": -0.555045485496521, + "logps/chosen": -65.98387908935547, + "logps/ref_chosen": -49.39221954345703, + "logps/ref_rejected": -75.79280090332031, + "logps/rejected": -122.10321807861328, + "loss": 0.4842, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1694258749485016, + "margin_dpo/beta_margin_grad_std": 0.21930669248104095, + "margin_dpo/beta_margin_mean": 2.9718756675720215, + "margin_dpo/loss_margin_mean": 29.7187557220459, + "margin_dpo/margin_mean": 29.7187557220459, + "margin_dpo/margin_std": 26.206937789916992, + "step": 312 + }, + { + "epoch": 0.45961820851688695, + "grad_norm": 57.107025146484375, + "learning_rate": 3.2944785489547537e-07, + "logits/chosen": -0.6909008026123047, + "logits/rejected": -0.6554454565048218, + "logps/chosen": -64.720458984375, + "logps/ref_chosen": -50.152740478515625, + "logps/ref_rejected": -86.40620422363281, + "logps/rejected": -126.65206909179688, + "loss": 0.6326, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.21527621150016785, + "margin_dpo/beta_margin_grad_std": 0.22800129652023315, + "margin_dpo/beta_margin_mean": 2.567814350128174, + "margin_dpo/loss_margin_mean": 25.678142547607422, + "margin_dpo/margin_mean": 25.678142547607422, + "margin_dpo/margin_std": 26.8893985748291, + "step": 313 + }, + { + "epoch": 0.461086637298091, + "grad_norm": 58.55873489379883, + "learning_rate": 3.2823001015803857e-07, + "logits/chosen": -0.6338675022125244, + "logits/rejected": -0.609628438949585, + "logps/chosen": -72.63245391845703, + "logps/ref_chosen": -57.23758316040039, + "logps/ref_rejected": -97.59652709960938, + "logps/rejected": -138.97230529785156, + "loss": 0.5754, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2068512737751007, + "margin_dpo/beta_margin_grad_std": 0.22196519374847412, + "margin_dpo/beta_margin_mean": 2.598090648651123, + "margin_dpo/loss_margin_mean": 25.98090362548828, + "margin_dpo/margin_mean": 25.980905532836914, + "margin_dpo/margin_std": 25.633577346801758, + "step": 314 + }, + { + "epoch": 0.46255506607929514, + "grad_norm": 47.718597412109375, + "learning_rate": 3.270101039870797e-07, + "logits/chosen": -0.6039018630981445, + "logits/rejected": -0.5841349959373474, + "logps/chosen": -64.1601791381836, + "logps/ref_chosen": -49.06958770751953, + "logps/ref_rejected": -85.68087768554688, + "logps/rejected": -125.34097290039062, + "loss": 0.4916, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18752586841583252, + "margin_dpo/beta_margin_grad_std": 0.18681946396827698, + "margin_dpo/beta_margin_mean": 2.456951141357422, + "margin_dpo/loss_margin_mean": 24.56951141357422, + "margin_dpo/margin_mean": 24.56951141357422, + "margin_dpo/margin_std": 22.756999969482422, + "step": 315 + }, + { + "epoch": 0.46402349486049926, + "grad_norm": 51.623634338378906, + "learning_rate": 3.2578816852826086e-07, + "logits/chosen": -0.6183241605758667, + "logits/rejected": -0.6124423146247864, + "logps/chosen": -71.89447784423828, + "logps/ref_chosen": -54.26074981689453, + "logps/ref_rejected": -101.2814712524414, + "logps/rejected": -148.76461791992188, + "loss": 0.4158, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15852956473827362, + "margin_dpo/beta_margin_grad_std": 0.18403339385986328, + "margin_dpo/beta_margin_mean": 2.984943389892578, + "margin_dpo/loss_margin_mean": 29.84943389892578, + "margin_dpo/margin_mean": 29.84943389892578, + "margin_dpo/margin_std": 26.252422332763672, + "step": 316 + }, + { + "epoch": 0.4654919236417034, + "grad_norm": 38.064273834228516, + "learning_rate": 3.2456423598071783e-07, + "logits/chosen": -0.6562374830245972, + "logits/rejected": -0.6188434958457947, + "logps/chosen": -69.55624389648438, + "logps/ref_chosen": -56.094207763671875, + "logps/ref_rejected": -100.69905090332031, + "logps/rejected": -148.02902221679688, + "loss": 0.3529, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13426414132118225, + "margin_dpo/beta_margin_grad_std": 0.1786133348941803, + "margin_dpo/beta_margin_mean": 3.386793375015259, + "margin_dpo/loss_margin_mean": 33.86793518066406, + "margin_dpo/margin_mean": 33.86793518066406, + "margin_dpo/margin_std": 24.910192489624023, + "step": 317 + }, + { + "epoch": 0.4669603524229075, + "grad_norm": 47.10121154785156, + "learning_rate": 3.233383385962115e-07, + "logits/chosen": -0.6792348623275757, + "logits/rejected": -0.6371433138847351, + "logps/chosen": -77.5732421875, + "logps/ref_chosen": -64.64570617675781, + "logps/ref_rejected": -82.76425170898438, + "logps/rejected": -126.16979217529297, + "loss": 0.4256, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15739941596984863, + "margin_dpo/beta_margin_grad_std": 0.1940799355506897, + "margin_dpo/beta_margin_mean": 3.047799587249756, + "margin_dpo/loss_margin_mean": 30.47799301147461, + "margin_dpo/margin_mean": 30.47799301147461, + "margin_dpo/margin_std": 25.105358123779297, + "step": 318 + }, + { + "epoch": 0.4684287812041116, + "grad_norm": 41.460880279541016, + "learning_rate": 3.2211050867827805e-07, + "logits/chosen": -0.6292097568511963, + "logits/rejected": -0.6156477928161621, + "logps/chosen": -62.311241149902344, + "logps/ref_chosen": -49.383758544921875, + "logps/ref_rejected": -113.90650939941406, + "logps/rejected": -156.33575439453125, + "loss": 0.363, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1437537968158722, + "margin_dpo/beta_margin_grad_std": 0.17065343260765076, + "margin_dpo/beta_margin_mean": 2.9501757621765137, + "margin_dpo/loss_margin_mean": 29.501754760742188, + "margin_dpo/margin_mean": 29.501754760742188, + "margin_dpo/margin_std": 22.302837371826172, + "step": 319 + }, + { + "epoch": 0.4698972099853157, + "grad_norm": 51.28620910644531, + "learning_rate": 3.208807785813777e-07, + "logits/chosen": -0.6504048109054565, + "logits/rejected": -0.6378560066223145, + "logps/chosen": -74.24742126464844, + "logps/ref_chosen": -59.50489044189453, + "logps/ref_rejected": -97.66716766357422, + "logps/rejected": -139.2616729736328, + "loss": 0.4858, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17591118812561035, + "margin_dpo/beta_margin_grad_std": 0.1899009644985199, + "margin_dpo/beta_margin_mean": 2.685196876525879, + "margin_dpo/loss_margin_mean": 26.851966857910156, + "margin_dpo/margin_mean": 26.85196876525879, + "margin_dpo/margin_std": 23.467105865478516, + "step": 320 + }, + { + "epoch": 0.4713656387665198, + "grad_norm": 72.78955078125, + "learning_rate": 3.1964918071004217e-07, + "logits/chosen": -0.6326063871383667, + "logits/rejected": -0.5948277711868286, + "logps/chosen": -80.62930297851562, + "logps/ref_chosen": -61.548683166503906, + "logps/ref_rejected": -91.64103698730469, + "logps/rejected": -136.67556762695312, + "loss": 0.7106, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.22593584656715393, + "margin_dpo/beta_margin_grad_std": 0.2594347894191742, + "margin_dpo/beta_margin_mean": 2.595390558242798, + "margin_dpo/loss_margin_mean": 25.95390510559082, + "margin_dpo/margin_mean": 25.953907012939453, + "margin_dpo/margin_std": 27.37790298461914, + "step": 321 + }, + { + "epoch": 0.47283406754772395, + "grad_norm": 53.10894775390625, + "learning_rate": 3.184157475180207e-07, + "logits/chosen": -0.6191203594207764, + "logits/rejected": -0.597158670425415, + "logps/chosen": -72.91082763671875, + "logps/ref_chosen": -57.29003143310547, + "logps/ref_rejected": -95.74992370605469, + "logps/rejected": -143.1072235107422, + "loss": 0.4304, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16689737141132355, + "margin_dpo/beta_margin_grad_std": 0.18274548649787903, + "margin_dpo/beta_margin_mean": 3.173649311065674, + "margin_dpo/loss_margin_mean": 31.736494064331055, + "margin_dpo/margin_mean": 31.736492156982422, + "margin_dpo/margin_std": 27.840457916259766, + "step": 322 + }, + { + "epoch": 0.47430249632892807, + "grad_norm": 46.58567428588867, + "learning_rate": 3.171805115074251e-07, + "logits/chosen": -0.6087906360626221, + "logits/rejected": -0.5820388197898865, + "logps/chosen": -66.820556640625, + "logps/ref_chosen": -51.23395919799805, + "logps/ref_rejected": -75.06192016601562, + "logps/rejected": -121.96331787109375, + "loss": 0.422, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15601900219917297, + "margin_dpo/beta_margin_grad_std": 0.19936603307724, + "margin_dpo/beta_margin_mean": 3.1314802169799805, + "margin_dpo/loss_margin_mean": 31.314802169799805, + "margin_dpo/margin_mean": 31.314802169799805, + "margin_dpo/margin_std": 25.376670837402344, + "step": 323 + }, + { + "epoch": 0.47577092511013214, + "grad_norm": 56.77102279663086, + "learning_rate": 3.1594350522787295e-07, + "logits/chosen": -0.6091630458831787, + "logits/rejected": -0.5578924417495728, + "logps/chosen": -82.68987274169922, + "logps/ref_chosen": -65.13516998291016, + "logps/ref_rejected": -86.47750091552734, + "logps/rejected": -133.79421997070312, + "loss": 0.4592, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16746217012405396, + "margin_dpo/beta_margin_grad_std": 0.20326107740402222, + "margin_dpo/beta_margin_mean": 2.9762015342712402, + "margin_dpo/loss_margin_mean": 29.762012481689453, + "margin_dpo/margin_mean": 29.762012481689453, + "margin_dpo/margin_std": 25.058231353759766, + "step": 324 + }, + { + "epoch": 0.47723935389133626, + "grad_norm": 43.94253158569336, + "learning_rate": 3.147047612756302e-07, + "logits/chosen": -0.625763475894928, + "logits/rejected": -0.5584316253662109, + "logps/chosen": -70.6198501586914, + "logps/ref_chosen": -56.215599060058594, + "logps/ref_rejected": -70.0859375, + "logps/rejected": -113.17784118652344, + "loss": 0.4258, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16012780368328094, + "margin_dpo/beta_margin_grad_std": 0.19097131490707397, + "margin_dpo/beta_margin_mean": 2.868764877319336, + "margin_dpo/loss_margin_mean": 28.68764877319336, + "margin_dpo/margin_mean": 28.68764877319336, + "margin_dpo/margin_std": 21.614681243896484, + "step": 325 + }, + { + "epoch": 0.4787077826725404, + "grad_norm": 57.078155517578125, + "learning_rate": 3.134643122927519e-07, + "logits/chosen": -0.670049250125885, + "logits/rejected": -0.6241730451583862, + "logps/chosen": -90.87605285644531, + "logps/ref_chosen": -72.72496032714844, + "logps/ref_rejected": -79.84678649902344, + "logps/rejected": -123.93955993652344, + "loss": 0.5032, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19036465883255005, + "margin_dpo/beta_margin_grad_std": 0.20020395517349243, + "margin_dpo/beta_margin_mean": 2.5941686630249023, + "margin_dpo/loss_margin_mean": 25.94168472290039, + "margin_dpo/margin_mean": 25.941686630249023, + "margin_dpo/margin_std": 24.112701416015625, + "step": 326 + }, + { + "epoch": 0.4801762114537445, + "grad_norm": 48.156856536865234, + "learning_rate": 3.1222219096622264e-07, + "logits/chosen": -0.6242316365242004, + "logits/rejected": -0.5801475048065186, + "logps/chosen": -84.52735900878906, + "logps/ref_chosen": -69.13441467285156, + "logps/ref_rejected": -111.93377685546875, + "logps/rejected": -164.81890869140625, + "loss": 0.2858, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1086474135518074, + "margin_dpo/beta_margin_grad_std": 0.16801781952381134, + "margin_dpo/beta_margin_mean": 3.749218463897705, + "margin_dpo/loss_margin_mean": 37.492183685302734, + "margin_dpo/margin_mean": 37.492183685302734, + "margin_dpo/margin_std": 24.61020278930664, + "step": 327 + }, + { + "epoch": 0.48164464023494863, + "grad_norm": 53.24053192138672, + "learning_rate": 3.1097843002709427e-07, + "logits/chosen": -0.631726861000061, + "logits/rejected": -0.6111768484115601, + "logps/chosen": -78.70730590820312, + "logps/ref_chosen": -59.68719482421875, + "logps/ref_rejected": -90.85499572753906, + "logps/rejected": -137.83163452148438, + "loss": 0.476, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17879313230514526, + "margin_dpo/beta_margin_grad_std": 0.1966237723827362, + "margin_dpo/beta_margin_mean": 2.7956528663635254, + "margin_dpo/loss_margin_mean": 27.956527709960938, + "margin_dpo/margin_mean": 27.956527709960938, + "margin_dpo/margin_std": 25.300691604614258, + "step": 328 + }, + { + "epoch": 0.4831130690161527, + "grad_norm": 60.33453369140625, + "learning_rate": 3.0973306224962437e-07, + "logits/chosen": -0.6395320892333984, + "logits/rejected": -0.5939961671829224, + "logps/chosen": -82.35140991210938, + "logps/ref_chosen": -65.2461929321289, + "logps/ref_rejected": -100.69770812988281, + "logps/rejected": -155.14166259765625, + "loss": 0.3708, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12009057402610779, + "margin_dpo/beta_margin_grad_std": 0.18349771201610565, + "margin_dpo/beta_margin_mean": 3.733874797821045, + "margin_dpo/loss_margin_mean": 37.3387451171875, + "margin_dpo/margin_mean": 37.3387451171875, + "margin_dpo/margin_std": 26.804096221923828, + "step": 329 + }, + { + "epoch": 0.4845814977973568, + "grad_norm": 48.900360107421875, + "learning_rate": 3.084861204504122e-07, + "logits/chosen": -0.5813232660293579, + "logits/rejected": -0.5687066316604614, + "logps/chosen": -64.73994445800781, + "logps/ref_chosen": -46.998348236083984, + "logps/ref_rejected": -86.87684631347656, + "logps/rejected": -136.06546020507812, + "loss": 0.4066, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14167913794517517, + "margin_dpo/beta_margin_grad_std": 0.186563640832901, + "margin_dpo/beta_margin_mean": 3.1447031497955322, + "margin_dpo/loss_margin_mean": 31.44702911376953, + "margin_dpo/margin_mean": 31.44702911376953, + "margin_dpo/margin_std": 24.540428161621094, + "step": 330 + }, + { + "epoch": 0.48604992657856094, + "grad_norm": 37.78254699707031, + "learning_rate": 3.072376374875335e-07, + "logits/chosen": -0.6245772838592529, + "logits/rejected": -0.5876287221908569, + "logps/chosen": -66.91592407226562, + "logps/ref_chosen": -50.52424621582031, + "logps/ref_rejected": -89.01544189453125, + "logps/rejected": -139.790283203125, + "loss": 0.2587, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.10614001750946045, + "margin_dpo/beta_margin_grad_std": 0.14252358675003052, + "margin_dpo/beta_margin_mean": 3.438317060470581, + "margin_dpo/loss_margin_mean": 34.38317108154297, + "margin_dpo/margin_mean": 34.38317108154297, + "margin_dpo/margin_std": 23.31448745727539, + "step": 331 + }, + { + "epoch": 0.48751835535976507, + "grad_norm": 50.14997100830078, + "learning_rate": 3.059876462596758e-07, + "logits/chosen": -0.648339033126831, + "logits/rejected": -0.6188260316848755, + "logps/chosen": -67.62520599365234, + "logps/ref_chosen": -49.18028259277344, + "logps/ref_rejected": -76.48515319824219, + "logps/rejected": -120.72598266601562, + "loss": 0.5397, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1846732795238495, + "margin_dpo/beta_margin_grad_std": 0.21404841542243958, + "margin_dpo/beta_margin_mean": 2.579591751098633, + "margin_dpo/loss_margin_mean": 25.795917510986328, + "margin_dpo/margin_mean": 25.795917510986328, + "margin_dpo/margin_std": 22.294769287109375, + "step": 332 + }, + { + "epoch": 0.4889867841409692, + "grad_norm": 64.34756469726562, + "learning_rate": 3.0473617970527015e-07, + "logits/chosen": -0.5994927883148193, + "logits/rejected": -0.5866981744766235, + "logps/chosen": -83.70030212402344, + "logps/ref_chosen": -63.75574493408203, + "logps/ref_rejected": -95.04411315917969, + "logps/rejected": -147.88723754882812, + "loss": 0.5269, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1775931566953659, + "margin_dpo/beta_margin_grad_std": 0.22904759645462036, + "margin_dpo/beta_margin_mean": 3.2898573875427246, + "margin_dpo/loss_margin_mean": 32.8985710144043, + "margin_dpo/margin_mean": 32.8985710144043, + "margin_dpo/margin_std": 28.806324005126953, + "step": 333 + }, + { + "epoch": 0.49045521292217326, + "grad_norm": 47.576019287109375, + "learning_rate": 3.034832708016243e-07, + "logits/chosen": -0.5982068777084351, + "logits/rejected": -0.5783542394638062, + "logps/chosen": -87.60667419433594, + "logps/ref_chosen": -66.97975158691406, + "logps/ref_rejected": -95.31692504882812, + "logps/rejected": -147.35403442382812, + "loss": 0.3524, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12977060675621033, + "margin_dpo/beta_margin_grad_std": 0.18269598484039307, + "margin_dpo/beta_margin_mean": 3.141017198562622, + "margin_dpo/loss_margin_mean": 31.410171508789062, + "margin_dpo/margin_mean": 31.410171508789062, + "margin_dpo/margin_std": 22.10809326171875, + "step": 334 + }, + { + "epoch": 0.4919236417033774, + "grad_norm": 60.33529281616211, + "learning_rate": 3.022289525640531e-07, + "logits/chosen": -0.6425787210464478, + "logits/rejected": -0.6166863441467285, + "logps/chosen": -80.82369995117188, + "logps/ref_chosen": -62.54248046875, + "logps/ref_rejected": -87.6176986694336, + "logps/rejected": -133.60256958007812, + "loss": 0.5288, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1897895336151123, + "margin_dpo/beta_margin_grad_std": 0.2193058580160141, + "margin_dpo/beta_margin_mean": 2.770364284515381, + "margin_dpo/loss_margin_mean": 27.703643798828125, + "margin_dpo/margin_mean": 27.703643798828125, + "margin_dpo/margin_std": 25.8885555267334, + "step": 335 + }, + { + "epoch": 0.4933920704845815, + "grad_norm": 64.78392791748047, + "learning_rate": 3.009732580450086e-07, + "logits/chosen": -0.6276768445968628, + "logits/rejected": -0.6149314641952515, + "logps/chosen": -74.1087646484375, + "logps/ref_chosen": -54.531150817871094, + "logps/ref_rejected": -104.40424346923828, + "logps/rejected": -158.13856506347656, + "loss": 0.4812, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13777320086956024, + "margin_dpo/beta_margin_grad_std": 0.21476463973522186, + "margin_dpo/beta_margin_mean": 3.4156715869903564, + "margin_dpo/loss_margin_mean": 34.156715393066406, + "margin_dpo/margin_mean": 34.156715393066406, + "margin_dpo/margin_std": 28.737443923950195, + "step": 336 + }, + { + "epoch": 0.4948604992657856, + "grad_norm": 56.005924224853516, + "learning_rate": 2.9971622033320914e-07, + "logits/chosen": -0.6466571092605591, + "logits/rejected": -0.6224143505096436, + "logps/chosen": -82.91951751708984, + "logps/ref_chosen": -65.12869262695312, + "logps/ref_rejected": -101.72701263427734, + "logps/rejected": -150.4021759033203, + "loss": 0.3742, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14107711613178253, + "margin_dpo/beta_margin_grad_std": 0.1787952482700348, + "margin_dpo/beta_margin_mean": 3.0884342193603516, + "margin_dpo/loss_margin_mean": 30.88433837890625, + "margin_dpo/margin_mean": 30.884340286254883, + "margin_dpo/margin_std": 22.561031341552734, + "step": 337 + }, + { + "epoch": 0.49632892804698975, + "grad_norm": 53.44011306762695, + "learning_rate": 2.984578725527675e-07, + "logits/chosen": -0.6315950155258179, + "logits/rejected": -0.6056466102600098, + "logps/chosen": -78.76591491699219, + "logps/ref_chosen": -58.422706604003906, + "logps/ref_rejected": -89.06854248046875, + "logps/rejected": -140.06710815429688, + "loss": 0.3915, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15517953038215637, + "margin_dpo/beta_margin_grad_std": 0.1764228343963623, + "margin_dpo/beta_margin_mean": 3.065535545349121, + "margin_dpo/loss_margin_mean": 30.655353546142578, + "margin_dpo/margin_mean": 30.655353546142578, + "margin_dpo/margin_std": 24.606985092163086, + "step": 338 + }, + { + "epoch": 0.4977973568281938, + "grad_norm": 42.09189987182617, + "learning_rate": 2.9719824786231796e-07, + "logits/chosen": -0.7080618143081665, + "logits/rejected": -0.6741960048675537, + "logps/chosen": -77.78138732910156, + "logps/ref_chosen": -59.99531555175781, + "logps/ref_rejected": -103.9109115600586, + "logps/rejected": -156.21578979492188, + "loss": 0.3539, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13084164261817932, + "margin_dpo/beta_margin_grad_std": 0.18853557109832764, + "margin_dpo/beta_margin_mean": 3.4518818855285645, + "margin_dpo/loss_margin_mean": 34.51881790161133, + "margin_dpo/margin_mean": 34.51881790161133, + "margin_dpo/margin_std": 25.89126205444336, + "step": 339 + }, + { + "epoch": 0.49926578560939794, + "grad_norm": 45.83633804321289, + "learning_rate": 2.959373794541426e-07, + "logits/chosen": -0.6209253072738647, + "logits/rejected": -0.5888671875, + "logps/chosen": -73.24740600585938, + "logps/ref_chosen": -52.83022689819336, + "logps/ref_rejected": -73.10723876953125, + "logps/rejected": -127.31039428710938, + "loss": 0.3856, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14801771938800812, + "margin_dpo/beta_margin_grad_std": 0.17904043197631836, + "margin_dpo/beta_margin_mean": 3.3785972595214844, + "margin_dpo/loss_margin_mean": 33.78596878051758, + "margin_dpo/margin_mean": 33.785972595214844, + "margin_dpo/margin_std": 30.059484481811523, + "step": 340 + }, + { + "epoch": 0.5007342143906021, + "grad_norm": 47.085533142089844, + "learning_rate": 2.946753005532965e-07, + "logits/chosen": -0.6055405735969543, + "logits/rejected": -0.5906496047973633, + "logps/chosen": -70.06444549560547, + "logps/ref_chosen": -47.899803161621094, + "logps/ref_rejected": -101.80987548828125, + "logps/rejected": -161.025390625, + "loss": 0.3145, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12136228382587433, + "margin_dpo/beta_margin_grad_std": 0.17082762718200684, + "margin_dpo/beta_margin_mean": 3.7050869464874268, + "margin_dpo/loss_margin_mean": 37.050865173339844, + "margin_dpo/margin_mean": 37.050865173339844, + "margin_dpo/margin_std": 26.06426429748535, + "step": 341 + }, + { + "epoch": 0.5022026431718062, + "grad_norm": 70.15333557128906, + "learning_rate": 2.934120444167326e-07, + "logits/chosen": -0.5826171040534973, + "logits/rejected": -0.5372592210769653, + "logps/chosen": -90.79473114013672, + "logps/ref_chosen": -71.99664306640625, + "logps/ref_rejected": -92.58959197998047, + "logps/rejected": -143.8526611328125, + "loss": 0.452, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1657615751028061, + "margin_dpo/beta_margin_grad_std": 0.21393808722496033, + "margin_dpo/beta_margin_mean": 3.2464988231658936, + "margin_dpo/loss_margin_mean": 32.464988708496094, + "margin_dpo/margin_mean": 32.464988708496094, + "margin_dpo/margin_std": 28.956148147583008, + "step": 342 + }, + { + "epoch": 0.5036710719530103, + "grad_norm": 59.99635696411133, + "learning_rate": 2.9214764433242476e-07, + "logits/chosen": -0.6327919363975525, + "logits/rejected": -0.6083285808563232, + "logps/chosen": -71.64889526367188, + "logps/ref_chosen": -54.40562438964844, + "logps/ref_rejected": -111.04141998291016, + "logps/rejected": -162.94818115234375, + "loss": 0.3762, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13154786825180054, + "margin_dpo/beta_margin_grad_std": 0.1953406035900116, + "margin_dpo/beta_margin_mean": 3.46634840965271, + "margin_dpo/loss_margin_mean": 34.663482666015625, + "margin_dpo/margin_mean": 34.663482666015625, + "margin_dpo/margin_std": 24.739078521728516, + "step": 343 + }, + { + "epoch": 0.5051395007342144, + "grad_norm": 60.24159622192383, + "learning_rate": 2.9088213361849126e-07, + "logits/chosen": -0.60174560546875, + "logits/rejected": -0.5771138072013855, + "logps/chosen": -74.28924560546875, + "logps/ref_chosen": -53.96466827392578, + "logps/ref_rejected": -90.62336730957031, + "logps/rejected": -139.2759246826172, + "loss": 0.5701, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19683772325515747, + "margin_dpo/beta_margin_grad_std": 0.23028729856014252, + "margin_dpo/beta_margin_mean": 2.8327980041503906, + "margin_dpo/loss_margin_mean": 28.327980041503906, + "margin_dpo/margin_mean": 28.327980041503906, + "margin_dpo/margin_std": 28.41692543029785, + "step": 344 + }, + { + "epoch": 0.5066079295154186, + "grad_norm": 52.972599029541016, + "learning_rate": 2.896155456223163e-07, + "logits/chosen": -0.6189597845077515, + "logits/rejected": -0.5859960317611694, + "logps/chosen": -81.26602172851562, + "logps/ref_chosen": -61.685699462890625, + "logps/ref_rejected": -99.49040985107422, + "logps/rejected": -153.17733764648438, + "loss": 0.385, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12680958211421967, + "margin_dpo/beta_margin_grad_std": 0.19373470544815063, + "margin_dpo/beta_margin_mean": 3.4106602668762207, + "margin_dpo/loss_margin_mean": 34.10660171508789, + "margin_dpo/margin_mean": 34.10660171508789, + "margin_dpo/margin_std": 26.537147521972656, + "step": 345 + }, + { + "epoch": 0.5080763582966226, + "grad_norm": 65.10262298583984, + "learning_rate": 2.883479137196714e-07, + "logits/chosen": -0.6390465497970581, + "logits/rejected": -0.6188012361526489, + "logps/chosen": -77.49059295654297, + "logps/ref_chosen": -55.256263732910156, + "logps/ref_rejected": -77.41532135009766, + "logps/rejected": -130.3055877685547, + "loss": 0.4883, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16579663753509521, + "margin_dpo/beta_margin_grad_std": 0.21993526816368103, + "margin_dpo/beta_margin_mean": 3.065593719482422, + "margin_dpo/loss_margin_mean": 30.655935287475586, + "margin_dpo/margin_mean": 30.655933380126953, + "margin_dpo/margin_std": 26.671146392822266, + "step": 346 + }, + { + "epoch": 0.5095447870778267, + "grad_norm": 58.82464599609375, + "learning_rate": 2.8707927131383614e-07, + "logits/chosen": -0.6383576393127441, + "logits/rejected": -0.5973784923553467, + "logps/chosen": -80.98310852050781, + "logps/ref_chosen": -57.56624221801758, + "logps/ref_rejected": -92.35508728027344, + "logps/rejected": -146.76962280273438, + "loss": 0.507, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15645217895507812, + "margin_dpo/beta_margin_grad_std": 0.2233504056930542, + "margin_dpo/beta_margin_mean": 3.099766731262207, + "margin_dpo/loss_margin_mean": 30.99766731262207, + "margin_dpo/margin_mean": 30.997665405273438, + "margin_dpo/margin_std": 27.08733367919922, + "step": 347 + }, + { + "epoch": 0.5110132158590308, + "grad_norm": 56.679996490478516, + "learning_rate": 2.858096518347179e-07, + "logits/chosen": -0.6223098635673523, + "logits/rejected": -0.5989496111869812, + "logps/chosen": -76.78868103027344, + "logps/ref_chosen": -56.31770324707031, + "logps/ref_rejected": -89.13837432861328, + "logps/rejected": -139.9176025390625, + "loss": 0.534, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18657389283180237, + "margin_dpo/beta_margin_grad_std": 0.2263115644454956, + "margin_dpo/beta_margin_mean": 3.0308241844177246, + "margin_dpo/loss_margin_mean": 30.308242797851562, + "margin_dpo/margin_mean": 30.308242797851562, + "margin_dpo/margin_std": 26.598758697509766, + "step": 348 + }, + { + "epoch": 0.5124816446402349, + "grad_norm": 74.86937713623047, + "learning_rate": 2.845390887379706e-07, + "logits/chosen": -0.6142607927322388, + "logits/rejected": -0.6013126373291016, + "logps/chosen": -76.69570922851562, + "logps/ref_chosen": -58.0255126953125, + "logps/ref_rejected": -97.50515747070312, + "logps/rejected": -142.0050506591797, + "loss": 0.7182, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.22203311324119568, + "margin_dpo/beta_margin_grad_std": 0.25500667095184326, + "margin_dpo/beta_margin_mean": 2.5829694271087646, + "margin_dpo/loss_margin_mean": 25.829692840576172, + "margin_dpo/margin_mean": 25.829696655273438, + "margin_dpo/margin_std": 29.345046997070312, + "step": 349 + }, + { + "epoch": 0.5139500734214391, + "grad_norm": 60.53803253173828, + "learning_rate": 2.8326761550411346e-07, + "logits/chosen": -0.6528929471969604, + "logits/rejected": -0.6275583505630493, + "logps/chosen": -83.62789916992188, + "logps/ref_chosen": -64.33049011230469, + "logps/ref_rejected": -89.87164306640625, + "logps/rejected": -136.76258850097656, + "loss": 0.6313, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20285803079605103, + "margin_dpo/beta_margin_grad_std": 0.24381616711616516, + "margin_dpo/beta_margin_mean": 2.7593541145324707, + "margin_dpo/loss_margin_mean": 27.59354019165039, + "margin_dpo/margin_mean": 27.59354019165039, + "margin_dpo/margin_std": 27.823108673095703, + "step": 350 + }, + { + "epoch": 0.5154185022026432, + "grad_norm": 45.59613800048828, + "learning_rate": 2.819952656376487e-07, + "logits/chosen": -0.5691178441047668, + "logits/rejected": -0.5438896417617798, + "logps/chosen": -77.93399810791016, + "logps/ref_chosen": -60.6721305847168, + "logps/ref_rejected": -101.5654296875, + "logps/rejected": -152.97235107421875, + "loss": 0.3518, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12643350660800934, + "margin_dpo/beta_margin_grad_std": 0.19213062524795532, + "margin_dpo/beta_margin_mean": 3.414506196975708, + "margin_dpo/loss_margin_mean": 34.145057678222656, + "margin_dpo/margin_mean": 34.14506149291992, + "margin_dpo/margin_std": 24.413818359375, + "step": 351 + }, + { + "epoch": 0.5168869309838473, + "grad_norm": 69.56964111328125, + "learning_rate": 2.8072207266617854e-07, + "logits/chosen": -0.6274293661117554, + "logits/rejected": -0.5914252996444702, + "logps/chosen": -88.6449203491211, + "logps/ref_chosen": -70.9434585571289, + "logps/ref_rejected": -76.6419677734375, + "logps/rejected": -121.17511749267578, + "loss": 0.5783, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20047959685325623, + "margin_dpo/beta_margin_grad_std": 0.2212606817483902, + "margin_dpo/beta_margin_mean": 2.683168888092041, + "margin_dpo/loss_margin_mean": 26.831687927246094, + "margin_dpo/margin_mean": 26.831687927246094, + "margin_dpo/margin_std": 27.10396385192871, + "step": 352 + }, + { + "epoch": 0.5183553597650514, + "grad_norm": 70.63945007324219, + "learning_rate": 2.794480701395219e-07, + "logits/chosen": -0.6379419565200806, + "logits/rejected": -0.610392689704895, + "logps/chosen": -78.57247924804688, + "logps/ref_chosen": -58.39533996582031, + "logps/ref_rejected": -80.33552551269531, + "logps/rejected": -127.21942138671875, + "loss": 0.7041, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2198677659034729, + "margin_dpo/beta_margin_grad_std": 0.26175159215927124, + "margin_dpo/beta_margin_mean": 2.6706738471984863, + "margin_dpo/loss_margin_mean": 26.706737518310547, + "margin_dpo/margin_mean": 26.706737518310547, + "margin_dpo/margin_std": 27.502460479736328, + "step": 353 + }, + { + "epoch": 0.5198237885462555, + "grad_norm": 39.9495964050293, + "learning_rate": 2.781732916288303e-07, + "logits/chosen": -0.6119546890258789, + "logits/rejected": -0.5863783359527588, + "logps/chosen": -76.71435546875, + "logps/ref_chosen": -59.80299377441406, + "logps/ref_rejected": -88.75750732421875, + "logps/rejected": -137.52517700195312, + "loss": 0.2944, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.11870501935482025, + "margin_dpo/beta_margin_grad_std": 0.15505337715148926, + "margin_dpo/beta_margin_mean": 3.1856298446655273, + "margin_dpo/loss_margin_mean": 31.85629653930664, + "margin_dpo/margin_mean": 31.856294631958008, + "margin_dpo/margin_std": 21.562454223632812, + "step": 354 + }, + { + "epoch": 0.5212922173274597, + "grad_norm": 37.86518096923828, + "learning_rate": 2.7689777072570284e-07, + "logits/chosen": -0.6655494570732117, + "logits/rejected": -0.6307432055473328, + "logps/chosen": -70.75237274169922, + "logps/ref_chosen": -54.128501892089844, + "logps/ref_rejected": -82.40606689453125, + "logps/rejected": -132.5486297607422, + "loss": 0.3504, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1401645541191101, + "margin_dpo/beta_margin_grad_std": 0.16528445482254028, + "margin_dpo/beta_margin_mean": 3.351868152618408, + "margin_dpo/loss_margin_mean": 33.518680572509766, + "margin_dpo/margin_mean": 33.518680572509766, + "margin_dpo/margin_std": 28.362560272216797, + "step": 355 + }, + { + "epoch": 0.5227606461086637, + "grad_norm": 97.62612915039062, + "learning_rate": 2.7562154104130176e-07, + "logits/chosen": -0.6015282273292542, + "logits/rejected": -0.5701065063476562, + "logps/chosen": -86.73289489746094, + "logps/ref_chosen": -64.67381286621094, + "logps/ref_rejected": -75.89926147460938, + "logps/rejected": -120.73099517822266, + "loss": 0.8011, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.26123011112213135, + "margin_dpo/beta_margin_grad_std": 0.2479068785905838, + "margin_dpo/beta_margin_mean": 2.2772653102874756, + "margin_dpo/loss_margin_mean": 22.772653579711914, + "margin_dpo/margin_mean": 22.772653579711914, + "margin_dpo/margin_std": 27.33060073852539, + "step": 356 + }, + { + "epoch": 0.5242290748898678, + "grad_norm": 48.75430679321289, + "learning_rate": 2.7434463620546594e-07, + "logits/chosen": -0.6201961040496826, + "logits/rejected": -0.5882294178009033, + "logps/chosen": -70.5618896484375, + "logps/ref_chosen": -52.725799560546875, + "logps/ref_rejected": -86.84115600585938, + "logps/rejected": -136.1475372314453, + "loss": 0.3922, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13970120251178741, + "margin_dpo/beta_margin_grad_std": 0.18796856701374054, + "margin_dpo/beta_margin_mean": 3.147029399871826, + "margin_dpo/loss_margin_mean": 31.470294952392578, + "margin_dpo/margin_mean": 31.470294952392578, + "margin_dpo/margin_std": 23.818038940429688, + "step": 357 + }, + { + "epoch": 0.5256975036710719, + "grad_norm": 65.04510498046875, + "learning_rate": 2.730670898658255e-07, + "logits/chosen": -0.6239925622940063, + "logits/rejected": -0.5800461173057556, + "logps/chosen": -79.59387969970703, + "logps/ref_chosen": -63.20543670654297, + "logps/ref_rejected": -88.373291015625, + "logps/rejected": -133.9655303955078, + "loss": 0.4776, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17933997511863708, + "margin_dpo/beta_margin_grad_std": 0.1934494972229004, + "margin_dpo/beta_margin_mean": 2.920379161834717, + "margin_dpo/loss_margin_mean": 29.20379066467285, + "margin_dpo/margin_mean": 29.20378875732422, + "margin_dpo/margin_std": 27.63866424560547, + "step": 358 + }, + { + "epoch": 0.527165932452276, + "grad_norm": 62.99494934082031, + "learning_rate": 2.717889356869146e-07, + "logits/chosen": -0.5847325325012207, + "logits/rejected": -0.5495982766151428, + "logps/chosen": -78.43362426757812, + "logps/ref_chosen": -56.370216369628906, + "logps/ref_rejected": -82.17375183105469, + "logps/rejected": -136.6361083984375, + "loss": 0.4728, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15706944465637207, + "margin_dpo/beta_margin_grad_std": 0.21352404356002808, + "margin_dpo/beta_margin_mean": 3.2398953437805176, + "margin_dpo/loss_margin_mean": 32.39895248413086, + "margin_dpo/margin_mean": 32.39895248413086, + "margin_dpo/margin_std": 27.692176818847656, + "step": 359 + }, + { + "epoch": 0.5286343612334802, + "grad_norm": 44.829593658447266, + "learning_rate": 2.7051020734928443e-07, + "logits/chosen": -0.5741163492202759, + "logits/rejected": -0.5485746264457703, + "logps/chosen": -70.59879302978516, + "logps/ref_chosen": -51.460384368896484, + "logps/ref_rejected": -69.83892822265625, + "logps/rejected": -118.67765808105469, + "loss": 0.4042, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14649343490600586, + "margin_dpo/beta_margin_grad_std": 0.1764645129442215, + "margin_dpo/beta_margin_mean": 2.970031976699829, + "margin_dpo/loss_margin_mean": 29.700321197509766, + "margin_dpo/margin_mean": 29.700321197509766, + "margin_dpo/margin_std": 23.251190185546875, + "step": 360 + }, + { + "epoch": 0.5301027900146843, + "grad_norm": 60.9596061706543, + "learning_rate": 2.6923093854861593e-07, + "logits/chosen": -0.6095191240310669, + "logits/rejected": -0.5885258316993713, + "logps/chosen": -73.89006042480469, + "logps/ref_chosen": -53.86951446533203, + "logps/ref_rejected": -90.76925659179688, + "logps/rejected": -139.11050415039062, + "loss": 0.4981, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1759810894727707, + "margin_dpo/beta_margin_grad_std": 0.20826107263565063, + "margin_dpo/beta_margin_mean": 2.832070827484131, + "margin_dpo/loss_margin_mean": 28.320707321166992, + "margin_dpo/margin_mean": 28.320707321166992, + "margin_dpo/margin_std": 25.021095275878906, + "step": 361 + }, + { + "epoch": 0.5315712187958884, + "grad_norm": 54.746620178222656, + "learning_rate": 2.679511629948319e-07, + "logits/chosen": -0.6106045246124268, + "logits/rejected": -0.5950082540512085, + "logps/chosen": -78.94198608398438, + "logps/ref_chosen": -58.639060974121094, + "logps/ref_rejected": -105.58195495605469, + "logps/rejected": -159.86471557617188, + "loss": 0.4172, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14724624156951904, + "margin_dpo/beta_margin_grad_std": 0.20302033424377441, + "margin_dpo/beta_margin_mean": 3.3979835510253906, + "margin_dpo/loss_margin_mean": 33.979835510253906, + "margin_dpo/margin_mean": 33.97983169555664, + "margin_dpo/margin_std": 27.78476333618164, + "step": 362 + }, + { + "epoch": 0.5330396475770925, + "grad_norm": 81.81700897216797, + "learning_rate": 2.6667091441120816e-07, + "logits/chosen": -0.626789927482605, + "logits/rejected": -0.5806140899658203, + "logps/chosen": -62.119293212890625, + "logps/ref_chosen": -44.558380126953125, + "logps/ref_rejected": -74.69496154785156, + "logps/rejected": -131.403076171875, + "loss": 0.4021, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1342936009168625, + "margin_dpo/beta_margin_grad_std": 0.2032202035188675, + "margin_dpo/beta_margin_mean": 3.914720058441162, + "margin_dpo/loss_margin_mean": 39.14720153808594, + "margin_dpo/margin_mean": 39.14720153808594, + "margin_dpo/margin_std": 32.773658752441406, + "step": 363 + }, + { + "epoch": 0.5345080763582967, + "grad_norm": 70.45586395263672, + "learning_rate": 2.6539022653348575e-07, + "logits/chosen": -0.6278142929077148, + "logits/rejected": -0.6232542991638184, + "logps/chosen": -67.96247863769531, + "logps/ref_chosen": -48.894622802734375, + "logps/ref_rejected": -91.395751953125, + "logps/rejected": -138.64413452148438, + "loss": 0.5219, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18396377563476562, + "margin_dpo/beta_margin_grad_std": 0.21280619502067566, + "margin_dpo/beta_margin_mean": 2.818053722381592, + "margin_dpo/loss_margin_mean": 28.180538177490234, + "margin_dpo/margin_mean": 28.180538177490234, + "margin_dpo/margin_std": 26.407032012939453, + "step": 364 + }, + { + "epoch": 0.5359765051395007, + "grad_norm": 53.16273498535156, + "learning_rate": 2.641091331089811e-07, + "logits/chosen": -0.5910431146621704, + "logits/rejected": -0.5750705003738403, + "logps/chosen": -69.91105651855469, + "logps/ref_chosen": -51.49274444580078, + "logps/ref_rejected": -92.70166778564453, + "logps/rejected": -138.62091064453125, + "loss": 0.4635, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1818796843290329, + "margin_dpo/beta_margin_grad_std": 0.18291005492210388, + "margin_dpo/beta_margin_mean": 2.750092029571533, + "margin_dpo/loss_margin_mean": 27.500919342041016, + "margin_dpo/margin_mean": 27.500919342041016, + "margin_dpo/margin_std": 25.346105575561523, + "step": 365 + }, + { + "epoch": 0.5374449339207048, + "grad_norm": 41.03267288208008, + "learning_rate": 2.6282766789569736e-07, + "logits/chosen": -0.6280097365379333, + "logits/rejected": -0.6183122992515564, + "logps/chosen": -61.73809051513672, + "logps/ref_chosen": -44.7205696105957, + "logps/ref_rejected": -83.31040954589844, + "logps/rejected": -129.70077514648438, + "loss": 0.3847, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1519765853881836, + "margin_dpo/beta_margin_grad_std": 0.17253069579601288, + "margin_dpo/beta_margin_mean": 2.937284469604492, + "margin_dpo/loss_margin_mean": 29.372844696044922, + "margin_dpo/margin_mean": 29.372844696044922, + "margin_dpo/margin_std": 23.400222778320312, + "step": 366 + }, + { + "epoch": 0.5389133627019089, + "grad_norm": 55.751182556152344, + "learning_rate": 2.615458646614349e-07, + "logits/chosen": -0.5929805040359497, + "logits/rejected": -0.5748361945152283, + "logps/chosen": -77.30204010009766, + "logps/ref_chosen": -58.405418395996094, + "logps/ref_rejected": -76.75132751464844, + "logps/rejected": -121.19309997558594, + "loss": 0.5059, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1770220696926117, + "margin_dpo/beta_margin_grad_std": 0.2071405053138733, + "margin_dpo/beta_margin_mean": 2.5545148849487305, + "margin_dpo/loss_margin_mean": 25.545148849487305, + "margin_dpo/margin_mean": 25.545148849487305, + "margin_dpo/margin_std": 22.493253707885742, + "step": 367 + }, + { + "epoch": 0.540381791483113, + "grad_norm": 41.846797943115234, + "learning_rate": 2.6026375718290083e-07, + "logits/chosen": -0.6356101036071777, + "logits/rejected": -0.6200574040412903, + "logps/chosen": -61.09843444824219, + "logps/ref_chosen": -44.452518463134766, + "logps/ref_rejected": -98.55526733398438, + "logps/rejected": -145.97296142578125, + "loss": 0.3508, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13728934526443481, + "margin_dpo/beta_margin_grad_std": 0.17335599660873413, + "margin_dpo/beta_margin_mean": 3.0771780014038086, + "margin_dpo/loss_margin_mean": 30.771780014038086, + "margin_dpo/margin_mean": 30.771780014038086, + "margin_dpo/margin_std": 23.21100616455078, + "step": 368 + }, + { + "epoch": 0.5418502202643172, + "grad_norm": 67.84696960449219, + "learning_rate": 2.589813792448196e-07, + "logits/chosen": -0.6399098634719849, + "logits/rejected": -0.60102778673172, + "logps/chosen": -89.86668395996094, + "logps/ref_chosen": -71.38150024414062, + "logps/ref_rejected": -91.29582214355469, + "logps/rejected": -134.7001190185547, + "loss": 0.553, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19392737746238708, + "margin_dpo/beta_margin_grad_std": 0.22195757925510406, + "margin_dpo/beta_margin_mean": 2.4919116497039795, + "margin_dpo/loss_margin_mean": 24.919116973876953, + "margin_dpo/margin_mean": 24.91911506652832, + "margin_dpo/margin_std": 22.250526428222656, + "step": 369 + }, + { + "epoch": 0.5433186490455213, + "grad_norm": 53.89767074584961, + "learning_rate": 2.5769876463904263e-07, + "logits/chosen": -0.62095046043396, + "logits/rejected": -0.5922361016273499, + "logps/chosen": -90.54344177246094, + "logps/ref_chosen": -71.60749816894531, + "logps/ref_rejected": -97.25978088378906, + "logps/rejected": -141.17263793945312, + "loss": 0.5169, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1934899091720581, + "margin_dpo/beta_margin_grad_std": 0.2069862186908722, + "margin_dpo/beta_margin_mean": 2.4976892471313477, + "margin_dpo/loss_margin_mean": 24.976890563964844, + "margin_dpo/margin_mean": 24.976890563964844, + "margin_dpo/margin_std": 21.94351577758789, + "step": 370 + }, + { + "epoch": 0.5447870778267254, + "grad_norm": 65.36404418945312, + "learning_rate": 2.5641594716365744e-07, + "logits/chosen": -0.65543532371521, + "logits/rejected": -0.6313973665237427, + "logps/chosen": -89.23640441894531, + "logps/ref_chosen": -69.41448974609375, + "logps/ref_rejected": -99.17217254638672, + "logps/rejected": -146.0745849609375, + "loss": 0.6168, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19891506433486938, + "margin_dpo/beta_margin_grad_std": 0.2336684763431549, + "margin_dpo/beta_margin_mean": 2.7080492973327637, + "margin_dpo/loss_margin_mean": 27.080493927001953, + "margin_dpo/margin_mean": 27.08049201965332, + "margin_dpo/margin_std": 28.510940551757812, + "step": 371 + }, + { + "epoch": 0.5462555066079295, + "grad_norm": 55.08517074584961, + "learning_rate": 2.551329606220976e-07, + "logits/chosen": -0.6221505403518677, + "logits/rejected": -0.5715365409851074, + "logps/chosen": -81.12614440917969, + "logps/ref_chosen": -61.8179931640625, + "logps/ref_rejected": -78.53949737548828, + "logps/rejected": -129.13607788085938, + "loss": 0.5133, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17172211408615112, + "margin_dpo/beta_margin_grad_std": 0.2211245894432068, + "margin_dpo/beta_margin_mean": 3.1288440227508545, + "margin_dpo/loss_margin_mean": 31.288440704345703, + "margin_dpo/margin_mean": 31.288440704345703, + "margin_dpo/margin_std": 29.940040588378906, + "step": 372 + }, + { + "epoch": 0.5477239353891337, + "grad_norm": 59.45585250854492, + "learning_rate": 2.538498388222517e-07, + "logits/chosen": -0.6288785338401794, + "logits/rejected": -0.5830151438713074, + "logps/chosen": -85.13172912597656, + "logps/ref_chosen": -64.21713256835938, + "logps/ref_rejected": -85.95960998535156, + "logps/rejected": -139.53817749023438, + "loss": 0.4155, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1484554558992386, + "margin_dpo/beta_margin_grad_std": 0.19429174065589905, + "margin_dpo/beta_margin_mean": 3.266396999359131, + "margin_dpo/loss_margin_mean": 32.663970947265625, + "margin_dpo/margin_mean": 32.663970947265625, + "margin_dpo/margin_std": 25.845104217529297, + "step": 373 + }, + { + "epoch": 0.5491923641703378, + "grad_norm": 47.363040924072266, + "learning_rate": 2.525666155755725e-07, + "logits/chosen": -0.6621605157852173, + "logits/rejected": -0.6285480856895447, + "logps/chosen": -88.64697265625, + "logps/ref_chosen": -70.65017700195312, + "logps/ref_rejected": -93.64016723632812, + "logps/rejected": -141.2034912109375, + "loss": 0.4313, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1634691059589386, + "margin_dpo/beta_margin_grad_std": 0.18810805678367615, + "margin_dpo/beta_margin_mean": 2.9566543102264404, + "margin_dpo/loss_margin_mean": 29.566543579101562, + "margin_dpo/margin_mean": 29.566543579101562, + "margin_dpo/margin_std": 25.27297019958496, + "step": 374 + }, + { + "epoch": 0.5506607929515418, + "grad_norm": 53.13246536254883, + "learning_rate": 2.512833246961859e-07, + "logits/chosen": -0.5945202112197876, + "logits/rejected": -0.580052375793457, + "logps/chosen": -79.16407775878906, + "logps/ref_chosen": -60.080223083496094, + "logps/ref_rejected": -88.93830871582031, + "logps/rejected": -137.2688446044922, + "loss": 0.5153, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18427500128746033, + "margin_dpo/beta_margin_grad_std": 0.21660040318965912, + "margin_dpo/beta_margin_mean": 2.924668550491333, + "margin_dpo/loss_margin_mean": 29.246685028076172, + "margin_dpo/margin_mean": 29.246685028076172, + "margin_dpo/margin_std": 24.21230697631836, + "step": 375 + }, + { + "epoch": 0.5521292217327459, + "grad_norm": 48.868709564208984, + "learning_rate": 2.5e-07, + "logits/chosen": -0.5992149114608765, + "logits/rejected": -0.5802311897277832, + "logps/chosen": -81.73542785644531, + "logps/ref_chosen": -62.660308837890625, + "logps/ref_rejected": -105.526611328125, + "logps/rejected": -156.72482299804688, + "loss": 0.3949, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1482008844614029, + "margin_dpo/beta_margin_grad_std": 0.17604166269302368, + "margin_dpo/beta_margin_mean": 3.212308883666992, + "margin_dpo/loss_margin_mean": 32.12308883666992, + "margin_dpo/margin_mean": 32.12308883666992, + "margin_dpo/margin_std": 27.578922271728516, + "step": 376 + }, + { + "epoch": 0.55359765051395, + "grad_norm": 62.117244720458984, + "learning_rate": 2.487166753038141e-07, + "logits/chosen": -0.5650719404220581, + "logits/rejected": -0.548367977142334, + "logps/chosen": -76.00762939453125, + "logps/ref_chosen": -54.478736877441406, + "logps/ref_rejected": -98.70335388183594, + "logps/rejected": -149.87831115722656, + "loss": 0.5358, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18531596660614014, + "margin_dpo/beta_margin_grad_std": 0.2207948863506317, + "margin_dpo/beta_margin_mean": 2.9646058082580566, + "margin_dpo/loss_margin_mean": 29.646059036254883, + "margin_dpo/margin_mean": 29.646059036254883, + "margin_dpo/margin_std": 26.692546844482422, + "step": 377 + }, + { + "epoch": 0.5550660792951542, + "grad_norm": 44.018394470214844, + "learning_rate": 2.4743338442442754e-07, + "logits/chosen": -0.6136064529418945, + "logits/rejected": -0.600831151008606, + "logps/chosen": -62.556495666503906, + "logps/ref_chosen": -45.02053451538086, + "logps/ref_rejected": -88.0469741821289, + "logps/rejected": -137.45811462402344, + "loss": 0.4079, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13975557684898376, + "margin_dpo/beta_margin_grad_std": 0.2044086456298828, + "margin_dpo/beta_margin_mean": 3.1875176429748535, + "margin_dpo/loss_margin_mean": 31.87517547607422, + "margin_dpo/margin_mean": 31.87517547607422, + "margin_dpo/margin_std": 25.208221435546875, + "step": 378 + }, + { + "epoch": 0.5565345080763583, + "grad_norm": 55.18994903564453, + "learning_rate": 2.461501611777483e-07, + "logits/chosen": -0.6599289774894714, + "logits/rejected": -0.6502236127853394, + "logps/chosen": -72.3454818725586, + "logps/ref_chosen": -53.182098388671875, + "logps/ref_rejected": -114.30015563964844, + "logps/rejected": -166.52252197265625, + "loss": 0.4312, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1558556854724884, + "margin_dpo/beta_margin_grad_std": 0.19769342243671417, + "margin_dpo/beta_margin_mean": 3.305896759033203, + "margin_dpo/loss_margin_mean": 33.05896759033203, + "margin_dpo/margin_mean": 33.05896759033203, + "margin_dpo/margin_std": 27.922954559326172, + "step": 379 + }, + { + "epoch": 0.5580029368575624, + "grad_norm": 79.15997314453125, + "learning_rate": 2.4486703937790243e-07, + "logits/chosen": -0.5800139904022217, + "logits/rejected": -0.5870028138160706, + "logps/chosen": -74.11767578125, + "logps/ref_chosen": -51.3530387878418, + "logps/ref_rejected": -104.19169616699219, + "logps/rejected": -161.47531127929688, + "loss": 0.5708, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17511457204818726, + "margin_dpo/beta_margin_grad_std": 0.2577556371688843, + "margin_dpo/beta_margin_mean": 3.451897144317627, + "margin_dpo/loss_margin_mean": 34.51897430419922, + "margin_dpo/margin_mean": 34.51897430419922, + "margin_dpo/margin_std": 30.276945114135742, + "step": 380 + }, + { + "epoch": 0.5594713656387665, + "grad_norm": 63.70035171508789, + "learning_rate": 2.435840528363426e-07, + "logits/chosen": -0.5940126180648804, + "logits/rejected": -0.5524269342422485, + "logps/chosen": -79.10946655273438, + "logps/ref_chosen": -57.80306625366211, + "logps/ref_rejected": -79.21940612792969, + "logps/rejected": -134.66360473632812, + "loss": 0.5249, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1618008017539978, + "margin_dpo/beta_margin_grad_std": 0.22977310419082642, + "margin_dpo/beta_margin_mean": 3.4137802124023438, + "margin_dpo/loss_margin_mean": 34.13780212402344, + "margin_dpo/margin_mean": 34.13780212402344, + "margin_dpo/margin_std": 30.301250457763672, + "step": 381 + }, + { + "epoch": 0.5609397944199707, + "grad_norm": 55.583091735839844, + "learning_rate": 2.4230123536095745e-07, + "logits/chosen": -0.6578388214111328, + "logits/rejected": -0.6236182451248169, + "logps/chosen": -84.37736511230469, + "logps/ref_chosen": -66.02030181884766, + "logps/ref_rejected": -110.71015930175781, + "logps/rejected": -164.35707092285156, + "loss": 0.402, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13427521288394928, + "margin_dpo/beta_margin_grad_std": 0.20077002048492432, + "margin_dpo/beta_margin_mean": 3.528985023498535, + "margin_dpo/loss_margin_mean": 35.28984832763672, + "margin_dpo/margin_mean": 35.28984832763672, + "margin_dpo/margin_std": 27.51814079284668, + "step": 382 + }, + { + "epoch": 0.5624082232011748, + "grad_norm": 53.64909744262695, + "learning_rate": 2.4101862075518037e-07, + "logits/chosen": -0.621538519859314, + "logits/rejected": -0.6095184087753296, + "logps/chosen": -71.61962890625, + "logps/ref_chosen": -50.39148712158203, + "logps/ref_rejected": -93.71589660644531, + "logps/rejected": -147.1136016845703, + "loss": 0.3707, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1415613293647766, + "margin_dpo/beta_margin_grad_std": 0.18154266476631165, + "margin_dpo/beta_margin_mean": 3.2169556617736816, + "margin_dpo/loss_margin_mean": 32.1695556640625, + "margin_dpo/margin_mean": 32.1695556640625, + "margin_dpo/margin_std": 25.299137115478516, + "step": 383 + }, + { + "epoch": 0.5638766519823789, + "grad_norm": 49.92765426635742, + "learning_rate": 2.397362428170992e-07, + "logits/chosen": -0.6100102663040161, + "logits/rejected": -0.5814231038093567, + "logps/chosen": -73.15731811523438, + "logps/ref_chosen": -52.046104431152344, + "logps/ref_rejected": -85.76089477539062, + "logps/rejected": -138.78662109375, + "loss": 0.4752, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17313042283058167, + "margin_dpo/beta_margin_grad_std": 0.20930971205234528, + "margin_dpo/beta_margin_mean": 3.191450357437134, + "margin_dpo/loss_margin_mean": 31.914501190185547, + "margin_dpo/margin_mean": 31.914505004882812, + "margin_dpo/margin_std": 31.282188415527344, + "step": 384 + }, + { + "epoch": 0.5653450807635829, + "grad_norm": 57.458656311035156, + "learning_rate": 2.3845413533856514e-07, + "logits/chosen": -0.636741578578949, + "logits/rejected": -0.5834276676177979, + "logps/chosen": -83.70286560058594, + "logps/ref_chosen": -65.55216217041016, + "logps/ref_rejected": -77.82792663574219, + "logps/rejected": -124.51187133789062, + "loss": 0.4448, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1649201214313507, + "margin_dpo/beta_margin_grad_std": 0.20112237334251404, + "margin_dpo/beta_margin_mean": 2.8533244132995605, + "margin_dpo/loss_margin_mean": 28.533245086669922, + "margin_dpo/margin_mean": 28.533245086669922, + "margin_dpo/margin_std": 21.985557556152344, + "step": 385 + }, + { + "epoch": 0.566813509544787, + "grad_norm": 65.1162109375, + "learning_rate": 2.3717233210430254e-07, + "logits/chosen": -0.5937461853027344, + "logits/rejected": -0.5639574527740479, + "logps/chosen": -79.37002563476562, + "logps/ref_chosen": -58.22185516357422, + "logps/ref_rejected": -92.32742309570312, + "logps/rejected": -147.26278686523438, + "loss": 0.3465, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12259222567081451, + "margin_dpo/beta_margin_grad_std": 0.18382999300956726, + "margin_dpo/beta_margin_mean": 3.3787193298339844, + "margin_dpo/loss_margin_mean": 33.787193298339844, + "margin_dpo/margin_mean": 33.787193298339844, + "margin_dpo/margin_std": 23.929697036743164, + "step": 386 + }, + { + "epoch": 0.5682819383259912, + "grad_norm": 72.24935150146484, + "learning_rate": 2.3589086689101889e-07, + "logits/chosen": -0.6583748459815979, + "logits/rejected": -0.6081060171127319, + "logps/chosen": -84.46687316894531, + "logps/ref_chosen": -66.41944885253906, + "logps/ref_rejected": -92.16915893554688, + "logps/rejected": -139.08279418945312, + "loss": 0.4609, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16438427567481995, + "margin_dpo/beta_margin_grad_std": 0.20714232325553894, + "margin_dpo/beta_margin_mean": 2.886620044708252, + "margin_dpo/loss_margin_mean": 28.866199493408203, + "margin_dpo/margin_mean": 28.86620330810547, + "margin_dpo/margin_std": 22.799579620361328, + "step": 387 + }, + { + "epoch": 0.5697503671071953, + "grad_norm": 49.13148880004883, + "learning_rate": 2.3460977346651428e-07, + "logits/chosen": -0.6356014013290405, + "logits/rejected": -0.6344074010848999, + "logps/chosen": -70.69566345214844, + "logps/ref_chosen": -50.129459381103516, + "logps/ref_rejected": -104.43305969238281, + "logps/rejected": -160.75782775878906, + "loss": 0.3715, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13866804540157318, + "margin_dpo/beta_margin_grad_std": 0.18035584688186646, + "margin_dpo/beta_margin_mean": 3.5758562088012695, + "margin_dpo/loss_margin_mean": 35.75856018066406, + "margin_dpo/margin_mean": 35.75856018066406, + "margin_dpo/margin_std": 29.00539779663086, + "step": 388 + }, + { + "epoch": 0.5712187958883994, + "grad_norm": 39.877593994140625, + "learning_rate": 2.3332908558879177e-07, + "logits/chosen": -0.670194149017334, + "logits/rejected": -0.6247744560241699, + "logps/chosen": -76.81654357910156, + "logps/ref_chosen": -57.906593322753906, + "logps/ref_rejected": -77.91454315185547, + "logps/rejected": -130.85861206054688, + "loss": 0.3492, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13106092810630798, + "margin_dpo/beta_margin_grad_std": 0.18352819979190826, + "margin_dpo/beta_margin_mean": 3.403412103652954, + "margin_dpo/loss_margin_mean": 34.03411865234375, + "margin_dpo/margin_mean": 34.03411865234375, + "margin_dpo/margin_std": 25.96773338317871, + "step": 389 + }, + { + "epoch": 0.5726872246696035, + "grad_norm": 65.6811752319336, + "learning_rate": 2.320488370051681e-07, + "logits/chosen": -0.5949693322181702, + "logits/rejected": -0.5706865787506104, + "logps/chosen": -70.46009826660156, + "logps/ref_chosen": -49.22591781616211, + "logps/ref_rejected": -85.5281982421875, + "logps/rejected": -137.94676208496094, + "loss": 0.5262, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16779480874538422, + "margin_dpo/beta_margin_grad_std": 0.23410001397132874, + "margin_dpo/beta_margin_mean": 3.118438243865967, + "margin_dpo/loss_margin_mean": 31.18438148498535, + "margin_dpo/margin_mean": 31.18438148498535, + "margin_dpo/margin_std": 27.223957061767578, + "step": 390 + }, + { + "epoch": 0.5741556534508077, + "grad_norm": 59.37446594238281, + "learning_rate": 2.3076906145138405e-07, + "logits/chosen": -0.6412761211395264, + "logits/rejected": -0.6216508150100708, + "logps/chosen": -86.92506408691406, + "logps/ref_chosen": -64.32965087890625, + "logps/ref_rejected": -86.73820495605469, + "logps/rejected": -137.03587341308594, + "loss": 0.5183, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18732406198978424, + "margin_dpo/beta_margin_grad_std": 0.2125789225101471, + "margin_dpo/beta_margin_mean": 2.7702255249023438, + "margin_dpo/loss_margin_mean": 27.702255249023438, + "margin_dpo/margin_mean": 27.702255249023438, + "margin_dpo/margin_std": 26.598276138305664, + "step": 391 + }, + { + "epoch": 0.5756240822320118, + "grad_norm": 42.18976974487305, + "learning_rate": 2.294897926507156e-07, + "logits/chosen": -0.6050982475280762, + "logits/rejected": -0.5827013850212097, + "logps/chosen": -71.68020629882812, + "logps/ref_chosen": -53.50397872924805, + "logps/ref_rejected": -102.34583282470703, + "logps/rejected": -154.8188934326172, + "loss": 0.2982, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.11888954043388367, + "margin_dpo/beta_margin_grad_std": 0.1541348695755005, + "margin_dpo/beta_margin_mean": 3.429682493209839, + "margin_dpo/loss_margin_mean": 34.29682540893555, + "margin_dpo/margin_mean": 34.29682540893555, + "margin_dpo/margin_std": 25.064613342285156, + "step": 392 + }, + { + "epoch": 0.5770925110132159, + "grad_norm": 57.39728927612305, + "learning_rate": 2.2821106431308543e-07, + "logits/chosen": -0.572903037071228, + "logits/rejected": -0.543228268623352, + "logps/chosen": -65.63607788085938, + "logps/ref_chosen": -46.473915100097656, + "logps/ref_rejected": -71.96885681152344, + "logps/rejected": -118.71408081054688, + "loss": 0.5349, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19703662395477295, + "margin_dpo/beta_margin_grad_std": 0.2085367888212204, + "margin_dpo/beta_margin_mean": 2.7583065032958984, + "margin_dpo/loss_margin_mean": 27.583065032958984, + "margin_dpo/margin_mean": 27.583065032958984, + "margin_dpo/margin_std": 27.609420776367188, + "step": 393 + }, + { + "epoch": 0.57856093979442, + "grad_norm": 60.01694869995117, + "learning_rate": 2.2693291013417452e-07, + "logits/chosen": -0.6191369295120239, + "logits/rejected": -0.5974385738372803, + "logps/chosen": -71.70793151855469, + "logps/ref_chosen": -52.91154479980469, + "logps/ref_rejected": -90.82263946533203, + "logps/rejected": -140.04214477539062, + "loss": 0.577, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18772023916244507, + "margin_dpo/beta_margin_grad_std": 0.22964736819267273, + "margin_dpo/beta_margin_mean": 3.0423121452331543, + "margin_dpo/loss_margin_mean": 30.42312240600586, + "margin_dpo/margin_mean": 30.42312240600586, + "margin_dpo/margin_std": 31.186908721923828, + "step": 394 + }, + { + "epoch": 0.580029368575624, + "grad_norm": 45.10045623779297, + "learning_rate": 2.2565536379453404e-07, + "logits/chosen": -0.6933879852294922, + "logits/rejected": -0.6763237714767456, + "logps/chosen": -79.92889404296875, + "logps/ref_chosen": -62.546112060546875, + "logps/ref_rejected": -83.78262329101562, + "logps/rejected": -133.17141723632812, + "loss": 0.4329, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16615131497383118, + "margin_dpo/beta_margin_grad_std": 0.19662132859230042, + "margin_dpo/beta_margin_mean": 3.2006003856658936, + "margin_dpo/loss_margin_mean": 32.006004333496094, + "margin_dpo/margin_mean": 32.006004333496094, + "margin_dpo/margin_std": 26.82199478149414, + "step": 395 + }, + { + "epoch": 0.5814977973568282, + "grad_norm": 49.713191986083984, + "learning_rate": 2.2437845895869825e-07, + "logits/chosen": -0.6597648859024048, + "logits/rejected": -0.6144574284553528, + "logps/chosen": -88.35139465332031, + "logps/ref_chosen": -68.99594116210938, + "logps/ref_rejected": -88.64665985107422, + "logps/rejected": -139.371337890625, + "loss": 0.4051, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14218196272850037, + "margin_dpo/beta_margin_grad_std": 0.207365944981575, + "margin_dpo/beta_margin_mean": 3.1369237899780273, + "margin_dpo/loss_margin_mean": 31.369239807128906, + "margin_dpo/margin_mean": 31.369239807128906, + "margin_dpo/margin_std": 23.291423797607422, + "step": 396 + }, + { + "epoch": 0.5829662261380323, + "grad_norm": 45.640316009521484, + "learning_rate": 2.2310222927429716e-07, + "logits/chosen": -0.6247228384017944, + "logits/rejected": -0.5786880254745483, + "logps/chosen": -78.00132751464844, + "logps/ref_chosen": -61.27716827392578, + "logps/ref_rejected": -103.11612701416016, + "logps/rejected": -155.62109375, + "loss": 0.3668, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1343180537223816, + "margin_dpo/beta_margin_grad_std": 0.19318078458309174, + "margin_dpo/beta_margin_mean": 3.5780787467956543, + "margin_dpo/loss_margin_mean": 35.78078842163086, + "margin_dpo/margin_mean": 35.780784606933594, + "margin_dpo/margin_std": 27.38970184326172, + "step": 397 + }, + { + "epoch": 0.5844346549192364, + "grad_norm": 50.45404815673828, + "learning_rate": 2.2182670837116972e-07, + "logits/chosen": -0.6684058904647827, + "logits/rejected": -0.6429616212844849, + "logps/chosen": -87.1710205078125, + "logps/ref_chosen": -68.15155029296875, + "logps/ref_rejected": -108.52360534667969, + "logps/rejected": -158.55606079101562, + "loss": 0.3542, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13709759712219238, + "margin_dpo/beta_margin_grad_std": 0.17260834574699402, + "margin_dpo/beta_margin_mean": 3.1012983322143555, + "margin_dpo/loss_margin_mean": 31.012981414794922, + "margin_dpo/margin_mean": 31.012981414794922, + "margin_dpo/margin_std": 25.8978328704834, + "step": 398 + }, + { + "epoch": 0.5859030837004405, + "grad_norm": 56.667911529541016, + "learning_rate": 2.2055192986047804e-07, + "logits/chosen": -0.655005931854248, + "logits/rejected": -0.5906921625137329, + "logps/chosen": -77.88751220703125, + "logps/ref_chosen": -60.889801025390625, + "logps/ref_rejected": -77.96558380126953, + "logps/rejected": -129.66696166992188, + "loss": 0.4236, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14022482931613922, + "margin_dpo/beta_margin_grad_std": 0.2172231674194336, + "margin_dpo/beta_margin_mean": 3.470367431640625, + "margin_dpo/loss_margin_mean": 34.70367431640625, + "margin_dpo/margin_mean": 34.70367431640625, + "margin_dpo/margin_std": 25.53824234008789, + "step": 399 + }, + { + "epoch": 0.5873715124816447, + "grad_norm": 56.22902297973633, + "learning_rate": 2.192779273338215e-07, + "logits/chosen": -0.6668632626533508, + "logits/rejected": -0.6306219100952148, + "logps/chosen": -81.36758422851562, + "logps/ref_chosen": -63.64359664916992, + "logps/ref_rejected": -105.252685546875, + "logps/rejected": -161.2772674560547, + "loss": 0.4468, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1347055435180664, + "margin_dpo/beta_margin_grad_std": 0.20773792266845703, + "margin_dpo/beta_margin_mean": 3.830059289932251, + "margin_dpo/loss_margin_mean": 38.300594329833984, + "margin_dpo/margin_mean": 38.30059051513672, + "margin_dpo/margin_std": 30.820228576660156, + "step": 400 + }, + { + "epoch": 0.5873715124816447, + "eval_logits/chosen": -0.5647093653678894, + "eval_logits/rejected": -0.5334640741348267, + "eval_logps/chosen": -102.04676818847656, + "eval_logps/ref_chosen": -79.05104064941406, + "eval_logps/ref_rejected": -86.79793548583984, + "eval_logps/rejected": -130.0719757080078, + "eval_loss": 0.42134976387023926, + "eval_margin_dpo/beta": 0.10000000149011612, + "eval_margin_dpo/beta_margin_grad_mean": -0.2686771750450134, + "eval_margin_dpo/beta_margin_grad_std": 0.2539796233177185, + "eval_margin_dpo/beta_margin_mean": 2.0278308391571045, + "eval_margin_dpo/loss_margin_mean": 20.27830696105957, + "eval_margin_dpo/margin_mean": 20.27830696105957, + "eval_margin_dpo/margin_std": 25.458209991455078, + "eval_runtime": 39.9217, + "eval_samples_per_second": 58.59, + "eval_steps_per_second": 1.854, + "step": 400 + }, + { + "epoch": 0.5888399412628488, + "grad_norm": 63.177490234375, + "learning_rate": 2.1800473436235136e-07, + "logits/chosen": -0.5337532758712769, + "logits/rejected": -0.5123304724693298, + "logps/chosen": -76.32991027832031, + "logps/ref_chosen": -57.16303253173828, + "logps/ref_rejected": -83.79249572753906, + "logps/rejected": -132.53762817382812, + "loss": 0.6636, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2089146077632904, + "margin_dpo/beta_margin_grad_std": 0.2529540956020355, + "margin_dpo/beta_margin_mean": 2.95782470703125, + "margin_dpo/loss_margin_mean": 29.5782470703125, + "margin_dpo/margin_mean": 29.5782470703125, + "margin_dpo/margin_std": 31.70156478881836, + "step": 401 + }, + { + "epoch": 0.5903083700440529, + "grad_norm": 25.056612014770508, + "learning_rate": 2.1673238449588665e-07, + "logits/chosen": -0.6563818454742432, + "logits/rejected": -0.6099350452423096, + "logps/chosen": -62.76488494873047, + "logps/ref_chosen": -50.74037170410156, + "logps/ref_rejected": -81.0460433959961, + "logps/rejected": -132.43775939941406, + "loss": 0.211, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.08416810631752014, + "margin_dpo/beta_margin_grad_std": 0.13652239739894867, + "margin_dpo/beta_margin_mean": 3.936720371246338, + "margin_dpo/loss_margin_mean": 39.36720275878906, + "margin_dpo/margin_mean": 39.36720275878906, + "margin_dpo/margin_std": 24.1574649810791, + "step": 402 + }, + { + "epoch": 0.591776798825257, + "grad_norm": 62.19951629638672, + "learning_rate": 2.154609112620295e-07, + "logits/chosen": -0.6823678016662598, + "logits/rejected": -0.6667909622192383, + "logps/chosen": -62.73232650756836, + "logps/ref_chosen": -47.14731216430664, + "logps/ref_rejected": -77.2666015625, + "logps/rejected": -123.16378021240234, + "loss": 0.5812, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1768760085105896, + "margin_dpo/beta_margin_grad_std": 0.22503289580345154, + "margin_dpo/beta_margin_mean": 3.0312163829803467, + "margin_dpo/loss_margin_mean": 30.312164306640625, + "margin_dpo/margin_mean": 30.312164306640625, + "margin_dpo/margin_std": 28.440311431884766, + "step": 403 + }, + { + "epoch": 0.593245227606461, + "grad_norm": 55.05302810668945, + "learning_rate": 2.1419034816528218e-07, + "logits/chosen": -0.6315656304359436, + "logits/rejected": -0.5993084907531738, + "logps/chosen": -63.717140197753906, + "logps/ref_chosen": -47.875274658203125, + "logps/ref_rejected": -77.15499877929688, + "logps/rejected": -123.78963470458984, + "loss": 0.5707, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18741407990455627, + "margin_dpo/beta_margin_grad_std": 0.23549535870552063, + "margin_dpo/beta_margin_mean": 3.0792763233184814, + "margin_dpo/loss_margin_mean": 30.792762756347656, + "margin_dpo/margin_mean": 30.792762756347656, + "margin_dpo/margin_std": 29.048046112060547, + "step": 404 + }, + { + "epoch": 0.5947136563876652, + "grad_norm": 68.80015563964844, + "learning_rate": 2.129207286861638e-07, + "logits/chosen": -0.5908911824226379, + "logits/rejected": -0.5618330240249634, + "logps/chosen": -84.89933776855469, + "logps/ref_chosen": -65.16290283203125, + "logps/ref_rejected": -87.18678283691406, + "logps/rejected": -137.16378784179688, + "loss": 0.5402, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18618471920490265, + "margin_dpo/beta_margin_grad_std": 0.2252415120601654, + "margin_dpo/beta_margin_mean": 3.024057149887085, + "margin_dpo/loss_margin_mean": 30.240570068359375, + "margin_dpo/margin_mean": 30.240571975708008, + "margin_dpo/margin_std": 27.378923416137695, + "step": 405 + }, + { + "epoch": 0.5961820851688693, + "grad_norm": 62.42378616333008, + "learning_rate": 2.1165208628032861e-07, + "logits/chosen": -0.633690357208252, + "logits/rejected": -0.6194950342178345, + "logps/chosen": -66.8834457397461, + "logps/ref_chosen": -49.740814208984375, + "logps/ref_rejected": -92.07862854003906, + "logps/rejected": -141.23345947265625, + "loss": 0.5398, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16698844730854034, + "margin_dpo/beta_margin_grad_std": 0.21780461072921753, + "margin_dpo/beta_margin_mean": 3.2012197971343994, + "margin_dpo/loss_margin_mean": 32.0121955871582, + "margin_dpo/margin_mean": 32.0121955871582, + "margin_dpo/margin_std": 28.075244903564453, + "step": 406 + }, + { + "epoch": 0.5976505139500734, + "grad_norm": 77.41171264648438, + "learning_rate": 2.1038445437768375e-07, + "logits/chosen": -0.6692545413970947, + "logits/rejected": -0.6265490055084229, + "logps/chosen": -72.6529541015625, + "logps/ref_chosen": -56.33069610595703, + "logps/ref_rejected": -77.5120849609375, + "logps/rejected": -126.45954895019531, + "loss": 0.6116, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1815359890460968, + "margin_dpo/beta_margin_grad_std": 0.24493393301963806, + "margin_dpo/beta_margin_mean": 3.2625207901000977, + "margin_dpo/loss_margin_mean": 32.625205993652344, + "margin_dpo/margin_mean": 32.625205993652344, + "margin_dpo/margin_std": 29.34493637084961, + "step": 407 + }, + { + "epoch": 0.5991189427312775, + "grad_norm": 63.60155487060547, + "learning_rate": 2.0911786638150872e-07, + "logits/chosen": -0.6971176862716675, + "logits/rejected": -0.6451106071472168, + "logps/chosen": -85.57447814941406, + "logps/ref_chosen": -69.789306640625, + "logps/ref_rejected": -90.09693908691406, + "logps/rejected": -133.64590454101562, + "loss": 0.6213, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20202887058258057, + "margin_dpo/beta_margin_grad_std": 0.24348929524421692, + "margin_dpo/beta_margin_mean": 2.7763803005218506, + "margin_dpo/loss_margin_mean": 27.76380157470703, + "margin_dpo/margin_mean": 27.76380157470703, + "margin_dpo/margin_std": 27.04732894897461, + "step": 408 + }, + { + "epoch": 0.6005873715124816, + "grad_norm": 49.4036750793457, + "learning_rate": 2.0785235566757517e-07, + "logits/chosen": -0.6140397787094116, + "logits/rejected": -0.582785964012146, + "logps/chosen": -84.53123474121094, + "logps/ref_chosen": -67.31744384765625, + "logps/ref_rejected": -84.904296875, + "logps/rejected": -133.21397399902344, + "loss": 0.4071, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15303273499011993, + "margin_dpo/beta_margin_grad_std": 0.18726620078086853, + "margin_dpo/beta_margin_mean": 3.109589099884033, + "margin_dpo/loss_margin_mean": 31.095890045166016, + "margin_dpo/margin_mean": 31.095890045166016, + "margin_dpo/margin_std": 25.748220443725586, + "step": 409 + }, + { + "epoch": 0.6020558002936858, + "grad_norm": 67.83360290527344, + "learning_rate": 2.065879555832674e-07, + "logits/chosen": -0.5768519043922424, + "logits/rejected": -0.5736193656921387, + "logps/chosen": -70.64789581298828, + "logps/ref_chosen": -51.465354919433594, + "logps/ref_rejected": -83.198974609375, + "logps/rejected": -130.64712524414062, + "loss": 0.585, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19843655824661255, + "margin_dpo/beta_margin_grad_std": 0.23984801769256592, + "margin_dpo/beta_margin_mean": 2.8265600204467773, + "margin_dpo/loss_margin_mean": 28.265602111816406, + "margin_dpo/margin_mean": 28.265600204467773, + "margin_dpo/margin_std": 26.36197280883789, + "step": 410 + }, + { + "epoch": 0.6035242290748899, + "grad_norm": 57.13195037841797, + "learning_rate": 2.0532469944670343e-07, + "logits/chosen": -0.6790816783905029, + "logits/rejected": -0.6465529203414917, + "logps/chosen": -71.7957992553711, + "logps/ref_chosen": -52.30727005004883, + "logps/ref_rejected": -80.69495391845703, + "logps/rejected": -130.16925048828125, + "loss": 0.5328, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18435493111610413, + "margin_dpo/beta_margin_grad_std": 0.22915974259376526, + "margin_dpo/beta_margin_mean": 2.9985756874084473, + "margin_dpo/loss_margin_mean": 29.985755920410156, + "margin_dpo/margin_mean": 29.985755920410156, + "margin_dpo/margin_std": 27.710227966308594, + "step": 411 + }, + { + "epoch": 0.604992657856094, + "grad_norm": 41.65260314941406, + "learning_rate": 2.0406262054585738e-07, + "logits/chosen": -0.7030426859855652, + "logits/rejected": -0.6921846866607666, + "logps/chosen": -69.07894897460938, + "logps/ref_chosen": -53.144126892089844, + "logps/ref_rejected": -100.06080627441406, + "logps/rejected": -145.71115112304688, + "loss": 0.5004, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1866704523563385, + "margin_dpo/beta_margin_grad_std": 0.2087305784225464, + "margin_dpo/beta_margin_mean": 2.9715518951416016, + "margin_dpo/loss_margin_mean": 29.715518951416016, + "margin_dpo/margin_mean": 29.715518951416016, + "margin_dpo/margin_std": 27.474346160888672, + "step": 412 + }, + { + "epoch": 0.6064610866372981, + "grad_norm": 57.669593811035156, + "learning_rate": 2.0280175213768205e-07, + "logits/chosen": -0.5741822719573975, + "logits/rejected": -0.5401548147201538, + "logps/chosen": -80.91738891601562, + "logps/ref_chosen": -61.58196258544922, + "logps/ref_rejected": -99.47340393066406, + "logps/rejected": -148.91249084472656, + "loss": 0.4797, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1573394238948822, + "margin_dpo/beta_margin_grad_std": 0.1960790902376175, + "margin_dpo/beta_margin_mean": 3.0103673934936523, + "margin_dpo/loss_margin_mean": 30.10367202758789, + "margin_dpo/margin_mean": 30.10367202758789, + "margin_dpo/margin_std": 25.22928237915039, + "step": 413 + }, + { + "epoch": 0.6079295154185022, + "grad_norm": 49.89177322387695, + "learning_rate": 2.0154212744723247e-07, + "logits/chosen": -0.6295123100280762, + "logits/rejected": -0.5924926996231079, + "logps/chosen": -62.89250183105469, + "logps/ref_chosen": -46.63148880004883, + "logps/ref_rejected": -87.64652252197266, + "logps/rejected": -139.89688110351562, + "loss": 0.3669, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13251307606697083, + "margin_dpo/beta_margin_grad_std": 0.18466657400131226, + "margin_dpo/beta_margin_mean": 3.598933219909668, + "margin_dpo/loss_margin_mean": 35.98933410644531, + "margin_dpo/margin_mean": 35.98933410644531, + "margin_dpo/margin_std": 25.766937255859375, + "step": 414 + }, + { + "epoch": 0.6093979441997063, + "grad_norm": 44.46585464477539, + "learning_rate": 2.002837796667909e-07, + "logits/chosen": -0.6287680268287659, + "logits/rejected": -0.6043534278869629, + "logps/chosen": -95.65867614746094, + "logps/ref_chosen": -78.6182861328125, + "logps/ref_rejected": -100.47752380371094, + "logps/rejected": -147.51513671875, + "loss": 0.3992, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15828433632850647, + "margin_dpo/beta_margin_grad_std": 0.17664587497711182, + "margin_dpo/beta_margin_mean": 2.9997239112854004, + "margin_dpo/loss_margin_mean": 29.997238159179688, + "margin_dpo/margin_mean": 29.997238159179688, + "margin_dpo/margin_std": 25.054841995239258, + "step": 415 + }, + { + "epoch": 0.6108663729809104, + "grad_norm": 48.96870040893555, + "learning_rate": 1.990267419549914e-07, + "logits/chosen": -0.629509449005127, + "logits/rejected": -0.5954192876815796, + "logps/chosen": -75.95622253417969, + "logps/ref_chosen": -58.27912521362305, + "logps/ref_rejected": -90.56871795654297, + "logps/rejected": -145.17367553710938, + "loss": 0.384, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13312619924545288, + "margin_dpo/beta_margin_grad_std": 0.18159282207489014, + "margin_dpo/beta_margin_mean": 3.692786693572998, + "margin_dpo/loss_margin_mean": 36.92786407470703, + "margin_dpo/margin_mean": 36.9278678894043, + "margin_dpo/margin_std": 27.910099029541016, + "step": 416 + }, + { + "epoch": 0.6123348017621145, + "grad_norm": 34.49977493286133, + "learning_rate": 1.9777104743594686e-07, + "logits/chosen": -0.6395463943481445, + "logits/rejected": -0.5739086866378784, + "logps/chosen": -66.991455078125, + "logps/ref_chosen": -50.1987190246582, + "logps/ref_rejected": -68.15184020996094, + "logps/rejected": -120.10306549072266, + "loss": 0.3145, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12301211804151535, + "margin_dpo/beta_margin_grad_std": 0.17249628901481628, + "margin_dpo/beta_margin_mean": 3.5158486366271973, + "margin_dpo/loss_margin_mean": 35.158485412597656, + "margin_dpo/margin_mean": 35.158485412597656, + "margin_dpo/margin_std": 23.311870574951172, + "step": 417 + }, + { + "epoch": 0.6138032305433186, + "grad_norm": 66.27497100830078, + "learning_rate": 1.965167291983757e-07, + "logits/chosen": -0.6523764133453369, + "logits/rejected": -0.5884617567062378, + "logps/chosen": -99.4321060180664, + "logps/ref_chosen": -81.97846984863281, + "logps/ref_rejected": -104.69148254394531, + "logps/rejected": -156.43402099609375, + "loss": 0.566, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1649475246667862, + "margin_dpo/beta_margin_grad_std": 0.23172861337661743, + "margin_dpo/beta_margin_mean": 3.4288902282714844, + "margin_dpo/loss_margin_mean": 34.288902282714844, + "margin_dpo/margin_mean": 34.288902282714844, + "margin_dpo/margin_std": 31.480552673339844, + "step": 418 + }, + { + "epoch": 0.6152716593245228, + "grad_norm": 47.13197708129883, + "learning_rate": 1.9526382029472988e-07, + "logits/chosen": -0.6016639471054077, + "logits/rejected": -0.5594383478164673, + "logps/chosen": -70.49934387207031, + "logps/ref_chosen": -52.948646545410156, + "logps/ref_rejected": -91.58309936523438, + "logps/rejected": -143.46878051757812, + "loss": 0.3025, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.11779798567295074, + "margin_dpo/beta_margin_grad_std": 0.15797148644924164, + "margin_dpo/beta_margin_mean": 3.433500289916992, + "margin_dpo/loss_margin_mean": 34.33500289916992, + "margin_dpo/margin_mean": 34.33500289916992, + "margin_dpo/margin_std": 24.259674072265625, + "step": 419 + }, + { + "epoch": 0.6167400881057269, + "grad_norm": 63.46165466308594, + "learning_rate": 1.9401235374032425e-07, + "logits/chosen": -0.6578436493873596, + "logits/rejected": -0.579310417175293, + "logps/chosen": -96.29592895507812, + "logps/ref_chosen": -77.7699203491211, + "logps/ref_rejected": -69.31985473632812, + "logps/rejected": -121.10002899169922, + "loss": 0.4568, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1476997584104538, + "margin_dpo/beta_margin_grad_std": 0.207474946975708, + "margin_dpo/beta_margin_mean": 3.3254165649414062, + "margin_dpo/loss_margin_mean": 33.25416564941406, + "margin_dpo/margin_mean": 33.25416564941406, + "margin_dpo/margin_std": 27.482261657714844, + "step": 420 + }, + { + "epoch": 0.618208516886931, + "grad_norm": 75.73670959472656, + "learning_rate": 1.9276236251246653e-07, + "logits/chosen": -0.6206883192062378, + "logits/rejected": -0.5857428908348083, + "logps/chosen": -74.22445678710938, + "logps/ref_chosen": -53.765865325927734, + "logps/ref_rejected": -89.28144836425781, + "logps/rejected": -137.7203826904297, + "loss": 0.6307, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1979384869337082, + "margin_dpo/beta_margin_grad_std": 0.2501598596572876, + "margin_dpo/beta_margin_mean": 2.798034429550171, + "margin_dpo/loss_margin_mean": 27.980342864990234, + "margin_dpo/margin_mean": 27.980342864990234, + "margin_dpo/margin_std": 26.982437133789062, + "step": 421 + }, + { + "epoch": 0.6196769456681351, + "grad_norm": 69.67945098876953, + "learning_rate": 1.9151387954958792e-07, + "logits/chosen": -0.6548997163772583, + "logits/rejected": -0.6131415367126465, + "logps/chosen": -89.59654998779297, + "logps/ref_chosen": -68.6337661743164, + "logps/ref_rejected": -87.86351013183594, + "logps/rejected": -139.065185546875, + "loss": 0.5739, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18962985277175903, + "margin_dpo/beta_margin_grad_std": 0.246720552444458, + "margin_dpo/beta_margin_mean": 3.0238897800445557, + "margin_dpo/loss_margin_mean": 30.2388973236084, + "margin_dpo/margin_mean": 30.2388973236084, + "margin_dpo/margin_std": 28.849193572998047, + "step": 422 + }, + { + "epoch": 0.6211453744493393, + "grad_norm": 73.22090148925781, + "learning_rate": 1.902669377503756e-07, + "logits/chosen": -0.6237994432449341, + "logits/rejected": -0.6053036451339722, + "logps/chosen": -74.4217529296875, + "logps/ref_chosen": -54.99030303955078, + "logps/ref_rejected": -86.30654907226562, + "logps/rejected": -137.05184936523438, + "loss": 0.5645, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18330860137939453, + "margin_dpo/beta_margin_grad_std": 0.2365390956401825, + "margin_dpo/beta_margin_mean": 3.1313838958740234, + "margin_dpo/loss_margin_mean": 31.313838958740234, + "margin_dpo/margin_mean": 31.313838958740234, + "margin_dpo/margin_std": 29.860960006713867, + "step": 423 + }, + { + "epoch": 0.6226138032305433, + "grad_norm": 49.532413482666016, + "learning_rate": 1.890215699729057e-07, + "logits/chosen": -0.6332702040672302, + "logits/rejected": -0.5856061577796936, + "logps/chosen": -73.72906494140625, + "logps/ref_chosen": -56.01191711425781, + "logps/ref_rejected": -66.47896575927734, + "logps/rejected": -118.37336730957031, + "loss": 0.4253, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15173882246017456, + "margin_dpo/beta_margin_grad_std": 0.20923829078674316, + "margin_dpo/beta_margin_mean": 3.417725086212158, + "margin_dpo/loss_margin_mean": 34.17725372314453, + "margin_dpo/margin_mean": 34.17725372314453, + "margin_dpo/margin_std": 30.512378692626953, + "step": 424 + }, + { + "epoch": 0.6240822320117474, + "grad_norm": 60.32538604736328, + "learning_rate": 1.8777780903377732e-07, + "logits/chosen": -0.631500780582428, + "logits/rejected": -0.6203855872154236, + "logps/chosen": -65.76054382324219, + "logps/ref_chosen": -46.868995666503906, + "logps/ref_rejected": -95.92545318603516, + "logps/rejected": -145.38247680664062, + "loss": 0.5126, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16552463173866272, + "margin_dpo/beta_margin_grad_std": 0.224747896194458, + "margin_dpo/beta_margin_mean": 3.0565476417541504, + "margin_dpo/loss_margin_mean": 30.565475463867188, + "margin_dpo/margin_mean": 30.56547737121582, + "margin_dpo/margin_std": 24.83243179321289, + "step": 425 + }, + { + "epoch": 0.6255506607929515, + "grad_norm": 77.01701354980469, + "learning_rate": 1.8653568770724803e-07, + "logits/chosen": -0.6216360330581665, + "logits/rejected": -0.5673133730888367, + "logps/chosen": -93.9437255859375, + "logps/ref_chosen": -76.58354187011719, + "logps/ref_rejected": -81.26658630371094, + "logps/rejected": -132.55589294433594, + "loss": 0.4423, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13299913704395294, + "margin_dpo/beta_margin_grad_std": 0.21135151386260986, + "margin_dpo/beta_margin_mean": 3.392913341522217, + "margin_dpo/loss_margin_mean": 33.929134368896484, + "margin_dpo/margin_mean": 33.929134368896484, + "margin_dpo/margin_std": 26.49199867248535, + "step": 426 + }, + { + "epoch": 0.6270190895741556, + "grad_norm": 56.73555374145508, + "learning_rate": 1.8529523872436977e-07, + "logits/chosen": -0.6535402536392212, + "logits/rejected": -0.5980893969535828, + "logps/chosen": -81.8448486328125, + "logps/ref_chosen": -64.8538818359375, + "logps/ref_rejected": -78.56600952148438, + "logps/rejected": -120.1833267211914, + "loss": 0.5899, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1911955028772354, + "margin_dpo/beta_margin_grad_std": 0.20332689583301544, + "margin_dpo/beta_margin_mean": 2.4626340866088867, + "margin_dpo/loss_margin_mean": 24.626338958740234, + "margin_dpo/margin_mean": 24.626338958740234, + "margin_dpo/margin_std": 23.466392517089844, + "step": 427 + }, + { + "epoch": 0.6284875183553598, + "grad_norm": 43.91977310180664, + "learning_rate": 1.8405649477212697e-07, + "logits/chosen": -0.626772403717041, + "logits/rejected": -0.5905691385269165, + "logps/chosen": -83.34781646728516, + "logps/ref_chosen": -62.63666534423828, + "logps/ref_rejected": -103.28182220458984, + "logps/rejected": -159.70887756347656, + "loss": 0.3243, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12279026210308075, + "margin_dpo/beta_margin_grad_std": 0.17204001545906067, + "margin_dpo/beta_margin_mean": 3.571589946746826, + "margin_dpo/loss_margin_mean": 35.71589660644531, + "margin_dpo/margin_mean": 35.71589660644531, + "margin_dpo/margin_std": 27.326576232910156, + "step": 428 + }, + { + "epoch": 0.6299559471365639, + "grad_norm": 64.35308837890625, + "learning_rate": 1.828194884925749e-07, + "logits/chosen": -0.5859851837158203, + "logits/rejected": -0.5243451595306396, + "logps/chosen": -101.322509765625, + "logps/ref_chosen": -81.23401641845703, + "logps/ref_rejected": -91.79493713378906, + "logps/rejected": -141.50485229492188, + "loss": 0.5977, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19384217262268066, + "margin_dpo/beta_margin_grad_std": 0.23802496492862701, + "margin_dpo/beta_margin_mean": 2.9621434211730957, + "margin_dpo/loss_margin_mean": 29.621435165405273, + "margin_dpo/margin_mean": 29.62143325805664, + "margin_dpo/margin_std": 28.574806213378906, + "step": 429 + }, + { + "epoch": 0.631424375917768, + "grad_norm": 52.39344787597656, + "learning_rate": 1.8158425248197928e-07, + "logits/chosen": -0.5943987369537354, + "logits/rejected": -0.5758558511734009, + "logps/chosen": -79.220458984375, + "logps/ref_chosen": -60.92032241821289, + "logps/ref_rejected": -104.42280578613281, + "logps/rejected": -153.45037841796875, + "loss": 0.4773, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16452635824680328, + "margin_dpo/beta_margin_grad_std": 0.22081297636032104, + "margin_dpo/beta_margin_mean": 3.0727434158325195, + "margin_dpo/loss_margin_mean": 30.727432250976562, + "margin_dpo/margin_mean": 30.727432250976562, + "margin_dpo/margin_std": 26.309518814086914, + "step": 430 + }, + { + "epoch": 0.6328928046989721, + "grad_norm": 44.553733825683594, + "learning_rate": 1.8035081928995788e-07, + "logits/chosen": -0.5974197387695312, + "logits/rejected": -0.5811679363250732, + "logps/chosen": -76.02676391601562, + "logps/ref_chosen": -57.348751068115234, + "logps/ref_rejected": -92.84022521972656, + "logps/rejected": -146.17950439453125, + "loss": 0.3371, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13372743129730225, + "margin_dpo/beta_margin_grad_std": 0.17146742343902588, + "margin_dpo/beta_margin_mean": 3.4661264419555664, + "margin_dpo/loss_margin_mean": 34.66126251220703, + "margin_dpo/margin_mean": 34.66126251220703, + "margin_dpo/margin_std": 26.27811050415039, + "step": 431 + }, + { + "epoch": 0.6343612334801763, + "grad_norm": 57.066585540771484, + "learning_rate": 1.791192214186223e-07, + "logits/chosen": -0.551721453666687, + "logits/rejected": -0.5079036951065063, + "logps/chosen": -89.14061737060547, + "logps/ref_chosen": -71.07479095458984, + "logps/ref_rejected": -98.57952880859375, + "logps/rejected": -149.09951782226562, + "loss": 0.4364, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14938993752002716, + "margin_dpo/beta_margin_grad_std": 0.20730724930763245, + "margin_dpo/beta_margin_mean": 3.2454161643981934, + "margin_dpo/loss_margin_mean": 32.45416259765625, + "margin_dpo/margin_mean": 32.45416259765625, + "margin_dpo/margin_std": 27.234264373779297, + "step": 432 + }, + { + "epoch": 0.6358296622613803, + "grad_norm": 72.30256652832031, + "learning_rate": 1.7788949132172193e-07, + "logits/chosen": -0.6330820322036743, + "logits/rejected": -0.6014422178268433, + "logps/chosen": -81.89974975585938, + "logps/ref_chosen": -58.273193359375, + "logps/ref_rejected": -95.95089721679688, + "logps/rejected": -148.0189208984375, + "loss": 0.5896, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1933947056531906, + "margin_dpo/beta_margin_grad_std": 0.23659807443618774, + "margin_dpo/beta_margin_mean": 2.844146966934204, + "margin_dpo/loss_margin_mean": 28.441471099853516, + "margin_dpo/margin_mean": 28.441471099853516, + "margin_dpo/margin_std": 26.49103546142578, + "step": 433 + }, + { + "epoch": 0.6372980910425844, + "grad_norm": 48.833492279052734, + "learning_rate": 1.7666166140378853e-07, + "logits/chosen": -0.6459417343139648, + "logits/rejected": -0.6003463864326477, + "logps/chosen": -79.62370300292969, + "logps/ref_chosen": -61.97370147705078, + "logps/ref_rejected": -78.49861145019531, + "logps/rejected": -125.43734741210938, + "loss": 0.4262, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1577899158000946, + "margin_dpo/beta_margin_grad_std": 0.19978675246238708, + "margin_dpo/beta_margin_mean": 2.9288740158081055, + "margin_dpo/loss_margin_mean": 29.288738250732422, + "margin_dpo/margin_mean": 29.288738250732422, + "margin_dpo/margin_std": 24.996349334716797, + "step": 434 + }, + { + "epoch": 0.6387665198237885, + "grad_norm": 64.77494812011719, + "learning_rate": 1.7543576401928218e-07, + "logits/chosen": -0.6737290620803833, + "logits/rejected": -0.6396021842956543, + "logps/chosen": -69.81366729736328, + "logps/ref_chosen": -51.502052307128906, + "logps/ref_rejected": -87.56689453125, + "logps/rejected": -138.3524169921875, + "loss": 0.5095, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16580714285373688, + "margin_dpo/beta_margin_grad_std": 0.20909518003463745, + "margin_dpo/beta_margin_mean": 3.247391700744629, + "margin_dpo/loss_margin_mean": 32.473915100097656, + "margin_dpo/margin_mean": 32.473915100097656, + "margin_dpo/margin_std": 29.528972625732422, + "step": 435 + }, + { + "epoch": 0.6402349486049926, + "grad_norm": 40.28781509399414, + "learning_rate": 1.742118314717391e-07, + "logits/chosen": -0.6202067136764526, + "logits/rejected": -0.5589362978935242, + "logps/chosen": -89.22311401367188, + "logps/ref_chosen": -71.40371704101562, + "logps/ref_rejected": -82.72775268554688, + "logps/rejected": -132.42782592773438, + "loss": 0.3515, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13569772243499756, + "margin_dpo/beta_margin_grad_std": 0.17755961418151855, + "margin_dpo/beta_margin_mean": 3.1880667209625244, + "margin_dpo/loss_margin_mean": 31.880666732788086, + "margin_dpo/margin_mean": 31.880664825439453, + "margin_dpo/margin_std": 24.377714157104492, + "step": 436 + }, + { + "epoch": 0.6417033773861968, + "grad_norm": 51.87274932861328, + "learning_rate": 1.7298989601292036e-07, + "logits/chosen": -0.6447381973266602, + "logits/rejected": -0.6036201119422913, + "logps/chosen": -82.27588653564453, + "logps/ref_chosen": -64.7442626953125, + "logps/ref_rejected": -82.04356384277344, + "logps/rejected": -127.82572937011719, + "loss": 0.5283, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17866836488246918, + "margin_dpo/beta_margin_grad_std": 0.217972993850708, + "margin_dpo/beta_margin_mean": 2.8250551223754883, + "margin_dpo/loss_margin_mean": 28.25054931640625, + "margin_dpo/margin_mean": 28.25054931640625, + "margin_dpo/margin_std": 23.456018447875977, + "step": 437 + }, + { + "epoch": 0.6431718061674009, + "grad_norm": 64.63465118408203, + "learning_rate": 1.7176998984196144e-07, + "logits/chosen": -0.6529127359390259, + "logits/rejected": -0.5783262848854065, + "logps/chosen": -78.42019653320312, + "logps/ref_chosen": -59.0186653137207, + "logps/ref_rejected": -83.07682037353516, + "logps/rejected": -136.8445587158203, + "loss": 0.3724, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13743507862091064, + "margin_dpo/beta_margin_grad_std": 0.18105177581310272, + "margin_dpo/beta_margin_mean": 3.4366211891174316, + "margin_dpo/loss_margin_mean": 34.3662109375, + "margin_dpo/margin_mean": 34.3662109375, + "margin_dpo/margin_std": 26.907875061035156, + "step": 438 + }, + { + "epoch": 0.644640234948605, + "grad_norm": 65.7437744140625, + "learning_rate": 1.7055214510452458e-07, + "logits/chosen": -0.6286749243736267, + "logits/rejected": -0.607205867767334, + "logps/chosen": -77.5360107421875, + "logps/ref_chosen": -53.784080505371094, + "logps/ref_rejected": -83.98545837402344, + "logps/rejected": -134.8016357421875, + "loss": 0.5171, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18228086829185486, + "margin_dpo/beta_margin_grad_std": 0.19927145540714264, + "margin_dpo/beta_margin_mean": 2.7064239978790283, + "margin_dpo/loss_margin_mean": 27.064239501953125, + "margin_dpo/margin_mean": 27.064241409301758, + "margin_dpo/margin_std": 23.722930908203125, + "step": 439 + }, + { + "epoch": 0.6461086637298091, + "grad_norm": 95.62813568115234, + "learning_rate": 1.6933639389195134e-07, + "logits/chosen": -0.6431401968002319, + "logits/rejected": -0.6009776592254639, + "logps/chosen": -97.17742919921875, + "logps/ref_chosen": -78.56671905517578, + "logps/ref_rejected": -96.49775695800781, + "logps/rejected": -140.99290466308594, + "loss": 0.6656, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.21006713807582855, + "margin_dpo/beta_margin_grad_std": 0.24986517429351807, + "margin_dpo/beta_margin_mean": 2.5884432792663574, + "margin_dpo/loss_margin_mean": 25.884429931640625, + "margin_dpo/margin_mean": 25.884429931640625, + "margin_dpo/margin_std": 27.127971649169922, + "step": 440 + }, + { + "epoch": 0.6475770925110133, + "grad_norm": 52.33854293823242, + "learning_rate": 1.681227682404166e-07, + "logits/chosen": -0.587798535823822, + "logits/rejected": -0.5523707866668701, + "logps/chosen": -80.9710693359375, + "logps/ref_chosen": -60.824440002441406, + "logps/ref_rejected": -96.47080993652344, + "logps/rejected": -147.42752075195312, + "loss": 0.4425, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1385086476802826, + "margin_dpo/beta_margin_grad_std": 0.18967705965042114, + "margin_dpo/beta_margin_mean": 3.0810084342956543, + "margin_dpo/loss_margin_mean": 30.81008529663086, + "margin_dpo/margin_mean": 30.81008529663086, + "margin_dpo/margin_std": 23.786081314086914, + "step": 441 + }, + { + "epoch": 0.6490455212922174, + "grad_norm": 36.203887939453125, + "learning_rate": 1.669113001300851e-07, + "logits/chosen": -0.5871816873550415, + "logits/rejected": -0.549630343914032, + "logps/chosen": -65.28014373779297, + "logps/ref_chosen": -47.01121520996094, + "logps/ref_rejected": -76.53926086425781, + "logps/rejected": -132.68634033203125, + "loss": 0.2789, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.10812409967184067, + "margin_dpo/beta_margin_grad_std": 0.13728050887584686, + "margin_dpo/beta_margin_mean": 3.787814140319824, + "margin_dpo/loss_margin_mean": 37.878135681152344, + "margin_dpo/margin_mean": 37.878135681152344, + "margin_dpo/margin_std": 26.232383728027344, + "step": 442 + }, + { + "epoch": 0.6505139500734214, + "grad_norm": 81.11394500732422, + "learning_rate": 1.6570202148426815e-07, + "logits/chosen": -0.6177343130111694, + "logits/rejected": -0.5820919275283813, + "logps/chosen": -93.88763427734375, + "logps/ref_chosen": -71.27301788330078, + "logps/ref_rejected": -86.679931640625, + "logps/rejected": -138.04800415039062, + "loss": 0.6542, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19925275444984436, + "margin_dpo/beta_margin_grad_std": 0.2632359564304352, + "margin_dpo/beta_margin_mean": 2.8753466606140137, + "margin_dpo/loss_margin_mean": 28.75346565246582, + "margin_dpo/margin_mean": 28.753463745117188, + "margin_dpo/margin_std": 27.78663444519043, + "step": 443 + }, + { + "epoch": 0.6519823788546255, + "grad_norm": 47.98015594482422, + "learning_rate": 1.6449496416858282e-07, + "logits/chosen": -0.593714714050293, + "logits/rejected": -0.5685232877731323, + "logps/chosen": -77.04825592041016, + "logps/ref_chosen": -57.213706970214844, + "logps/ref_rejected": -97.25489044189453, + "logps/rejected": -151.35964965820312, + "loss": 0.4406, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1492740362882614, + "margin_dpo/beta_margin_grad_std": 0.21126282215118408, + "margin_dpo/beta_margin_mean": 3.42702054977417, + "margin_dpo/loss_margin_mean": 34.27020263671875, + "margin_dpo/margin_mean": 34.27020263671875, + "margin_dpo/margin_std": 28.456218719482422, + "step": 444 + }, + { + "epoch": 0.6534508076358296, + "grad_norm": 63.52720260620117, + "learning_rate": 1.6329015999011182e-07, + "logits/chosen": -0.5531260967254639, + "logits/rejected": -0.5164097547531128, + "logps/chosen": -84.41445922851562, + "logps/ref_chosen": -67.29979705810547, + "logps/ref_rejected": -92.68267822265625, + "logps/rejected": -141.507080078125, + "loss": 0.4692, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1679629534482956, + "margin_dpo/beta_margin_grad_std": 0.21126939356327057, + "margin_dpo/beta_margin_mean": 3.170973300933838, + "margin_dpo/loss_margin_mean": 31.709733963012695, + "margin_dpo/margin_mean": 31.709733963012695, + "margin_dpo/margin_std": 27.622833251953125, + "step": 445 + }, + { + "epoch": 0.6549192364170338, + "grad_norm": 50.25477600097656, + "learning_rate": 1.6208764069656578e-07, + "logits/chosen": -0.6113117933273315, + "logits/rejected": -0.5910245776176453, + "logps/chosen": -76.8818359375, + "logps/ref_chosen": -59.098487854003906, + "logps/ref_rejected": -101.26419067382812, + "logps/rejected": -149.34832763671875, + "loss": 0.4369, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16404861211776733, + "margin_dpo/beta_margin_grad_std": 0.18896964192390442, + "margin_dpo/beta_margin_mean": 3.030078887939453, + "margin_dpo/loss_margin_mean": 30.30078887939453, + "margin_dpo/margin_mean": 30.300785064697266, + "margin_dpo/margin_std": 26.338363647460938, + "step": 446 + }, + { + "epoch": 0.6563876651982379, + "grad_norm": 50.45421600341797, + "learning_rate": 1.608874379754465e-07, + "logits/chosen": -0.636214017868042, + "logits/rejected": -0.6372050046920776, + "logps/chosen": -76.49604797363281, + "logps/ref_chosen": -56.07533264160156, + "logps/ref_rejected": -98.69475555419922, + "logps/rejected": -151.05020141601562, + "loss": 0.4496, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16537348926067352, + "margin_dpo/beta_margin_grad_std": 0.20752978324890137, + "margin_dpo/beta_margin_mean": 3.193472385406494, + "margin_dpo/loss_margin_mean": 31.934722900390625, + "margin_dpo/margin_mean": 31.934722900390625, + "margin_dpo/margin_std": 28.57367515563965, + "step": 447 + }, + { + "epoch": 0.657856093979442, + "grad_norm": 49.00764846801758, + "learning_rate": 1.5968958345321177e-07, + "logits/chosen": -0.618561863899231, + "logits/rejected": -0.6021959185600281, + "logps/chosen": -81.1510238647461, + "logps/ref_chosen": -60.00384521484375, + "logps/ref_rejected": -102.26465606689453, + "logps/rejected": -155.61978149414062, + "loss": 0.3919, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13892757892608643, + "margin_dpo/beta_margin_grad_std": 0.17518764734268188, + "margin_dpo/beta_margin_mean": 3.220794439315796, + "margin_dpo/loss_margin_mean": 32.207942962646484, + "margin_dpo/margin_mean": 32.207942962646484, + "margin_dpo/margin_std": 25.56855010986328, + "step": 448 + }, + { + "epoch": 0.6593245227606461, + "grad_norm": 81.82498931884766, + "learning_rate": 1.584941086944423e-07, + "logits/chosen": -0.6117278337478638, + "logits/rejected": -0.5685479640960693, + "logps/chosen": -89.69844055175781, + "logps/ref_chosen": -67.52661895751953, + "logps/ref_rejected": -88.59690856933594, + "logps/rejected": -142.21090698242188, + "loss": 0.5996, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17296102643013, + "margin_dpo/beta_margin_grad_std": 0.23669497668743134, + "margin_dpo/beta_margin_mean": 3.1442177295684814, + "margin_dpo/loss_margin_mean": 31.442176818847656, + "margin_dpo/margin_mean": 31.442176818847656, + "margin_dpo/margin_std": 30.16796875, + "step": 449 + }, + { + "epoch": 0.6607929515418502, + "grad_norm": 41.05678176879883, + "learning_rate": 1.573010452010098e-07, + "logits/chosen": -0.6542295217514038, + "logits/rejected": -0.6270924806594849, + "logps/chosen": -73.37232971191406, + "logps/ref_chosen": -57.108116149902344, + "logps/ref_rejected": -102.75494384765625, + "logps/rejected": -153.49468994140625, + "loss": 0.3193, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12820908427238464, + "margin_dpo/beta_margin_grad_std": 0.16310177743434906, + "margin_dpo/beta_margin_mean": 3.4475526809692383, + "margin_dpo/loss_margin_mean": 34.47552490234375, + "margin_dpo/margin_mean": 34.47552490234375, + "margin_dpo/margin_std": 25.831031799316406, + "step": 450 + }, + { + "epoch": 0.6622613803230544, + "grad_norm": 75.21393585205078, + "learning_rate": 1.5611042441124687e-07, + "logits/chosen": -0.643078625202179, + "logits/rejected": -0.5938763618469238, + "logps/chosen": -80.2492904663086, + "logps/ref_chosen": -58.46883010864258, + "logps/ref_rejected": -72.92941284179688, + "logps/rejected": -124.17213439941406, + "loss": 0.553, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17004984617233276, + "margin_dpo/beta_margin_grad_std": 0.22876521944999695, + "margin_dpo/beta_margin_mean": 2.946226119995117, + "margin_dpo/loss_margin_mean": 29.46225929260254, + "margin_dpo/margin_mean": 29.46225929260254, + "margin_dpo/margin_std": 25.89090347290039, + "step": 451 + }, + { + "epoch": 0.6637298091042585, + "grad_norm": 33.82390213012695, + "learning_rate": 1.549222776991186e-07, + "logits/chosen": -0.5697954297065735, + "logits/rejected": -0.5710628628730774, + "logps/chosen": -66.59547424316406, + "logps/ref_chosen": -50.39055252075195, + "logps/ref_rejected": -97.77143096923828, + "logps/rejected": -144.08883666992188, + "loss": 0.2885, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12178364396095276, + "margin_dpo/beta_margin_grad_std": 0.13007774949073792, + "margin_dpo/beta_margin_mean": 3.0112478733062744, + "margin_dpo/loss_margin_mean": 30.11248016357422, + "margin_dpo/margin_mean": 30.11248016357422, + "margin_dpo/margin_std": 22.0058536529541, + "step": 452 + }, + { + "epoch": 0.6651982378854625, + "grad_norm": 49.218753814697266, + "learning_rate": 1.5373663637339584e-07, + "logits/chosen": -0.6415982246398926, + "logits/rejected": -0.5901994705200195, + "logps/chosen": -77.15182495117188, + "logps/ref_chosen": -57.71485137939453, + "logps/ref_rejected": -82.20741271972656, + "logps/rejected": -130.7767333984375, + "loss": 0.4672, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1702961027622223, + "margin_dpo/beta_margin_grad_std": 0.20113880932331085, + "margin_dpo/beta_margin_mean": 2.9132347106933594, + "margin_dpo/loss_margin_mean": 29.13234519958496, + "margin_dpo/margin_mean": 29.132347106933594, + "margin_dpo/margin_std": 25.473758697509766, + "step": 453 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 56.509586334228516, + "learning_rate": 1.5255353167683017e-07, + "logits/chosen": -0.6178318858146667, + "logits/rejected": -0.5745600461959839, + "logps/chosen": -81.63310241699219, + "logps/ref_chosen": -60.945648193359375, + "logps/ref_rejected": -84.9507827758789, + "logps/rejected": -138.04898071289062, + "loss": 0.3975, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14186511933803558, + "margin_dpo/beta_margin_grad_std": 0.19594962894916534, + "margin_dpo/beta_margin_mean": 3.2410740852355957, + "margin_dpo/loss_margin_mean": 32.41073989868164, + "margin_dpo/margin_mean": 32.41073989868164, + "margin_dpo/margin_std": 25.818143844604492, + "step": 454 + }, + { + "epoch": 0.6681350954478708, + "grad_norm": 45.265987396240234, + "learning_rate": 1.5137299478533064e-07, + "logits/chosen": -0.6223077774047852, + "logits/rejected": -0.5953476428985596, + "logps/chosen": -65.11666870117188, + "logps/ref_chosen": -44.88671112060547, + "logps/ref_rejected": -115.30147552490234, + "logps/rejected": -172.906982421875, + "loss": 0.3707, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1233278438448906, + "margin_dpo/beta_margin_grad_std": 0.19460362195968628, + "margin_dpo/beta_margin_mean": 3.7375543117523193, + "margin_dpo/loss_margin_mean": 37.37554168701172, + "margin_dpo/margin_mean": 37.37554168701172, + "margin_dpo/margin_std": 26.588571548461914, + "step": 455 + }, + { + "epoch": 0.6696035242290749, + "grad_norm": 51.1346321105957, + "learning_rate": 1.5019505680714232e-07, + "logits/chosen": -0.6171753406524658, + "logits/rejected": -0.6144955158233643, + "logps/chosen": -74.41389465332031, + "logps/ref_chosen": -57.036781311035156, + "logps/ref_rejected": -105.21783447265625, + "logps/rejected": -160.3103790283203, + "loss": 0.3541, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13141584396362305, + "margin_dpo/beta_margin_grad_std": 0.1724250167608261, + "margin_dpo/beta_margin_mean": 3.771542549133301, + "margin_dpo/loss_margin_mean": 37.715423583984375, + "margin_dpo/margin_mean": 37.715423583984375, + "margin_dpo/margin_std": 28.47699737548828, + "step": 456 + }, + { + "epoch": 0.671071953010279, + "grad_norm": 58.4116096496582, + "learning_rate": 1.4901974878202627e-07, + "logits/chosen": -0.5911962985992432, + "logits/rejected": -0.5620957612991333, + "logps/chosen": -72.8665542602539, + "logps/ref_chosen": -54.24253845214844, + "logps/ref_rejected": -85.10956573486328, + "logps/rejected": -136.763916015625, + "loss": 0.3892, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13140106201171875, + "margin_dpo/beta_margin_grad_std": 0.19598211348056793, + "margin_dpo/beta_margin_mean": 3.3030338287353516, + "margin_dpo/loss_margin_mean": 33.030338287353516, + "margin_dpo/margin_mean": 33.030338287353516, + "margin_dpo/margin_std": 24.58535385131836, + "step": 457 + }, + { + "epoch": 0.6725403817914831, + "grad_norm": 60.80534362792969, + "learning_rate": 1.4784710168044212e-07, + "logits/chosen": -0.5890240669250488, + "logits/rejected": -0.5499871969223022, + "logps/chosen": -74.93452453613281, + "logps/ref_chosen": -55.40888214111328, + "logps/ref_rejected": -97.68325805664062, + "logps/rejected": -155.25704956054688, + "loss": 0.4472, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1399458944797516, + "margin_dpo/beta_margin_grad_std": 0.23080742359161377, + "margin_dpo/beta_margin_mean": 3.8048152923583984, + "margin_dpo/loss_margin_mean": 38.04814910888672, + "margin_dpo/margin_mean": 38.048152923583984, + "margin_dpo/margin_std": 32.506038665771484, + "step": 458 + }, + { + "epoch": 0.6740088105726872, + "grad_norm": 49.459625244140625, + "learning_rate": 1.466771464027316e-07, + "logits/chosen": -0.6109951138496399, + "logits/rejected": -0.58476322889328, + "logps/chosen": -67.23796081542969, + "logps/ref_chosen": -46.55748748779297, + "logps/ref_rejected": -86.16854095458984, + "logps/rejected": -135.8137969970703, + "loss": 0.4559, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16255170106887817, + "margin_dpo/beta_margin_grad_std": 0.19391369819641113, + "margin_dpo/beta_margin_mean": 2.8964788913726807, + "margin_dpo/loss_margin_mean": 28.96478843688965, + "margin_dpo/margin_mean": 28.96478843688965, + "margin_dpo/margin_std": 23.446517944335938, + "step": 459 + }, + { + "epoch": 0.6754772393538914, + "grad_norm": 60.67763900756836, + "learning_rate": 1.4550991377830423e-07, + "logits/chosen": -0.6025089621543884, + "logits/rejected": -0.6064221858978271, + "logps/chosen": -70.86332702636719, + "logps/ref_chosen": -51.63489532470703, + "logps/ref_rejected": -104.11935424804688, + "logps/rejected": -156.27178955078125, + "loss": 0.4214, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.151195228099823, + "margin_dpo/beta_margin_grad_std": 0.2108098566532135, + "margin_dpo/beta_margin_mean": 3.2924013137817383, + "margin_dpo/loss_margin_mean": 32.92401123046875, + "margin_dpo/margin_mean": 32.92401123046875, + "margin_dpo/margin_std": 25.963363647460938, + "step": 460 + }, + { + "epoch": 0.6769456681350955, + "grad_norm": 62.43408966064453, + "learning_rate": 1.4434543456482518e-07, + "logits/chosen": -0.6063634157180786, + "logits/rejected": -0.591764509677887, + "logps/chosen": -80.1798095703125, + "logps/ref_chosen": -55.18195343017578, + "logps/ref_rejected": -86.47689819335938, + "logps/rejected": -139.15049743652344, + "loss": 0.5542, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1909765750169754, + "margin_dpo/beta_margin_grad_std": 0.22030548751354218, + "margin_dpo/beta_margin_mean": 2.767573833465576, + "margin_dpo/loss_margin_mean": 27.675739288330078, + "margin_dpo/margin_mean": 27.675739288330078, + "margin_dpo/margin_std": 27.114221572875977, + "step": 461 + }, + { + "epoch": 0.6784140969162996, + "grad_norm": 65.849853515625, + "learning_rate": 1.4318373944740484e-07, + "logits/chosen": -0.6203492879867554, + "logits/rejected": -0.5816408395767212, + "logps/chosen": -93.56144714355469, + "logps/ref_chosen": -69.92803955078125, + "logps/ref_rejected": -78.84111785888672, + "logps/rejected": -129.50086975097656, + "loss": 0.5525, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1917511373758316, + "margin_dpo/beta_margin_grad_std": 0.21951280534267426, + "margin_dpo/beta_margin_mean": 2.702633857727051, + "margin_dpo/loss_margin_mean": 27.026338577270508, + "margin_dpo/margin_mean": 27.026338577270508, + "margin_dpo/margin_std": 25.6932373046875, + "step": 462 + }, + { + "epoch": 0.6798825256975036, + "grad_norm": 50.78045654296875, + "learning_rate": 1.4202485903778976e-07, + "logits/chosen": -0.5955685377120972, + "logits/rejected": -0.5663818120956421, + "logps/chosen": -76.08649444580078, + "logps/ref_chosen": -55.27437210083008, + "logps/ref_rejected": -89.02497863769531, + "logps/rejected": -143.9271240234375, + "loss": 0.3565, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12205320596694946, + "margin_dpo/beta_margin_grad_std": 0.1792411506175995, + "margin_dpo/beta_margin_mean": 3.4090020656585693, + "margin_dpo/loss_margin_mean": 34.09001922607422, + "margin_dpo/margin_mean": 34.09001922607422, + "margin_dpo/margin_std": 23.93946075439453, + "step": 463 + }, + { + "epoch": 0.6813509544787077, + "grad_norm": 57.22633743286133, + "learning_rate": 1.4086882387355658e-07, + "logits/chosen": -0.6078216433525085, + "logits/rejected": -0.6137137413024902, + "logps/chosen": -73.63619995117188, + "logps/ref_chosen": -50.91230010986328, + "logps/ref_rejected": -102.4893798828125, + "logps/rejected": -160.0600128173828, + "loss": 0.4538, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14801308512687683, + "margin_dpo/beta_margin_grad_std": 0.2087215781211853, + "margin_dpo/beta_margin_mean": 3.484673023223877, + "margin_dpo/loss_margin_mean": 34.84673309326172, + "margin_dpo/margin_mean": 34.84673309326172, + "margin_dpo/margin_std": 30.08755874633789, + "step": 464 + }, + { + "epoch": 0.6828193832599119, + "grad_norm": 50.34962844848633, + "learning_rate": 1.3971566441730714e-07, + "logits/chosen": -0.5805087089538574, + "logits/rejected": -0.5592623949050903, + "logps/chosen": -81.41581726074219, + "logps/ref_chosen": -60.116851806640625, + "logps/ref_rejected": -113.94602966308594, + "logps/rejected": -173.1104736328125, + "loss": 0.2796, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.10604196786880493, + "margin_dpo/beta_margin_grad_std": 0.16912564635276794, + "margin_dpo/beta_margin_mean": 3.786548614501953, + "margin_dpo/loss_margin_mean": 37.86548614501953, + "margin_dpo/margin_mean": 37.86548614501953, + "margin_dpo/margin_std": 25.48162841796875, + "step": 465 + }, + { + "epoch": 0.684287812041116, + "grad_norm": 56.52486801147461, + "learning_rate": 1.3856541105586545e-07, + "logits/chosen": -0.6198223829269409, + "logits/rejected": -0.5899391174316406, + "logps/chosen": -75.75175476074219, + "logps/ref_chosen": -52.920921325683594, + "logps/ref_rejected": -90.3154296875, + "logps/rejected": -147.3414306640625, + "loss": 0.3836, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12490339577198029, + "margin_dpo/beta_margin_grad_std": 0.17585688829421997, + "margin_dpo/beta_margin_mean": 3.419515609741211, + "margin_dpo/loss_margin_mean": 34.195152282714844, + "margin_dpo/margin_mean": 34.195152282714844, + "margin_dpo/margin_std": 23.578819274902344, + "step": 466 + }, + { + "epoch": 0.6857562408223201, + "grad_norm": 47.07603073120117, + "learning_rate": 1.3741809409947729e-07, + "logits/chosen": -0.6401114463806152, + "logits/rejected": -0.6112991571426392, + "logps/chosen": -102.23968505859375, + "logps/ref_chosen": -78.7158203125, + "logps/ref_rejected": -102.86019897460938, + "logps/rejected": -160.81512451171875, + "loss": 0.3757, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1389492303133011, + "margin_dpo/beta_margin_grad_std": 0.19067519903182983, + "margin_dpo/beta_margin_mean": 3.443108081817627, + "margin_dpo/loss_margin_mean": 34.43107986450195, + "margin_dpo/margin_mean": 34.43107986450195, + "margin_dpo/margin_std": 27.870590209960938, + "step": 467 + }, + { + "epoch": 0.6872246696035242, + "grad_norm": 52.84998321533203, + "learning_rate": 1.362737437810114e-07, + "logits/chosen": -0.5987285375595093, + "logits/rejected": -0.5710204243659973, + "logps/chosen": -89.93443298339844, + "logps/ref_chosen": -69.93536376953125, + "logps/ref_rejected": -101.02881622314453, + "logps/rejected": -152.99951171875, + "loss": 0.3946, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14784805476665497, + "margin_dpo/beta_margin_grad_std": 0.19148442149162292, + "margin_dpo/beta_margin_mean": 3.1971635818481445, + "margin_dpo/loss_margin_mean": 31.971633911132812, + "margin_dpo/margin_mean": 31.971633911132812, + "margin_dpo/margin_std": 26.962993621826172, + "step": 468 + }, + { + "epoch": 0.6886930983847284, + "grad_norm": 57.32428741455078, + "learning_rate": 1.351323902551631e-07, + "logits/chosen": -0.6210588216781616, + "logits/rejected": -0.5882803201675415, + "logps/chosen": -91.5205078125, + "logps/ref_chosen": -68.12469482421875, + "logps/ref_rejected": -104.78640747070312, + "logps/rejected": -161.24330139160156, + "loss": 0.4417, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15189404785633087, + "margin_dpo/beta_margin_grad_std": 0.21930459141731262, + "margin_dpo/beta_margin_mean": 3.3061084747314453, + "margin_dpo/loss_margin_mean": 33.06108474731445, + "margin_dpo/margin_mean": 33.06108474731445, + "margin_dpo/margin_std": 27.455984115600586, + "step": 469 + }, + { + "epoch": 0.6901615271659325, + "grad_norm": 36.49016189575195, + "learning_rate": 1.339940635976592e-07, + "logits/chosen": -0.5721327066421509, + "logits/rejected": -0.5457053184509277, + "logps/chosen": -64.37504577636719, + "logps/ref_chosen": -43.79193115234375, + "logps/ref_rejected": -82.70285034179688, + "logps/rejected": -141.7349853515625, + "loss": 0.2322, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.0932888612151146, + "margin_dpo/beta_margin_grad_std": 0.1430518627166748, + "margin_dpo/beta_margin_mean": 3.8449037075042725, + "margin_dpo/loss_margin_mean": 38.44903564453125, + "margin_dpo/margin_mean": 38.44903564453125, + "margin_dpo/margin_std": 23.93124008178711, + "step": 470 + }, + { + "epoch": 0.6916299559471366, + "grad_norm": 55.444793701171875, + "learning_rate": 1.3285879380446563e-07, + "logits/chosen": -0.5917923450469971, + "logits/rejected": -0.5650200843811035, + "logps/chosen": -87.83808898925781, + "logps/ref_chosen": -63.33952331542969, + "logps/ref_rejected": -83.61048126220703, + "logps/rejected": -139.63504028320312, + "loss": 0.4209, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15353405475616455, + "margin_dpo/beta_margin_grad_std": 0.2025720775127411, + "margin_dpo/beta_margin_mean": 3.1526002883911133, + "margin_dpo/loss_margin_mean": 31.526004791259766, + "margin_dpo/margin_mean": 31.526002883911133, + "margin_dpo/margin_std": 24.51514434814453, + "step": 471 + }, + { + "epoch": 0.6930983847283406, + "grad_norm": 51.19646453857422, + "learning_rate": 1.317266107909975e-07, + "logits/chosen": -0.6550266742706299, + "logits/rejected": -0.6002498865127563, + "logps/chosen": -105.19808959960938, + "logps/ref_chosen": -83.66609954833984, + "logps/ref_rejected": -117.20919799804688, + "logps/rejected": -179.25303649902344, + "loss": 0.3083, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.11649411916732788, + "margin_dpo/beta_margin_grad_std": 0.1778721958398819, + "margin_dpo/beta_margin_mean": 4.051185607910156, + "margin_dpo/loss_margin_mean": 40.51185607910156, + "margin_dpo/margin_mean": 40.51185607910156, + "margin_dpo/margin_std": 33.35724639892578, + "step": 472 + }, + { + "epoch": 0.6945668135095447, + "grad_norm": 78.59127044677734, + "learning_rate": 1.3059754439133002e-07, + "logits/chosen": -0.5987892746925354, + "logits/rejected": -0.5563715696334839, + "logps/chosen": -87.65088653564453, + "logps/ref_chosen": -63.49696731567383, + "logps/ref_rejected": -81.14657592773438, + "logps/rejected": -133.66065979003906, + "loss": 0.4869, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.170325368642807, + "margin_dpo/beta_margin_grad_std": 0.22086429595947266, + "margin_dpo/beta_margin_mean": 2.8360166549682617, + "margin_dpo/loss_margin_mean": 28.360164642333984, + "margin_dpo/margin_mean": 28.360164642333984, + "margin_dpo/margin_std": 22.68465805053711, + "step": 473 + }, + { + "epoch": 0.6960352422907489, + "grad_norm": 73.43638610839844, + "learning_rate": 1.2947162435741277e-07, + "logits/chosen": -0.5953601598739624, + "logits/rejected": -0.5836308598518372, + "logps/chosen": -76.69085693359375, + "logps/ref_chosen": -52.6119384765625, + "logps/ref_rejected": -90.08041381835938, + "logps/rejected": -145.01792907714844, + "loss": 0.4665, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16552072763442993, + "margin_dpo/beta_margin_grad_std": 0.22101813554763794, + "margin_dpo/beta_margin_mean": 3.085860252380371, + "margin_dpo/loss_margin_mean": 30.85860252380371, + "margin_dpo/margin_mean": 30.85860252380371, + "margin_dpo/margin_std": 25.63982582092285, + "step": 474 + }, + { + "epoch": 0.697503671071953, + "grad_norm": 43.33028030395508, + "learning_rate": 1.2834888035828596e-07, + "logits/chosen": -0.634456992149353, + "logits/rejected": -0.6314413547515869, + "logps/chosen": -63.670257568359375, + "logps/ref_chosen": -42.49519348144531, + "logps/ref_rejected": -90.06295013427734, + "logps/rejected": -145.98110961914062, + "loss": 0.3832, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14322960376739502, + "margin_dpo/beta_margin_grad_std": 0.19120553135871887, + "margin_dpo/beta_margin_mean": 3.4743099212646484, + "margin_dpo/loss_margin_mean": 34.74309539794922, + "margin_dpo/margin_mean": 34.743099212646484, + "margin_dpo/margin_std": 30.56637191772461, + "step": 475 + }, + { + "epoch": 0.6989720998531571, + "grad_norm": 60.676177978515625, + "learning_rate": 1.2722934197929802e-07, + "logits/chosen": -0.6468064785003662, + "logits/rejected": -0.6170526742935181, + "logps/chosen": -64.93257141113281, + "logps/ref_chosen": -42.949378967285156, + "logps/ref_rejected": -73.71023559570312, + "logps/rejected": -126.3614501953125, + "loss": 0.506, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17712682485580444, + "margin_dpo/beta_margin_grad_std": 0.22809603810310364, + "margin_dpo/beta_margin_mean": 3.066802501678467, + "margin_dpo/loss_margin_mean": 30.668025970458984, + "margin_dpo/margin_mean": 30.668025970458984, + "margin_dpo/margin_std": 26.836669921875, + "step": 476 + }, + { + "epoch": 0.7004405286343612, + "grad_norm": 82.7061538696289, + "learning_rate": 1.2611303872132631e-07, + "logits/chosen": -0.6342014074325562, + "logits/rejected": -0.5668247938156128, + "logps/chosen": -96.3890151977539, + "logps/ref_chosen": -70.77261352539062, + "logps/ref_rejected": -76.13737487792969, + "logps/rejected": -133.8131866455078, + "loss": 0.6051, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15660977363586426, + "margin_dpo/beta_margin_grad_std": 0.24074432253837585, + "margin_dpo/beta_margin_mean": 3.2059414386749268, + "margin_dpo/loss_margin_mean": 32.05941390991211, + "margin_dpo/margin_mean": 32.05941390991211, + "margin_dpo/margin_std": 27.974023818969727, + "step": 477 + }, + { + "epoch": 0.7019089574155654, + "grad_norm": 48.793907165527344, + "learning_rate": 1.2500000000000005e-07, + "logits/chosen": -0.6003127098083496, + "logits/rejected": -0.5863485336303711, + "logps/chosen": -61.834197998046875, + "logps/ref_chosen": -41.440513610839844, + "logps/ref_rejected": -85.36196899414062, + "logps/rejected": -140.44876098632812, + "loss": 0.3975, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14744707942008972, + "margin_dpo/beta_margin_grad_std": 0.1875038743019104, + "margin_dpo/beta_margin_mean": 3.469311237335205, + "margin_dpo/loss_margin_mean": 34.693111419677734, + "margin_dpo/margin_mean": 34.693111419677734, + "margin_dpo/margin_std": 29.18410873413086, + "step": 478 + }, + { + "epoch": 0.7033773861967695, + "grad_norm": 57.176082611083984, + "learning_rate": 1.2389025514492456e-07, + "logits/chosen": -0.5899140238761902, + "logits/rejected": -0.5823123455047607, + "logps/chosen": -79.59027099609375, + "logps/ref_chosen": -53.907920837402344, + "logps/ref_rejected": -95.1163330078125, + "logps/rejected": -151.4071044921875, + "loss": 0.4438, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1582869589328766, + "margin_dpo/beta_margin_grad_std": 0.21056291460990906, + "margin_dpo/beta_margin_mean": 3.0608415603637695, + "margin_dpo/loss_margin_mean": 30.608417510986328, + "margin_dpo/margin_mean": 30.608415603637695, + "margin_dpo/margin_std": 22.303516387939453, + "step": 479 + }, + { + "epoch": 0.7048458149779736, + "grad_norm": 74.97010040283203, + "learning_rate": 1.227838333989088e-07, + "logits/chosen": -0.5865793824195862, + "logits/rejected": -0.5290813446044922, + "logps/chosen": -85.32308959960938, + "logps/ref_chosen": -58.682701110839844, + "logps/ref_rejected": -82.93248748779297, + "logps/rejected": -146.00466918945312, + "loss": 0.5139, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15616512298583984, + "margin_dpo/beta_margin_grad_std": 0.231448233127594, + "margin_dpo/beta_margin_mean": 3.643179416656494, + "margin_dpo/loss_margin_mean": 36.431793212890625, + "margin_dpo/margin_mean": 36.431793212890625, + "margin_dpo/margin_std": 32.21718978881836, + "step": 480 + }, + { + "epoch": 0.7063142437591777, + "grad_norm": 53.85762023925781, + "learning_rate": 1.2168076391719489e-07, + "logits/chosen": -0.6352800130844116, + "logits/rejected": -0.6003815531730652, + "logps/chosen": -80.35116577148438, + "logps/ref_chosen": -54.964271545410156, + "logps/ref_rejected": -92.42044067382812, + "logps/rejected": -152.66683959960938, + "loss": 0.4429, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14123259484767914, + "margin_dpo/beta_margin_grad_std": 0.2171190083026886, + "margin_dpo/beta_margin_mean": 3.485950469970703, + "margin_dpo/loss_margin_mean": 34.85950469970703, + "margin_dpo/margin_mean": 34.85950469970703, + "margin_dpo/margin_std": 26.85974884033203, + "step": 481 + }, + { + "epoch": 0.7077826725403817, + "grad_norm": 56.575809478759766, + "learning_rate": 1.2058107576668938e-07, + "logits/chosen": -0.5973387956619263, + "logits/rejected": -0.56673264503479, + "logps/chosen": -90.35714721679688, + "logps/ref_chosen": -67.55347442626953, + "logps/ref_rejected": -87.58953857421875, + "logps/rejected": -140.64810180664062, + "loss": 0.4347, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16674280166625977, + "margin_dpo/beta_margin_grad_std": 0.18362513184547424, + "margin_dpo/beta_margin_mean": 3.0254898071289062, + "margin_dpo/loss_margin_mean": 30.254898071289062, + "margin_dpo/margin_mean": 30.254898071289062, + "margin_dpo/margin_std": 26.179527282714844, + "step": 482 + }, + { + "epoch": 0.7092511013215859, + "grad_norm": 71.42662048339844, + "learning_rate": 1.194847979251979e-07, + "logits/chosen": -0.5927727222442627, + "logits/rejected": -0.5307378768920898, + "logps/chosen": -89.05766296386719, + "logps/ref_chosen": -63.32981872558594, + "logps/ref_rejected": -95.78697204589844, + "logps/rejected": -157.198974609375, + "loss": 0.3982, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1320417821407318, + "margin_dpo/beta_margin_grad_std": 0.2117423117160797, + "margin_dpo/beta_margin_mean": 3.5684163570404053, + "margin_dpo/loss_margin_mean": 35.68416213989258, + "margin_dpo/margin_mean": 35.68416213989258, + "margin_dpo/margin_std": 27.353343963623047, + "step": 483 + }, + { + "epoch": 0.71071953010279, + "grad_norm": 52.49878692626953, + "learning_rate": 1.1839195928066101e-07, + "logits/chosen": -0.644550085067749, + "logits/rejected": -0.6064622402191162, + "logps/chosen": -81.20121002197266, + "logps/ref_chosen": -59.13812255859375, + "logps/ref_rejected": -84.37144470214844, + "logps/rejected": -142.26454162597656, + "loss": 0.3632, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14210814237594604, + "margin_dpo/beta_margin_grad_std": 0.1774926483631134, + "margin_dpo/beta_margin_mean": 3.583000659942627, + "margin_dpo/loss_margin_mean": 35.83000564575195, + "margin_dpo/margin_mean": 35.83000564575195, + "margin_dpo/margin_std": 29.71619415283203, + "step": 484 + }, + { + "epoch": 0.7121879588839941, + "grad_norm": 51.49626922607422, + "learning_rate": 1.1730258863039347e-07, + "logits/chosen": -0.5840227603912354, + "logits/rejected": -0.5531511306762695, + "logps/chosen": -77.98756408691406, + "logps/ref_chosen": -58.849571228027344, + "logps/ref_rejected": -103.36408996582031, + "logps/rejected": -163.02682495117188, + "loss": 0.4151, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14008283615112305, + "margin_dpo/beta_margin_grad_std": 0.21648435294628143, + "margin_dpo/beta_margin_mean": 4.052473545074463, + "margin_dpo/loss_margin_mean": 40.52473449707031, + "margin_dpo/margin_mean": 40.52473449707031, + "margin_dpo/margin_std": 32.42699432373047, + "step": 485 + }, + { + "epoch": 0.7136563876651982, + "grad_norm": 67.00348663330078, + "learning_rate": 1.1621671468032493e-07, + "logits/chosen": -0.6258925199508667, + "logits/rejected": -0.5719567537307739, + "logps/chosen": -78.4237060546875, + "logps/ref_chosen": -55.25966262817383, + "logps/ref_rejected": -92.13936614990234, + "logps/rejected": -154.44952392578125, + "loss": 0.4209, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14244696497917175, + "margin_dpo/beta_margin_grad_std": 0.21729934215545654, + "margin_dpo/beta_margin_mean": 3.9146108627319336, + "margin_dpo/loss_margin_mean": 39.1461067199707, + "margin_dpo/margin_mean": 39.1461067199707, + "margin_dpo/margin_std": 30.911312103271484, + "step": 486 + }, + { + "epoch": 0.7151248164464024, + "grad_norm": 57.11901092529297, + "learning_rate": 1.1513436604424378e-07, + "logits/chosen": -0.638907790184021, + "logits/rejected": -0.6043581962585449, + "logps/chosen": -75.46080017089844, + "logps/ref_chosen": -53.06330871582031, + "logps/ref_rejected": -92.4188232421875, + "logps/rejected": -152.25311279296875, + "loss": 0.3229, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12441418319940567, + "margin_dpo/beta_margin_grad_std": 0.16795003414154053, + "margin_dpo/beta_margin_mean": 3.743680477142334, + "margin_dpo/loss_margin_mean": 37.436805725097656, + "margin_dpo/margin_mean": 37.436805725097656, + "margin_dpo/margin_std": 26.35199737548828, + "step": 487 + }, + { + "epoch": 0.7165932452276065, + "grad_norm": 33.266990661621094, + "learning_rate": 1.1405557124304335e-07, + "logits/chosen": -0.591684877872467, + "logits/rejected": -0.5600037574768066, + "logps/chosen": -73.20890808105469, + "logps/ref_chosen": -52.228153228759766, + "logps/ref_rejected": -84.00656127929688, + "logps/rejected": -137.1870880126953, + "loss": 0.282, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.11648823320865631, + "margin_dpo/beta_margin_grad_std": 0.14420145750045776, + "margin_dpo/beta_margin_mean": 3.219977378845215, + "margin_dpo/loss_margin_mean": 32.199771881103516, + "margin_dpo/margin_mean": 32.199771881103516, + "margin_dpo/margin_std": 21.357444763183594, + "step": 488 + }, + { + "epoch": 0.7180616740088106, + "grad_norm": 55.21537780761719, + "learning_rate": 1.1298035870396985e-07, + "logits/chosen": -0.6172722578048706, + "logits/rejected": -0.5707902908325195, + "logps/chosen": -78.21434020996094, + "logps/ref_chosen": -55.989627838134766, + "logps/ref_rejected": -79.39813232421875, + "logps/rejected": -133.66220092773438, + "loss": 0.4375, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1609182357788086, + "margin_dpo/beta_margin_grad_std": 0.20739710330963135, + "margin_dpo/beta_margin_mean": 3.2039356231689453, + "margin_dpo/loss_margin_mean": 32.03935623168945, + "margin_dpo/margin_mean": 32.03936004638672, + "margin_dpo/margin_std": 27.642593383789062, + "step": 489 + }, + { + "epoch": 0.7195301027900147, + "grad_norm": 69.54042053222656, + "learning_rate": 1.1190875675987355e-07, + "logits/chosen": -0.6238210201263428, + "logits/rejected": -0.6131519079208374, + "logps/chosen": -73.09518432617188, + "logps/ref_chosen": -52.36639404296875, + "logps/ref_rejected": -110.40904998779297, + "logps/rejected": -162.64816284179688, + "loss": 0.5726, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18633374571800232, + "margin_dpo/beta_margin_grad_std": 0.23432117700576782, + "margin_dpo/beta_margin_mean": 3.151031017303467, + "margin_dpo/loss_margin_mean": 31.510311126708984, + "margin_dpo/margin_mean": 31.510311126708984, + "margin_dpo/margin_std": 29.851600646972656, + "step": 490 + }, + { + "epoch": 0.7209985315712188, + "grad_norm": 71.87732696533203, + "learning_rate": 1.1084079364846241e-07, + "logits/chosen": -0.5947495698928833, + "logits/rejected": -0.5508404970169067, + "logps/chosen": -83.43572998046875, + "logps/ref_chosen": -60.11626434326172, + "logps/ref_rejected": -73.27278900146484, + "logps/rejected": -124.82978820800781, + "loss": 0.5807, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18778780102729797, + "margin_dpo/beta_margin_grad_std": 0.22901329398155212, + "margin_dpo/beta_margin_mean": 2.8237528800964355, + "margin_dpo/loss_margin_mean": 28.237525939941406, + "margin_dpo/margin_mean": 28.237525939941406, + "margin_dpo/margin_std": 27.937530517578125, + "step": 491 + }, + { + "epoch": 0.7224669603524229, + "grad_norm": 113.52478790283203, + "learning_rate": 1.097764975115576e-07, + "logits/chosen": -0.6196011304855347, + "logits/rejected": -0.5757460594177246, + "logps/chosen": -77.80059814453125, + "logps/ref_chosen": -53.99418258666992, + "logps/ref_rejected": -72.65962219238281, + "logps/rejected": -122.739990234375, + "loss": 0.9513, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.23549211025238037, + "margin_dpo/beta_margin_grad_std": 0.311998188495636, + "margin_dpo/beta_margin_mean": 2.6273956298828125, + "margin_dpo/loss_margin_mean": 26.273958206176758, + "margin_dpo/margin_mean": 26.273958206176758, + "margin_dpo/margin_std": 30.309785842895508, + "step": 492 + }, + { + "epoch": 0.723935389133627, + "grad_norm": 57.65090560913086, + "learning_rate": 1.0871589639435203e-07, + "logits/chosen": -0.6554695963859558, + "logits/rejected": -0.5956501960754395, + "logps/chosen": -95.95299530029297, + "logps/ref_chosen": -75.49723815917969, + "logps/ref_rejected": -87.32301330566406, + "logps/rejected": -140.97837829589844, + "loss": 0.4676, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1496451199054718, + "margin_dpo/beta_margin_grad_std": 0.22746598720550537, + "margin_dpo/beta_margin_mean": 3.319960117340088, + "margin_dpo/loss_margin_mean": 33.19960021972656, + "margin_dpo/margin_mean": 33.19960021972656, + "margin_dpo/margin_std": 26.605464935302734, + "step": 493 + }, + { + "epoch": 0.7254038179148311, + "grad_norm": 106.5522232055664, + "learning_rate": 1.0765901824467166e-07, + "logits/chosen": -0.5855438709259033, + "logits/rejected": -0.5765562653541565, + "logps/chosen": -63.644134521484375, + "logps/ref_chosen": -41.35926818847656, + "logps/ref_rejected": -86.09136962890625, + "logps/rejected": -144.02566528320312, + "loss": 0.5137, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1603744924068451, + "margin_dpo/beta_margin_grad_std": 0.23930124938488007, + "margin_dpo/beta_margin_mean": 3.5649423599243164, + "margin_dpo/loss_margin_mean": 35.64942169189453, + "margin_dpo/margin_mean": 35.64942169189453, + "margin_dpo/margin_std": 29.729022979736328, + "step": 494 + }, + { + "epoch": 0.7268722466960352, + "grad_norm": 68.87841033935547, + "learning_rate": 1.0660589091223854e-07, + "logits/chosen": -0.6375582218170166, + "logits/rejected": -0.5974992513656616, + "logps/chosen": -85.37813568115234, + "logps/ref_chosen": -63.53507995605469, + "logps/ref_rejected": -91.42443084716797, + "logps/rejected": -145.79571533203125, + "loss": 0.5198, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15870174765586853, + "margin_dpo/beta_margin_grad_std": 0.22453130781650543, + "margin_dpo/beta_margin_mean": 3.2528228759765625, + "margin_dpo/loss_margin_mean": 32.528228759765625, + "margin_dpo/margin_mean": 32.52822494506836, + "margin_dpo/margin_std": 27.80425262451172, + "step": 495 + }, + { + "epoch": 0.7283406754772394, + "grad_norm": 67.1712417602539, + "learning_rate": 1.0555654214793722e-07, + "logits/chosen": -0.6405035257339478, + "logits/rejected": -0.583281397819519, + "logps/chosen": -97.08042907714844, + "logps/ref_chosen": -72.59192657470703, + "logps/ref_rejected": -84.32933807373047, + "logps/rejected": -137.63818359375, + "loss": 0.538, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17619186639785767, + "margin_dpo/beta_margin_grad_std": 0.22553400695323944, + "margin_dpo/beta_margin_mean": 2.882033348083496, + "margin_dpo/loss_margin_mean": 28.820335388183594, + "margin_dpo/margin_mean": 28.82033348083496, + "margin_dpo/margin_std": 26.039134979248047, + "step": 496 + }, + { + "epoch": 0.7298091042584435, + "grad_norm": 76.36552429199219, + "learning_rate": 1.0451099960308374e-07, + "logits/chosen": -0.6118708848953247, + "logits/rejected": -0.5635442733764648, + "logps/chosen": -84.1969985961914, + "logps/ref_chosen": -58.593971252441406, + "logps/ref_rejected": -76.28836822509766, + "logps/rejected": -130.5543670654297, + "loss": 0.6066, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20490401983261108, + "margin_dpo/beta_margin_grad_std": 0.22362196445465088, + "margin_dpo/beta_margin_mean": 2.8662962913513184, + "margin_dpo/loss_margin_mean": 28.6629638671875, + "margin_dpo/margin_mean": 28.6629638671875, + "margin_dpo/margin_std": 27.733150482177734, + "step": 497 + }, + { + "epoch": 0.7312775330396476, + "grad_norm": 86.41475677490234, + "learning_rate": 1.0346929082869641e-07, + "logits/chosen": -0.617120623588562, + "logits/rejected": -0.5855381488800049, + "logps/chosen": -95.82791137695312, + "logps/ref_chosen": -71.20565795898438, + "logps/ref_rejected": -83.95803833007812, + "logps/rejected": -139.6630859375, + "loss": 0.5315, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1733829826116562, + "margin_dpo/beta_margin_grad_std": 0.23494428396224976, + "margin_dpo/beta_margin_mean": 3.1082797050476074, + "margin_dpo/loss_margin_mean": 31.082794189453125, + "margin_dpo/margin_mean": 31.082794189453125, + "margin_dpo/margin_std": 28.04306411743164, + "step": 498 + }, + { + "epoch": 0.7327459618208517, + "grad_norm": 80.22297668457031, + "learning_rate": 1.0243144327477013e-07, + "logits/chosen": -0.6173365116119385, + "logits/rejected": -0.6094462275505066, + "logps/chosen": -74.71839904785156, + "logps/ref_chosen": -51.25519561767578, + "logps/ref_rejected": -101.07870483398438, + "logps/rejected": -156.43063354492188, + "loss": 0.6745, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17869053781032562, + "margin_dpo/beta_margin_grad_std": 0.25083693861961365, + "margin_dpo/beta_margin_mean": 3.1888723373413086, + "margin_dpo/loss_margin_mean": 31.888721466064453, + "margin_dpo/margin_mean": 31.888721466064453, + "margin_dpo/margin_std": 30.46820831298828, + "step": 499 + }, + { + "epoch": 0.7342143906020558, + "grad_norm": 44.01335144042969, + "learning_rate": 1.0139748428955333e-07, + "logits/chosen": -0.5734531879425049, + "logits/rejected": -0.5571717023849487, + "logps/chosen": -82.99430847167969, + "logps/ref_chosen": -57.027442932128906, + "logps/ref_rejected": -93.93421173095703, + "logps/rejected": -153.8037109375, + "loss": 0.38, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1365164965391159, + "margin_dpo/beta_margin_grad_std": 0.1890600323677063, + "margin_dpo/beta_margin_mean": 3.390263557434082, + "margin_dpo/loss_margin_mean": 33.90263366699219, + "margin_dpo/margin_mean": 33.90263366699219, + "margin_dpo/margin_std": 29.03835678100586, + "step": 500 + }, + { + "epoch": 0.7342143906020558, + "eval_logits/chosen": -0.6236123442649841, + "eval_logits/rejected": -0.5976437926292419, + "eval_logps/chosen": -106.9358139038086, + "eval_logps/ref_chosen": -79.05104064941406, + "eval_logps/ref_rejected": -86.79793548583984, + "eval_logps/rejected": -136.5065155029297, + "eval_loss": 0.40981218218803406, + "eval_margin_dpo/beta": 0.10000000149011612, + "eval_margin_dpo/beta_margin_grad_mean": -0.25789907574653625, + "eval_margin_dpo/beta_margin_grad_std": 0.2560845613479614, + "eval_margin_dpo/beta_margin_mean": 2.1823792457580566, + "eval_margin_dpo/loss_margin_mean": 21.823793411254883, + "eval_margin_dpo/margin_mean": 21.823793411254883, + "eval_margin_dpo/margin_std": 26.597421646118164, + "eval_runtime": 39.8891, + "eval_samples_per_second": 58.638, + "eval_steps_per_second": 1.855, + "step": 500 + }, + { + "epoch": 0.73568281938326, + "grad_norm": 51.43994140625, + "learning_rate": 1.0036744111882672e-07, + "logits/chosen": -0.6275873184204102, + "logits/rejected": -0.5868571996688843, + "logps/chosen": -76.56288146972656, + "logps/ref_chosen": -54.359527587890625, + "logps/ref_rejected": -80.15670776367188, + "logps/rejected": -140.09364318847656, + "loss": 0.3432, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1260565221309662, + "margin_dpo/beta_margin_grad_std": 0.185085728764534, + "margin_dpo/beta_margin_mean": 3.7733588218688965, + "margin_dpo/loss_margin_mean": 37.733585357666016, + "margin_dpo/margin_mean": 37.733585357666016, + "margin_dpo/margin_std": 29.447450637817383, + "step": 501 + }, + { + "epoch": 0.737151248164464, + "grad_norm": 45.80177688598633, + "learning_rate": 9.934134090518592e-08, + "logits/chosen": -0.6037914752960205, + "logits/rejected": -0.542682945728302, + "logps/chosen": -90.61296844482422, + "logps/ref_chosen": -67.60050964355469, + "logps/ref_rejected": -82.94876098632812, + "logps/rejected": -139.87281799316406, + "loss": 0.3237, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1189626082777977, + "margin_dpo/beta_margin_grad_std": 0.18001875281333923, + "margin_dpo/beta_margin_mean": 3.3911592960357666, + "margin_dpo/loss_margin_mean": 33.911590576171875, + "margin_dpo/margin_mean": 33.911590576171875, + "margin_dpo/margin_std": 20.9443416595459, + "step": 502 + }, + { + "epoch": 0.7386196769456681, + "grad_norm": 54.339813232421875, + "learning_rate": 9.831921068732571e-08, + "logits/chosen": -0.5617387294769287, + "logits/rejected": -0.5207287073135376, + "logps/chosen": -76.20591735839844, + "logps/ref_chosen": -55.078407287597656, + "logps/ref_rejected": -82.50544738769531, + "logps/rejected": -137.9188690185547, + "loss": 0.4001, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13650323450565338, + "margin_dpo/beta_margin_grad_std": 0.21414102613925934, + "margin_dpo/beta_margin_mean": 3.428591251373291, + "margin_dpo/loss_margin_mean": 34.285911560058594, + "margin_dpo/margin_mean": 34.285911560058594, + "margin_dpo/margin_std": 23.53387451171875, + "step": 503 + }, + { + "epoch": 0.7400881057268722, + "grad_norm": 36.07522964477539, + "learning_rate": 9.730107739932805e-08, + "logits/chosen": -0.6169658899307251, + "logits/rejected": -0.5972700119018555, + "logps/chosen": -79.52164459228516, + "logps/ref_chosen": -59.96575164794922, + "logps/ref_rejected": -103.76213073730469, + "logps/rejected": -163.46487426757812, + "loss": 0.2426, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.0935632586479187, + "margin_dpo/beta_margin_grad_std": 0.1528908908367157, + "margin_dpo/beta_margin_mean": 4.014684677124023, + "margin_dpo/loss_margin_mean": 40.1468505859375, + "margin_dpo/margin_mean": 40.146846771240234, + "margin_dpo/margin_std": 26.04753875732422, + "step": 504 + }, + { + "epoch": 0.7415565345080763, + "grad_norm": 81.94815063476562, + "learning_rate": 9.628696786995188e-08, + "logits/chosen": -0.6530240774154663, + "logits/rejected": -0.6016232967376709, + "logps/chosen": -101.87925720214844, + "logps/ref_chosen": -76.1549072265625, + "logps/ref_rejected": -88.58537292480469, + "logps/rejected": -142.79168701171875, + "loss": 0.6762, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.2050047218799591, + "margin_dpo/beta_margin_grad_std": 0.24811255931854248, + "margin_dpo/beta_margin_mean": 2.8481969833374023, + "margin_dpo/loss_margin_mean": 28.481971740722656, + "margin_dpo/margin_mean": 28.481969833374023, + "margin_dpo/margin_std": 29.522705078125, + "step": 505 + }, + { + "epoch": 0.7430249632892805, + "grad_norm": 66.44819641113281, + "learning_rate": 9.527690882192635e-08, + "logits/chosen": -0.615195631980896, + "logits/rejected": -0.579567551612854, + "logps/chosen": -71.32270812988281, + "logps/ref_chosen": -48.96050262451172, + "logps/ref_rejected": -78.41505432128906, + "logps/rejected": -134.9954376220703, + "loss": 0.448, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15291953086853027, + "margin_dpo/beta_margin_grad_std": 0.2167114019393921, + "margin_dpo/beta_margin_mean": 3.421818733215332, + "margin_dpo/loss_margin_mean": 34.21818542480469, + "margin_dpo/margin_mean": 34.21818542480469, + "margin_dpo/margin_std": 29.17880630493164, + "step": 506 + }, + { + "epoch": 0.7444933920704846, + "grad_norm": 52.085636138916016, + "learning_rate": 9.427092687124691e-08, + "logits/chosen": -0.6226514577865601, + "logits/rejected": -0.5839424133300781, + "logps/chosen": -90.81254577636719, + "logps/ref_chosen": -66.80150604248047, + "logps/ref_rejected": -95.37289428710938, + "logps/rejected": -151.70751953125, + "loss": 0.3354, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1319025158882141, + "margin_dpo/beta_margin_grad_std": 0.17619000375270844, + "margin_dpo/beta_margin_mean": 3.2323567867279053, + "margin_dpo/loss_margin_mean": 32.32356643676758, + "margin_dpo/margin_mean": 32.32356262207031, + "margin_dpo/margin_std": 22.592437744140625, + "step": 507 + }, + { + "epoch": 0.7459618208516887, + "grad_norm": 64.97161865234375, + "learning_rate": 9.326904852647344e-08, + "logits/chosen": -0.6235086917877197, + "logits/rejected": -0.5866918563842773, + "logps/chosen": -93.52983093261719, + "logps/ref_chosen": -71.303466796875, + "logps/ref_rejected": -95.6275405883789, + "logps/rejected": -150.2212371826172, + "loss": 0.5048, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17342665791511536, + "margin_dpo/beta_margin_grad_std": 0.22466185688972473, + "margin_dpo/beta_margin_mean": 3.2367329597473145, + "margin_dpo/loss_margin_mean": 32.36732864379883, + "margin_dpo/margin_mean": 32.36732864379883, + "margin_dpo/margin_std": 28.795747756958008, + "step": 508 + }, + { + "epoch": 0.7474302496328928, + "grad_norm": 71.68479919433594, + "learning_rate": 9.227130018803195e-08, + "logits/chosen": -0.6284923553466797, + "logits/rejected": -0.5883047580718994, + "logps/chosen": -86.16942596435547, + "logps/ref_chosen": -63.81895065307617, + "logps/ref_rejected": -83.25643920898438, + "logps/rejected": -138.71392822265625, + "loss": 0.4825, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1474766582250595, + "margin_dpo/beta_margin_grad_std": 0.23035329580307007, + "margin_dpo/beta_margin_mean": 3.3107001781463623, + "margin_dpo/loss_margin_mean": 33.10700225830078, + "margin_dpo/margin_mean": 33.10700225830078, + "margin_dpo/margin_std": 26.037105560302734, + "step": 509 + }, + { + "epoch": 0.748898678414097, + "grad_norm": 57.0163459777832, + "learning_rate": 9.127770814751932e-08, + "logits/chosen": -0.5673776865005493, + "logits/rejected": -0.551094114780426, + "logps/chosen": -79.0858154296875, + "logps/ref_chosen": -51.878448486328125, + "logps/ref_rejected": -102.7651596069336, + "logps/rejected": -170.52944946289062, + "loss": 0.4056, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14309245347976685, + "margin_dpo/beta_margin_grad_std": 0.21340487897396088, + "margin_dpo/beta_margin_mean": 4.055694580078125, + "margin_dpo/loss_margin_mean": 40.556941986083984, + "margin_dpo/margin_mean": 40.556941986083984, + "margin_dpo/margin_std": 32.51176452636719, + "step": 510 + }, + { + "epoch": 0.750367107195301, + "grad_norm": 55.767330169677734, + "learning_rate": 9.028829858700973e-08, + "logits/chosen": -0.6472057104110718, + "logits/rejected": -0.6147615909576416, + "logps/chosen": -82.40501403808594, + "logps/ref_chosen": -60.23811340332031, + "logps/ref_rejected": -92.85676574707031, + "logps/rejected": -151.13473510742188, + "loss": 0.4616, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1584860384464264, + "margin_dpo/beta_margin_grad_std": 0.22719457745552063, + "margin_dpo/beta_margin_mean": 3.6111063957214355, + "margin_dpo/loss_margin_mean": 36.11106491088867, + "margin_dpo/margin_mean": 36.11106491088867, + "margin_dpo/margin_std": 29.813339233398438, + "step": 511 + }, + { + "epoch": 0.7518355359765051, + "grad_norm": 45.10017776489258, + "learning_rate": 8.930309757836516e-08, + "logits/chosen": -0.6407291889190674, + "logits/rejected": -0.6046779155731201, + "logps/chosen": -80.43107604980469, + "logps/ref_chosen": -54.905494689941406, + "logps/ref_rejected": -81.87586975097656, + "logps/rejected": -142.87881469726562, + "loss": 0.3367, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.132303386926651, + "margin_dpo/beta_margin_grad_std": 0.1747945249080658, + "margin_dpo/beta_margin_mean": 3.547736644744873, + "margin_dpo/loss_margin_mean": 35.47736358642578, + "margin_dpo/margin_mean": 35.47736740112305, + "margin_dpo/margin_std": 27.888330459594727, + "step": 512 + }, + { + "epoch": 0.7533039647577092, + "grad_norm": 74.60807800292969, + "learning_rate": 8.832213108254863e-08, + "logits/chosen": -0.6363452672958374, + "logits/rejected": -0.5841466188430786, + "logps/chosen": -89.99166870117188, + "logps/ref_chosen": -64.91644287109375, + "logps/ref_rejected": -76.06245422363281, + "logps/rejected": -131.63497924804688, + "loss": 0.5735, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16800448298454285, + "margin_dpo/beta_margin_grad_std": 0.2321883887052536, + "margin_dpo/beta_margin_mean": 3.049729347229004, + "margin_dpo/loss_margin_mean": 30.49729347229004, + "margin_dpo/margin_mean": 30.497295379638672, + "margin_dpo/margin_std": 25.966400146484375, + "step": 513 + }, + { + "epoch": 0.7547723935389133, + "grad_norm": 72.9621810913086, + "learning_rate": 8.734542494893954e-08, + "logits/chosen": -0.6219351291656494, + "logits/rejected": -0.5745389461517334, + "logps/chosen": -100.20359802246094, + "logps/ref_chosen": -74.22957611083984, + "logps/ref_rejected": -78.945556640625, + "logps/rejected": -135.80770874023438, + "loss": 0.7095, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.21395042538642883, + "margin_dpo/beta_margin_grad_std": 0.27207687497138977, + "margin_dpo/beta_margin_mean": 3.0888137817382812, + "margin_dpo/loss_margin_mean": 30.88813591003418, + "margin_dpo/margin_mean": 30.888137817382812, + "margin_dpo/margin_std": 33.99193572998047, + "step": 514 + }, + { + "epoch": 0.7562408223201175, + "grad_norm": 49.74563980102539, + "learning_rate": 8.637300491465272e-08, + "logits/chosen": -0.6339064836502075, + "logits/rejected": -0.6179243326187134, + "logps/chosen": -73.3636703491211, + "logps/ref_chosen": -50.40156555175781, + "logps/ref_rejected": -87.09774780273438, + "logps/rejected": -143.69723510742188, + "loss": 0.377, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14515420794487, + "margin_dpo/beta_margin_grad_std": 0.17126330733299255, + "margin_dpo/beta_margin_mean": 3.363739490509033, + "margin_dpo/loss_margin_mean": 33.63739776611328, + "margin_dpo/margin_mean": 33.63739776611328, + "margin_dpo/margin_std": 27.569812774658203, + "step": 515 + }, + { + "epoch": 0.7577092511013216, + "grad_norm": 51.14539337158203, + "learning_rate": 8.540489660386064e-08, + "logits/chosen": -0.642087459564209, + "logits/rejected": -0.6219902038574219, + "logps/chosen": -87.75926208496094, + "logps/ref_chosen": -64.6495590209961, + "logps/ref_rejected": -111.72238159179688, + "logps/rejected": -170.44625854492188, + "loss": 0.3628, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13847823441028595, + "margin_dpo/beta_margin_grad_std": 0.18551796674728394, + "margin_dpo/beta_margin_mean": 3.5614166259765625, + "margin_dpo/loss_margin_mean": 35.614166259765625, + "margin_dpo/margin_mean": 35.614166259765625, + "margin_dpo/margin_std": 28.459064483642578, + "step": 516 + }, + { + "epoch": 0.7591776798825257, + "grad_norm": 49.0867805480957, + "learning_rate": 8.444112552711752e-08, + "logits/chosen": -0.6216846704483032, + "logits/rejected": -0.5751929879188538, + "logps/chosen": -86.8254623413086, + "logps/ref_chosen": -60.913551330566406, + "logps/ref_rejected": -89.08308410644531, + "logps/rejected": -150.07212829589844, + "loss": 0.3873, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12766914069652557, + "margin_dpo/beta_margin_grad_std": 0.20745864510536194, + "margin_dpo/beta_margin_mean": 3.5077133178710938, + "margin_dpo/loss_margin_mean": 35.07713317871094, + "margin_dpo/margin_mean": 35.07713317871094, + "margin_dpo/margin_std": 25.649383544921875, + "step": 517 + }, + { + "epoch": 0.7606461086637298, + "grad_norm": 52.4042854309082, + "learning_rate": 8.348171708068747e-08, + "logits/chosen": -0.6181496381759644, + "logits/rejected": -0.6017059087753296, + "logps/chosen": -83.06076049804688, + "logps/ref_chosen": -57.45589065551758, + "logps/ref_rejected": -85.31269836425781, + "logps/rejected": -142.11749267578125, + "loss": 0.4583, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15641465783119202, + "margin_dpo/beta_margin_grad_std": 0.2147601693868637, + "margin_dpo/beta_margin_mean": 3.119992733001709, + "margin_dpo/loss_margin_mean": 31.199928283691406, + "margin_dpo/margin_mean": 31.199928283691406, + "margin_dpo/margin_std": 26.132186889648438, + "step": 518 + }, + { + "epoch": 0.762114537444934, + "grad_norm": 62.43038558959961, + "learning_rate": 8.25266965458755e-08, + "logits/chosen": -0.6043561697006226, + "logits/rejected": -0.5701404213905334, + "logps/chosen": -97.01838684082031, + "logps/ref_chosen": -74.06330871582031, + "logps/ref_rejected": -104.44416809082031, + "logps/rejected": -159.92041015625, + "loss": 0.4574, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16313457489013672, + "margin_dpo/beta_margin_grad_std": 0.1897900104522705, + "margin_dpo/beta_margin_mean": 3.2521166801452637, + "margin_dpo/loss_margin_mean": 32.52116394042969, + "margin_dpo/margin_mean": 32.52116775512695, + "margin_dpo/margin_std": 30.377395629882812, + "step": 519 + }, + { + "epoch": 0.7635829662261381, + "grad_norm": 50.88139343261719, + "learning_rate": 8.15760890883607e-08, + "logits/chosen": -0.5501081943511963, + "logits/rejected": -0.5258777141571045, + "logps/chosen": -93.9324951171875, + "logps/ref_chosen": -70.2998275756836, + "logps/ref_rejected": -99.98133850097656, + "logps/rejected": -156.6881103515625, + "loss": 0.3602, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13520291447639465, + "margin_dpo/beta_margin_grad_std": 0.1813124269247055, + "margin_dpo/beta_margin_mean": 3.307410717010498, + "margin_dpo/loss_margin_mean": 33.0741081237793, + "margin_dpo/margin_mean": 33.07410430908203, + "margin_dpo/margin_std": 24.42025375366211, + "step": 520 + }, + { + "epoch": 0.7650513950073421, + "grad_norm": 51.61702346801758, + "learning_rate": 8.062991975753378e-08, + "logits/chosen": -0.6081865429878235, + "logits/rejected": -0.578285276889801, + "logps/chosen": -80.80309295654297, + "logps/ref_chosen": -58.14292907714844, + "logps/ref_rejected": -83.28060913085938, + "logps/rejected": -138.00160217285156, + "loss": 0.4205, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15506964921951294, + "margin_dpo/beta_margin_grad_std": 0.19557394087314606, + "margin_dpo/beta_margin_mean": 3.2060821056365967, + "margin_dpo/loss_margin_mean": 32.060821533203125, + "margin_dpo/margin_mean": 32.060821533203125, + "margin_dpo/margin_std": 26.212413787841797, + "step": 521 + }, + { + "epoch": 0.7665198237885462, + "grad_norm": 49.603553771972656, + "learning_rate": 7.968821348583643e-08, + "logits/chosen": -0.610974133014679, + "logits/rejected": -0.5809307098388672, + "logps/chosen": -70.47454833984375, + "logps/ref_chosen": -46.54766845703125, + "logps/ref_rejected": -66.01388549804688, + "logps/rejected": -118.23016357421875, + "loss": 0.4643, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17351622879505157, + "margin_dpo/beta_margin_grad_std": 0.19399023056030273, + "margin_dpo/beta_margin_mean": 2.8289389610290527, + "margin_dpo/loss_margin_mean": 28.289390563964844, + "margin_dpo/margin_mean": 28.289390563964844, + "margin_dpo/margin_std": 25.933521270751953, + "step": 522 + }, + { + "epoch": 0.7679882525697503, + "grad_norm": 62.4161376953125, + "learning_rate": 7.875099508810484e-08, + "logits/chosen": -0.6249532699584961, + "logits/rejected": -0.5845484733581543, + "logps/chosen": -86.18218994140625, + "logps/ref_chosen": -61.76960372924805, + "logps/ref_rejected": -83.76141357421875, + "logps/rejected": -140.2449951171875, + "loss": 0.5486, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18028101325035095, + "margin_dpo/beta_margin_grad_std": 0.23459823429584503, + "margin_dpo/beta_margin_mean": 3.2071008682250977, + "margin_dpo/loss_margin_mean": 32.071006774902344, + "margin_dpo/margin_mean": 32.071006774902344, + "margin_dpo/margin_std": 29.059005737304688, + "step": 523 + }, + { + "epoch": 0.7694566813509545, + "grad_norm": 61.96669387817383, + "learning_rate": 7.781828926091535e-08, + "logits/chosen": -0.5891939401626587, + "logits/rejected": -0.5522305965423584, + "logps/chosen": -101.28225708007812, + "logps/ref_chosen": -78.0720443725586, + "logps/ref_rejected": -81.30198669433594, + "logps/rejected": -134.13906860351562, + "loss": 0.502, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1610349714756012, + "margin_dpo/beta_margin_grad_std": 0.2138672173023224, + "margin_dpo/beta_margin_mean": 2.962686538696289, + "margin_dpo/loss_margin_mean": 29.62686538696289, + "margin_dpo/margin_mean": 29.62686538696289, + "margin_dpo/margin_std": 24.750131607055664, + "step": 524 + }, + { + "epoch": 0.7709251101321586, + "grad_norm": 38.1711540222168, + "learning_rate": 7.689012058193384e-08, + "logits/chosen": -0.5875400304794312, + "logits/rejected": -0.5771076679229736, + "logps/chosen": -73.10057830810547, + "logps/ref_chosen": -50.827857971191406, + "logps/ref_rejected": -100.05293273925781, + "logps/rejected": -157.6528778076172, + "loss": 0.2666, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.10849446058273315, + "margin_dpo/beta_margin_grad_std": 0.14546433091163635, + "margin_dpo/beta_margin_mean": 3.532721996307373, + "margin_dpo/loss_margin_mean": 35.32722091674805, + "margin_dpo/margin_mean": 35.32722091674805, + "margin_dpo/margin_std": 23.876306533813477, + "step": 525 + }, + { + "epoch": 0.7723935389133627, + "grad_norm": 69.49327850341797, + "learning_rate": 7.596651350926836e-08, + "logits/chosen": -0.6228262782096863, + "logits/rejected": -0.5682265162467957, + "logps/chosen": -88.6613540649414, + "logps/ref_chosen": -63.167232513427734, + "logps/ref_rejected": -86.30934143066406, + "logps/rejected": -146.317138671875, + "loss": 0.4345, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14551463723182678, + "margin_dpo/beta_margin_grad_std": 0.21542373299598694, + "margin_dpo/beta_margin_mean": 3.451366901397705, + "margin_dpo/loss_margin_mean": 34.51366424560547, + "margin_dpo/margin_mean": 34.51366424560547, + "margin_dpo/margin_std": 27.885501861572266, + "step": 526 + }, + { + "epoch": 0.7738619676945668, + "grad_norm": 59.04280471801758, + "learning_rate": 7.504749238082414e-08, + "logits/chosen": -0.6852065324783325, + "logits/rejected": -0.6316944360733032, + "logps/chosen": -94.76507568359375, + "logps/ref_chosen": -71.12867736816406, + "logps/ref_rejected": -78.3425521850586, + "logps/rejected": -134.12088012695312, + "loss": 0.4411, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1558818817138672, + "margin_dpo/beta_margin_grad_std": 0.2005808800458908, + "margin_dpo/beta_margin_mean": 3.2141916751861572, + "margin_dpo/loss_margin_mean": 32.14191436767578, + "margin_dpo/margin_mean": 32.14191436767578, + "margin_dpo/margin_std": 28.21198272705078, + "step": 527 + }, + { + "epoch": 0.775330396475771, + "grad_norm": 52.068695068359375, + "learning_rate": 7.413308141366254e-08, + "logits/chosen": -0.6312476396560669, + "logits/rejected": -0.6082254648208618, + "logps/chosen": -91.81694030761719, + "logps/ref_chosen": -68.0894546508789, + "logps/ref_rejected": -93.91006469726562, + "logps/rejected": -148.40286254882812, + "loss": 0.4254, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1584298014640808, + "margin_dpo/beta_margin_grad_std": 0.2020604908466339, + "margin_dpo/beta_margin_mean": 3.0765323638916016, + "margin_dpo/loss_margin_mean": 30.765323638916016, + "margin_dpo/margin_mean": 30.765323638916016, + "margin_dpo/margin_std": 25.180191040039062, + "step": 528 + }, + { + "epoch": 0.7767988252569751, + "grad_norm": 78.19168853759766, + "learning_rate": 7.322330470336313e-08, + "logits/chosen": -0.6202692985534668, + "logits/rejected": -0.5999141931533813, + "logps/chosen": -82.72918701171875, + "logps/ref_chosen": -55.5749626159668, + "logps/ref_rejected": -89.20909118652344, + "logps/rejected": -144.00283813476562, + "loss": 0.7184, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19822926819324493, + "margin_dpo/beta_margin_grad_std": 0.24975398182868958, + "margin_dpo/beta_margin_mean": 2.7639517784118652, + "margin_dpo/loss_margin_mean": 27.63951873779297, + "margin_dpo/margin_mean": 27.63951873779297, + "margin_dpo/margin_std": 28.70267677307129, + "step": 529 + }, + { + "epoch": 0.7782672540381792, + "grad_norm": 62.91647720336914, + "learning_rate": 7.231818622338822e-08, + "logits/chosen": -0.5722482204437256, + "logits/rejected": -0.5528737902641296, + "logps/chosen": -71.75675201416016, + "logps/ref_chosen": -47.601417541503906, + "logps/ref_rejected": -87.2845230102539, + "logps/rejected": -148.30410766601562, + "loss": 0.4394, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13412612676620483, + "margin_dpo/beta_margin_grad_std": 0.2340560257434845, + "margin_dpo/beta_margin_mean": 3.686424732208252, + "margin_dpo/loss_margin_mean": 36.8642463684082, + "margin_dpo/margin_mean": 36.8642463684082, + "margin_dpo/margin_std": 27.135440826416016, + "step": 530 + }, + { + "epoch": 0.7797356828193832, + "grad_norm": 56.84402084350586, + "learning_rate": 7.141774982445147e-08, + "logits/chosen": -0.6246213912963867, + "logits/rejected": -0.583504319190979, + "logps/chosen": -78.43968200683594, + "logps/ref_chosen": -55.246063232421875, + "logps/ref_rejected": -70.60598754882812, + "logps/rejected": -126.05680847167969, + "loss": 0.5239, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16395235061645508, + "margin_dpo/beta_margin_grad_std": 0.2341843992471695, + "margin_dpo/beta_margin_mean": 3.2257208824157715, + "margin_dpo/loss_margin_mean": 32.25720977783203, + "margin_dpo/margin_mean": 32.25720977783203, + "margin_dpo/margin_std": 28.00493049621582, + "step": 531 + }, + { + "epoch": 0.7812041116005873, + "grad_norm": 58.166229248046875, + "learning_rate": 7.052201923388953e-08, + "logits/chosen": -0.5881683826446533, + "logits/rejected": -0.554157018661499, + "logps/chosen": -94.40569305419922, + "logps/ref_chosen": -70.28602600097656, + "logps/ref_rejected": -86.5913314819336, + "logps/rejected": -148.8220672607422, + "loss": 0.3506, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12911692261695862, + "margin_dpo/beta_margin_grad_std": 0.19371378421783447, + "margin_dpo/beta_margin_mean": 3.8111071586608887, + "margin_dpo/loss_margin_mean": 38.11106872558594, + "margin_dpo/margin_mean": 38.11106872558594, + "margin_dpo/margin_std": 27.990556716918945, + "step": 532 + }, + { + "epoch": 0.7826725403817915, + "grad_norm": 70.82559204101562, + "learning_rate": 6.963101805503646e-08, + "logits/chosen": -0.6239089965820312, + "logits/rejected": -0.5815380215644836, + "logps/chosen": -88.10252380371094, + "logps/ref_chosen": -64.8551025390625, + "logps/ref_rejected": -76.58805847167969, + "logps/rejected": -126.4080581665039, + "loss": 0.5933, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18534313142299652, + "margin_dpo/beta_margin_grad_std": 0.23213329911231995, + "margin_dpo/beta_margin_mean": 2.657257080078125, + "margin_dpo/loss_margin_mean": 26.57257080078125, + "margin_dpo/margin_mean": 26.572572708129883, + "margin_dpo/margin_std": 23.991806030273438, + "step": 533 + }, + { + "epoch": 0.7841409691629956, + "grad_norm": 47.19809341430664, + "learning_rate": 6.874476976660184e-08, + "logits/chosen": -0.623961329460144, + "logits/rejected": -0.5935629606246948, + "logps/chosen": -82.6689453125, + "logps/ref_chosen": -60.119388580322266, + "logps/ref_rejected": -78.54347229003906, + "logps/rejected": -133.5894012451172, + "loss": 0.4033, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1534009873867035, + "margin_dpo/beta_margin_grad_std": 0.18615348637104034, + "margin_dpo/beta_margin_mean": 3.2496376037597656, + "margin_dpo/loss_margin_mean": 32.496376037597656, + "margin_dpo/margin_mean": 32.49637222290039, + "margin_dpo/margin_std": 27.735137939453125, + "step": 534 + }, + { + "epoch": 0.7856093979441997, + "grad_norm": 46.1759147644043, + "learning_rate": 6.786329772205246e-08, + "logits/chosen": -0.6095963716506958, + "logits/rejected": -0.5779241919517517, + "logps/chosen": -74.88334655761719, + "logps/ref_chosen": -54.330238342285156, + "logps/ref_rejected": -96.30763244628906, + "logps/rejected": -152.88735961914062, + "loss": 0.3929, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13403823971748352, + "margin_dpo/beta_margin_grad_std": 0.19758474826812744, + "margin_dpo/beta_margin_mean": 3.602663040161133, + "margin_dpo/loss_margin_mean": 36.02663040161133, + "margin_dpo/margin_mean": 36.02663040161133, + "margin_dpo/margin_std": 26.887496948242188, + "step": 535 + }, + { + "epoch": 0.7870778267254038, + "grad_norm": 33.667205810546875, + "learning_rate": 6.698662514899638e-08, + "logits/chosen": -0.6077243089675903, + "logits/rejected": -0.5903106927871704, + "logps/chosen": -67.83413696289062, + "logps/ref_chosen": -47.08053207397461, + "logps/ref_rejected": -89.09783935546875, + "logps/rejected": -150.54098510742188, + "loss": 0.2183, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.0890052318572998, + "margin_dpo/beta_margin_grad_std": 0.13288089632987976, + "margin_dpo/beta_margin_mean": 4.068953990936279, + "margin_dpo/loss_margin_mean": 40.689537048339844, + "margin_dpo/margin_mean": 40.689537048339844, + "margin_dpo/margin_std": 27.558616638183594, + "step": 536 + }, + { + "epoch": 0.788546255506608, + "grad_norm": 60.72896957397461, + "learning_rate": 6.611477514857114e-08, + "logits/chosen": -0.6039552688598633, + "logits/rejected": -0.5421825647354126, + "logps/chosen": -78.5447998046875, + "logps/ref_chosen": -57.747474670410156, + "logps/ref_rejected": -70.43838500976562, + "logps/rejected": -124.99288940429688, + "loss": 0.4139, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1552681028842926, + "margin_dpo/beta_margin_grad_std": 0.19483794271945953, + "margin_dpo/beta_margin_mean": 3.375717878341675, + "margin_dpo/loss_margin_mean": 33.757179260253906, + "margin_dpo/margin_mean": 33.757179260253906, + "margin_dpo/margin_std": 28.151874542236328, + "step": 537 + }, + { + "epoch": 0.7900146842878121, + "grad_norm": 46.434898376464844, + "learning_rate": 6.524777069483525e-08, + "logits/chosen": -0.616761326789856, + "logits/rejected": -0.5684964656829834, + "logps/chosen": -89.30928039550781, + "logps/ref_chosen": -66.41593933105469, + "logps/ref_rejected": -84.22808837890625, + "logps/rejected": -139.8426055908203, + "loss": 0.3541, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12844222784042358, + "margin_dpo/beta_margin_grad_std": 0.16971102356910706, + "margin_dpo/beta_margin_mean": 3.2721188068389893, + "margin_dpo/loss_margin_mean": 32.721187591552734, + "margin_dpo/margin_mean": 32.721187591552734, + "margin_dpo/margin_std": 25.067447662353516, + "step": 538 + }, + { + "epoch": 0.7914831130690162, + "grad_norm": 55.15032196044922, + "learning_rate": 6.438563463416221e-08, + "logits/chosen": -0.6659849882125854, + "logits/rejected": -0.6233581304550171, + "logps/chosen": -79.83650207519531, + "logps/ref_chosen": -58.49285125732422, + "logps/ref_rejected": -91.85395812988281, + "logps/rejected": -144.42047119140625, + "loss": 0.4882, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16426563262939453, + "margin_dpo/beta_margin_grad_std": 0.21655422449111938, + "margin_dpo/beta_margin_mean": 3.122286319732666, + "margin_dpo/loss_margin_mean": 31.22286605834961, + "margin_dpo/margin_mean": 31.22286605834961, + "margin_dpo/margin_std": 27.15618133544922, + "step": 539 + }, + { + "epoch": 0.7929515418502202, + "grad_norm": 62.82166290283203, + "learning_rate": 6.352838968463919e-08, + "logits/chosen": -0.6381834149360657, + "logits/rejected": -0.6113142967224121, + "logps/chosen": -85.19060516357422, + "logps/ref_chosen": -63.482513427734375, + "logps/ref_rejected": -116.43000030517578, + "logps/rejected": -173.4632110595703, + "loss": 0.4628, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13866056501865387, + "margin_dpo/beta_margin_grad_std": 0.2188330590724945, + "margin_dpo/beta_margin_mean": 3.5325119495391846, + "margin_dpo/loss_margin_mean": 35.32511901855469, + "margin_dpo/margin_mean": 35.32511901855469, + "margin_dpo/margin_std": 27.556922912597656, + "step": 540 + }, + { + "epoch": 0.7944199706314243, + "grad_norm": 62.53669738769531, + "learning_rate": 6.267605843546767e-08, + "logits/chosen": -0.6469000577926636, + "logits/rejected": -0.6038193702697754, + "logps/chosen": -101.33219146728516, + "logps/ref_chosen": -78.28035736083984, + "logps/ref_rejected": -103.273681640625, + "logps/rejected": -156.50372314453125, + "loss": 0.4275, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15085425972938538, + "margin_dpo/beta_margin_grad_std": 0.18950016796588898, + "margin_dpo/beta_margin_mean": 3.017820358276367, + "margin_dpo/loss_margin_mean": 30.17820167541504, + "margin_dpo/margin_mean": 30.178203582763672, + "margin_dpo/margin_std": 23.339244842529297, + "step": 541 + }, + { + "epoch": 0.7958883994126285, + "grad_norm": 39.13835144042969, + "learning_rate": 6.182866334636888e-08, + "logits/chosen": -0.6534620523452759, + "logits/rejected": -0.6460641622543335, + "logps/chosen": -80.37567901611328, + "logps/ref_chosen": -57.48497009277344, + "logps/ref_rejected": -96.47506713867188, + "logps/rejected": -153.80548095703125, + "loss": 0.366, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13392959535121918, + "margin_dpo/beta_margin_grad_std": 0.19380900263786316, + "margin_dpo/beta_margin_mean": 3.4439687728881836, + "margin_dpo/loss_margin_mean": 34.43968963623047, + "margin_dpo/margin_mean": 34.43968963623047, + "margin_dpo/margin_std": 25.000656127929688, + "step": 542 + }, + { + "epoch": 0.7973568281938326, + "grad_norm": 80.92756652832031, + "learning_rate": 6.098622674699147e-08, + "logits/chosen": -0.5841265916824341, + "logits/rejected": -0.5707241296768188, + "logps/chosen": -84.22129821777344, + "logps/ref_chosen": -60.61750793457031, + "logps/ref_rejected": -105.59896850585938, + "logps/rejected": -154.79116821289062, + "loss": 0.6059, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19587799906730652, + "margin_dpo/beta_margin_grad_std": 0.22574475407600403, + "margin_dpo/beta_margin_mean": 2.5588417053222656, + "margin_dpo/loss_margin_mean": 25.588415145874023, + "margin_dpo/margin_mean": 25.588417053222656, + "margin_dpo/margin_std": 25.410099029541016, + "step": 543 + }, + { + "epoch": 0.7988252569750367, + "grad_norm": 46.70132064819336, + "learning_rate": 6.01487708363232e-08, + "logits/chosen": -0.6044985055923462, + "logits/rejected": -0.5905438661575317, + "logps/chosen": -85.07525634765625, + "logps/ref_chosen": -59.642303466796875, + "logps/ref_rejected": -100.95469665527344, + "logps/rejected": -159.369873046875, + "loss": 0.3149, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12085522711277008, + "margin_dpo/beta_margin_grad_std": 0.16033101081848145, + "margin_dpo/beta_margin_mean": 3.2982213497161865, + "margin_dpo/loss_margin_mean": 32.98221206665039, + "margin_dpo/margin_mean": 32.98221206665039, + "margin_dpo/margin_std": 24.098819732666016, + "step": 544 + }, + { + "epoch": 0.8002936857562408, + "grad_norm": 49.51677703857422, + "learning_rate": 5.9316317682106294e-08, + "logits/chosen": -0.5612127780914307, + "logits/rejected": -0.5339560508728027, + "logps/chosen": -91.53611755371094, + "logps/ref_chosen": -67.64859771728516, + "logps/ref_rejected": -95.90800476074219, + "logps/rejected": -154.13796997070312, + "loss": 0.3835, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14522799849510193, + "margin_dpo/beta_margin_grad_std": 0.19207137823104858, + "margin_dpo/beta_margin_mean": 3.43424654006958, + "margin_dpo/loss_margin_mean": 34.34246826171875, + "margin_dpo/margin_mean": 34.34246826171875, + "margin_dpo/margin_std": 26.62921142578125, + "step": 545 + }, + { + "epoch": 0.801762114537445, + "grad_norm": 49.70174789428711, + "learning_rate": 5.848888922025552e-08, + "logits/chosen": -0.5628246665000916, + "logits/rejected": -0.5294591188430786, + "logps/chosen": -73.03337097167969, + "logps/ref_chosen": -50.744232177734375, + "logps/ref_rejected": -81.86622619628906, + "logps/rejected": -137.66339111328125, + "loss": 0.3402, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12423272430896759, + "margin_dpo/beta_margin_grad_std": 0.17410895228385925, + "margin_dpo/beta_margin_mean": 3.350802421569824, + "margin_dpo/loss_margin_mean": 33.50802230834961, + "margin_dpo/margin_mean": 33.50802230834961, + "margin_dpo/margin_std": 23.63653564453125, + "step": 546 + }, + { + "epoch": 0.8032305433186491, + "grad_norm": 94.18778228759766, + "learning_rate": 5.7666507254280265e-08, + "logits/chosen": -0.5813489556312561, + "logits/rejected": -0.5452552437782288, + "logps/chosen": -99.18565368652344, + "logps/ref_chosen": -73.6877212524414, + "logps/ref_rejected": -90.76136779785156, + "logps/rejected": -147.3549041748047, + "loss": 0.5868, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18151536583900452, + "margin_dpo/beta_margin_grad_std": 0.23811323940753937, + "margin_dpo/beta_margin_mean": 3.109560966491699, + "margin_dpo/loss_margin_mean": 31.09560775756836, + "margin_dpo/margin_mean": 31.09560775756836, + "margin_dpo/margin_std": 29.87148666381836, + "step": 547 + }, + { + "epoch": 0.8046989720998532, + "grad_norm": 51.33172607421875, + "learning_rate": 5.684919345471029e-08, + "logits/chosen": -0.6642282009124756, + "logits/rejected": -0.6327365040779114, + "logps/chosen": -87.51785278320312, + "logps/ref_chosen": -65.24634552001953, + "logps/ref_rejected": -94.11807250976562, + "logps/rejected": -150.5765380859375, + "loss": 0.4329, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14642292261123657, + "margin_dpo/beta_margin_grad_std": 0.21783213317394257, + "margin_dpo/beta_margin_mean": 3.4186956882476807, + "margin_dpo/loss_margin_mean": 34.186954498291016, + "margin_dpo/margin_mean": 34.186954498291016, + "margin_dpo/margin_std": 28.527481079101562, + "step": 548 + }, + { + "epoch": 0.8061674008810573, + "grad_norm": 59.542022705078125, + "learning_rate": 5.603696935852426e-08, + "logits/chosen": -0.587199866771698, + "logits/rejected": -0.5497395992279053, + "logps/chosen": -70.22129821777344, + "logps/ref_chosen": -49.21235656738281, + "logps/ref_rejected": -73.91031646728516, + "logps/rejected": -129.74290466308594, + "loss": 0.3415, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12509265542030334, + "margin_dpo/beta_margin_grad_std": 0.17894169688224792, + "margin_dpo/beta_margin_mean": 3.4823646545410156, + "margin_dpo/loss_margin_mean": 34.823646545410156, + "margin_dpo/margin_mean": 34.823646545410156, + "margin_dpo/margin_std": 25.530513763427734, + "step": 549 + }, + { + "epoch": 0.8076358296622613, + "grad_norm": 69.98318481445312, + "learning_rate": 5.5229856368582376e-08, + "logits/chosen": -0.5780174732208252, + "logits/rejected": -0.554786741733551, + "logps/chosen": -81.68783569335938, + "logps/ref_chosen": -56.80695343017578, + "logps/ref_rejected": -95.12580871582031, + "logps/rejected": -147.86605834960938, + "loss": 0.512, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18082059919834137, + "margin_dpo/beta_margin_grad_std": 0.22207701206207275, + "margin_dpo/beta_margin_mean": 2.7859373092651367, + "margin_dpo/loss_margin_mean": 27.859371185302734, + "margin_dpo/margin_mean": 27.859375, + "margin_dpo/margin_std": 24.073030471801758, + "step": 550 + }, + { + "epoch": 0.8091042584434655, + "grad_norm": 68.77825164794922, + "learning_rate": 5.4427875753062734e-08, + "logits/chosen": -0.6016166806221008, + "logits/rejected": -0.5792367458343506, + "logps/chosen": -82.6038589477539, + "logps/ref_chosen": -59.10633087158203, + "logps/ref_rejected": -111.67280578613281, + "logps/rejected": -170.34124755859375, + "loss": 0.3618, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13445577025413513, + "margin_dpo/beta_margin_grad_std": 0.18969406187534332, + "margin_dpo/beta_margin_mean": 3.5170915126800537, + "margin_dpo/loss_margin_mean": 35.17091369628906, + "margin_dpo/margin_mean": 35.17091369628906, + "margin_dpo/margin_std": 27.047245025634766, + "step": 551 + }, + { + "epoch": 0.8105726872246696, + "grad_norm": 36.09619903564453, + "learning_rate": 5.363104864490034e-08, + "logits/chosen": -0.6584379076957703, + "logits/rejected": -0.6297129392623901, + "logps/chosen": -82.9939193725586, + "logps/ref_chosen": -62.35459899902344, + "logps/ref_rejected": -104.56210327148438, + "logps/rejected": -164.9587860107422, + "loss": 0.2475, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.10201341658830643, + "margin_dpo/beta_margin_grad_std": 0.1296149343252182, + "margin_dpo/beta_margin_mean": 3.975735902786255, + "margin_dpo/loss_margin_mean": 39.757354736328125, + "margin_dpo/margin_mean": 39.757354736328125, + "margin_dpo/margin_std": 30.61846923828125, + "step": 552 + }, + { + "epoch": 0.8120411160058737, + "grad_norm": 62.299354553222656, + "learning_rate": 5.2839396041230415e-08, + "logits/chosen": -0.5835554599761963, + "logits/rejected": -0.5560900568962097, + "logps/chosen": -89.63333129882812, + "logps/ref_chosen": -68.25881958007812, + "logps/ref_rejected": -98.0971450805664, + "logps/rejected": -150.1568603515625, + "loss": 0.4084, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15132924914360046, + "margin_dpo/beta_margin_grad_std": 0.1949571967124939, + "margin_dpo/beta_margin_mean": 3.0685200691223145, + "margin_dpo/loss_margin_mean": 30.685199737548828, + "margin_dpo/margin_mean": 30.685199737548828, + "margin_dpo/margin_std": 24.393556594848633, + "step": 553 + }, + { + "epoch": 0.8135095447870778, + "grad_norm": 70.59496307373047, + "learning_rate": 5.205293880283551e-08, + "logits/chosen": -0.5978009104728699, + "logits/rejected": -0.5454249382019043, + "logps/chosen": -91.25200653076172, + "logps/ref_chosen": -67.94767761230469, + "logps/ref_rejected": -89.78272247314453, + "logps/rejected": -154.95721435546875, + "loss": 0.4373, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12750211358070374, + "margin_dpo/beta_margin_grad_std": 0.22240933775901794, + "margin_dpo/beta_margin_mean": 4.187016487121582, + "margin_dpo/loss_margin_mean": 41.87016677856445, + "margin_dpo/margin_mean": 41.87016677856445, + "margin_dpo/margin_std": 30.95236587524414, + "step": 554 + }, + { + "epoch": 0.8149779735682819, + "grad_norm": 61.74562454223633, + "learning_rate": 5.127169765359515e-08, + "logits/chosen": -0.5948277115821838, + "logits/rejected": -0.5893919467926025, + "logps/chosen": -75.4261245727539, + "logps/ref_chosen": -53.33049011230469, + "logps/ref_rejected": -108.47937774658203, + "logps/rejected": -165.4979248046875, + "loss": 0.4571, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14716145396232605, + "margin_dpo/beta_margin_grad_std": 0.20644359290599823, + "margin_dpo/beta_margin_mean": 3.4922895431518555, + "margin_dpo/loss_margin_mean": 34.92289733886719, + "margin_dpo/margin_mean": 34.92289733886719, + "margin_dpo/margin_std": 27.98041534423828, + "step": 555 + }, + { + "epoch": 0.8164464023494861, + "grad_norm": 72.64017486572266, + "learning_rate": 5.049569317994012e-08, + "logits/chosen": -0.5797896385192871, + "logits/rejected": -0.5396873950958252, + "logps/chosen": -80.73486328125, + "logps/ref_chosen": -58.64447021484375, + "logps/ref_rejected": -101.34040832519531, + "logps/rejected": -154.17111206054688, + "loss": 0.5343, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1822032928466797, + "margin_dpo/beta_margin_grad_std": 0.2356235682964325, + "margin_dpo/beta_margin_mean": 3.074030876159668, + "margin_dpo/loss_margin_mean": 30.740306854248047, + "margin_dpo/margin_mean": 30.740306854248047, + "margin_dpo/margin_std": 27.73691177368164, + "step": 556 + }, + { + "epoch": 0.8179148311306902, + "grad_norm": 52.41410446166992, + "learning_rate": 4.9724945830310144e-08, + "logits/chosen": -0.6446192264556885, + "logits/rejected": -0.6262944936752319, + "logps/chosen": -89.59944152832031, + "logps/ref_chosen": -67.84066009521484, + "logps/ref_rejected": -109.93966674804688, + "logps/rejected": -162.2843475341797, + "loss": 0.4552, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16416671872138977, + "margin_dpo/beta_margin_grad_std": 0.19996830821037292, + "margin_dpo/beta_margin_mean": 3.0585899353027344, + "margin_dpo/loss_margin_mean": 30.585901260375977, + "margin_dpo/margin_mean": 30.585901260375977, + "margin_dpo/margin_std": 26.19734001159668, + "step": 557 + }, + { + "epoch": 0.8193832599118943, + "grad_norm": 38.192787170410156, + "learning_rate": 4.8959475914614554e-08, + "logits/chosen": -0.6551119089126587, + "logits/rejected": -0.6068642139434814, + "logps/chosen": -81.69489288330078, + "logps/ref_chosen": -62.36824035644531, + "logps/ref_rejected": -102.16102600097656, + "logps/rejected": -162.4652862548828, + "loss": 0.2874, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.10797977447509766, + "margin_dpo/beta_margin_grad_std": 0.14597085118293762, + "margin_dpo/beta_margin_mean": 4.0977606773376465, + "margin_dpo/loss_margin_mean": 40.97760772705078, + "margin_dpo/margin_mean": 40.97760772705078, + "margin_dpo/margin_std": 30.455211639404297, + "step": 558 + }, + { + "epoch": 0.8208516886930984, + "grad_norm": 55.569332122802734, + "learning_rate": 4.8199303603697614e-08, + "logits/chosen": -0.6832656860351562, + "logits/rejected": -0.6301894187927246, + "logps/chosen": -80.5696029663086, + "logps/ref_chosen": -60.75232696533203, + "logps/ref_rejected": -93.4422836303711, + "logps/rejected": -146.19882202148438, + "loss": 0.4325, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15588068962097168, + "margin_dpo/beta_margin_grad_std": 0.20822836458683014, + "margin_dpo/beta_margin_mean": 3.2939257621765137, + "margin_dpo/loss_margin_mean": 32.93925857543945, + "margin_dpo/margin_mean": 32.93925476074219, + "margin_dpo/margin_std": 27.555404663085938, + "step": 559 + }, + { + "epoch": 0.8223201174743024, + "grad_norm": 67.84832000732422, + "learning_rate": 4.7444448928806615e-08, + "logits/chosen": -0.5837658643722534, + "logits/rejected": -0.5339952707290649, + "logps/chosen": -79.17695617675781, + "logps/ref_chosen": -58.10382080078125, + "logps/ref_rejected": -79.99122619628906, + "logps/rejected": -130.1254425048828, + "loss": 0.4489, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16372855007648468, + "margin_dpo/beta_margin_grad_std": 0.1933506578207016, + "margin_dpo/beta_margin_mean": 2.906108856201172, + "margin_dpo/loss_margin_mean": 29.061086654663086, + "margin_dpo/margin_mean": 29.061086654663086, + "margin_dpo/margin_std": 24.71479034423828, + "step": 560 + }, + { + "epoch": 0.8237885462555066, + "grad_norm": 66.11046600341797, + "learning_rate": 4.669493178106432e-08, + "logits/chosen": -0.6318497657775879, + "logits/rejected": -0.6243282556533813, + "logps/chosen": -76.038330078125, + "logps/ref_chosen": -50.91287612915039, + "logps/ref_rejected": -99.06857299804688, + "logps/rejected": -153.46937561035156, + "loss": 0.4945, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17272219061851501, + "margin_dpo/beta_margin_grad_std": 0.21409326791763306, + "margin_dpo/beta_margin_mean": 2.927535057067871, + "margin_dpo/loss_margin_mean": 29.275352478027344, + "margin_dpo/margin_mean": 29.275352478027344, + "margin_dpo/margin_std": 26.028850555419922, + "step": 561 + }, + { + "epoch": 0.8252569750367107, + "grad_norm": 34.92936706542969, + "learning_rate": 4.5950771910944596e-08, + "logits/chosen": -0.651642382144928, + "logits/rejected": -0.604433536529541, + "logps/chosen": -78.28529357910156, + "logps/ref_chosen": -59.46440124511719, + "logps/ref_rejected": -96.54266357421875, + "logps/rejected": -153.3458709716797, + "loss": 0.2435, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.0989568680524826, + "margin_dpo/beta_margin_grad_std": 0.1261216104030609, + "margin_dpo/beta_margin_mean": 3.7982311248779297, + "margin_dpo/loss_margin_mean": 37.9823112487793, + "margin_dpo/margin_mean": 37.9823112487793, + "margin_dpo/margin_std": 26.726564407348633, + "step": 562 + }, + { + "epoch": 0.8267254038179148, + "grad_norm": 63.81352233886719, + "learning_rate": 4.521198892775202e-08, + "logits/chosen": -0.5930050611495972, + "logits/rejected": -0.5729939937591553, + "logps/chosen": -83.12980651855469, + "logps/ref_chosen": -60.60819625854492, + "logps/ref_rejected": -94.56770324707031, + "logps/rejected": -147.40249633789062, + "loss": 0.4148, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15109089016914368, + "margin_dpo/beta_margin_grad_std": 0.1935727894306183, + "margin_dpo/beta_margin_mean": 3.031318426132202, + "margin_dpo/loss_margin_mean": 30.313182830810547, + "margin_dpo/margin_mean": 30.313182830810547, + "margin_dpo/margin_std": 23.21819496154785, + "step": 563 + }, + { + "epoch": 0.8281938325991189, + "grad_norm": 47.72722244262695, + "learning_rate": 4.447860229910544e-08, + "logits/chosen": -0.656052827835083, + "logits/rejected": -0.5981060862541199, + "logps/chosen": -96.48939514160156, + "logps/ref_chosen": -74.26837921142578, + "logps/ref_rejected": -93.2381820678711, + "logps/rejected": -147.96966552734375, + "loss": 0.368, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13481834530830383, + "margin_dpo/beta_margin_grad_std": 0.1763157844543457, + "margin_dpo/beta_margin_mean": 3.2510476112365723, + "margin_dpo/loss_margin_mean": 32.510475158691406, + "margin_dpo/margin_mean": 32.510475158691406, + "margin_dpo/margin_std": 22.74962043762207, + "step": 564 + }, + { + "epoch": 0.8296622613803231, + "grad_norm": 44.3295783996582, + "learning_rate": 4.375063135042445e-08, + "logits/chosen": -0.6097604036331177, + "logits/rejected": -0.5671969652175903, + "logps/chosen": -91.07102966308594, + "logps/ref_chosen": -69.0199203491211, + "logps/ref_rejected": -85.7789306640625, + "logps/rejected": -143.09686279296875, + "loss": 0.3731, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.141332745552063, + "margin_dpo/beta_margin_grad_std": 0.18416938185691833, + "margin_dpo/beta_margin_mean": 3.5266833305358887, + "margin_dpo/loss_margin_mean": 35.2668342590332, + "margin_dpo/margin_mean": 35.2668342590332, + "margin_dpo/margin_std": 30.624713897705078, + "step": 565 + }, + { + "epoch": 0.8311306901615272, + "grad_norm": 56.34800338745117, + "learning_rate": 4.3028095264420525e-08, + "logits/chosen": -0.5949935913085938, + "logits/rejected": -0.5808389186859131, + "logps/chosen": -87.20069885253906, + "logps/ref_chosen": -66.5453109741211, + "logps/ref_rejected": -103.86931610107422, + "logps/rejected": -158.5188751220703, + "loss": 0.4755, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16708871722221375, + "margin_dpo/beta_margin_grad_std": 0.21157479286193848, + "margin_dpo/beta_margin_mean": 3.399416923522949, + "margin_dpo/loss_margin_mean": 33.994171142578125, + "margin_dpo/margin_mean": 33.994171142578125, + "margin_dpo/margin_std": 29.911640167236328, + "step": 566 + }, + { + "epoch": 0.8325991189427313, + "grad_norm": 82.15995025634766, + "learning_rate": 4.231101308059165e-08, + "logits/chosen": -0.6804023385047913, + "logits/rejected": -0.6269962787628174, + "logps/chosen": -75.24916076660156, + "logps/ref_chosen": -52.858299255371094, + "logps/ref_rejected": -85.37095642089844, + "logps/rejected": -140.0916748046875, + "loss": 0.5883, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17573551833629608, + "margin_dpo/beta_margin_grad_std": 0.24265018105506897, + "margin_dpo/beta_margin_mean": 3.2329859733581543, + "margin_dpo/loss_margin_mean": 32.329856872558594, + "margin_dpo/margin_mean": 32.32986068725586, + "margin_dpo/margin_std": 28.114917755126953, + "step": 567 + }, + { + "epoch": 0.8340675477239354, + "grad_norm": 43.57807159423828, + "learning_rate": 4.1599403694720145e-08, + "logits/chosen": -0.580660343170166, + "logits/rejected": -0.5636056065559387, + "logps/chosen": -67.96955108642578, + "logps/ref_chosen": -45.1923828125, + "logps/ref_rejected": -89.09236145019531, + "logps/rejected": -149.81170654296875, + "loss": 0.3489, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12049505114555359, + "margin_dpo/beta_margin_grad_std": 0.17438393831253052, + "margin_dpo/beta_margin_mean": 3.7942161560058594, + "margin_dpo/loss_margin_mean": 37.942161560058594, + "margin_dpo/margin_mean": 37.942161560058594, + "margin_dpo/margin_std": 26.538555145263672, + "step": 568 + }, + { + "epoch": 0.8355359765051396, + "grad_norm": 63.59123229980469, + "learning_rate": 4.089328585837512e-08, + "logits/chosen": -0.6394084692001343, + "logits/rejected": -0.6091455817222595, + "logps/chosen": -86.40789794921875, + "logps/ref_chosen": -63.72056198120117, + "logps/ref_rejected": -79.10325622558594, + "logps/rejected": -131.8647918701172, + "loss": 0.5032, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17486035823822021, + "margin_dpo/beta_margin_grad_std": 0.22121158242225647, + "margin_dpo/beta_margin_mean": 3.007420063018799, + "margin_dpo/loss_margin_mean": 30.074199676513672, + "margin_dpo/margin_mean": 30.074199676513672, + "margin_dpo/margin_std": 27.18084716796875, + "step": 569 + }, + { + "epoch": 0.8370044052863436, + "grad_norm": 53.639320373535156, + "learning_rate": 4.019267817841834e-08, + "logits/chosen": -0.674132764339447, + "logits/rejected": -0.6270936131477356, + "logps/chosen": -82.65512084960938, + "logps/ref_chosen": -61.61454772949219, + "logps/ref_rejected": -82.1418685913086, + "logps/rejected": -138.31561279296875, + "loss": 0.329, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1217803955078125, + "margin_dpo/beta_margin_grad_std": 0.18320615589618683, + "margin_dpo/beta_margin_mean": 3.5133180618286133, + "margin_dpo/loss_margin_mean": 35.1331787109375, + "margin_dpo/margin_mean": 35.1331787109375, + "margin_dpo/margin_std": 25.58907127380371, + "step": 570 + }, + { + "epoch": 0.8384728340675477, + "grad_norm": 61.772037506103516, + "learning_rate": 3.9497599116513705e-08, + "logits/chosen": -0.5997041463851929, + "logits/rejected": -0.5761772990226746, + "logps/chosen": -75.53142547607422, + "logps/ref_chosen": -53.05406188964844, + "logps/ref_rejected": -91.33682250976562, + "logps/rejected": -148.2672119140625, + "loss": 0.3786, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1382187306880951, + "margin_dpo/beta_margin_grad_std": 0.19462428987026215, + "margin_dpo/beta_margin_mean": 3.445303440093994, + "margin_dpo/loss_margin_mean": 34.453033447265625, + "margin_dpo/margin_mean": 34.453033447265625, + "margin_dpo/margin_std": 27.182416915893555, + "step": 571 + }, + { + "epoch": 0.8399412628487518, + "grad_norm": 78.96542358398438, + "learning_rate": 3.880806698864086e-08, + "logits/chosen": -0.5895199775695801, + "logits/rejected": -0.5727903246879578, + "logps/chosen": -75.78829193115234, + "logps/ref_chosen": -48.459285736083984, + "logps/ref_rejected": -83.5570297241211, + "logps/rejected": -143.24444580078125, + "loss": 0.6978, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18673813343048096, + "margin_dpo/beta_margin_grad_std": 0.25194963812828064, + "margin_dpo/beta_margin_mean": 3.23583984375, + "margin_dpo/loss_margin_mean": 32.3583984375, + "margin_dpo/margin_mean": 32.3583984375, + "margin_dpo/margin_std": 31.786035537719727, + "step": 572 + }, + { + "epoch": 0.8414096916299559, + "grad_norm": 59.523719787597656, + "learning_rate": 3.812409996461275e-08, + "logits/chosen": -0.6645894050598145, + "logits/rejected": -0.6301409602165222, + "logps/chosen": -73.29808044433594, + "logps/ref_chosen": -51.62262725830078, + "logps/ref_rejected": -85.32499694824219, + "logps/rejected": -141.5244903564453, + "loss": 0.4423, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1509319394826889, + "margin_dpo/beta_margin_grad_std": 0.21270796656608582, + "margin_dpo/beta_margin_mean": 3.452404499053955, + "margin_dpo/loss_margin_mean": 34.5240478515625, + "margin_dpo/margin_mean": 34.5240478515625, + "margin_dpo/margin_std": 26.213939666748047, + "step": 573 + }, + { + "epoch": 0.8428781204111601, + "grad_norm": 71.15605926513672, + "learning_rate": 3.74457160675965e-08, + "logits/chosen": -0.6428389549255371, + "logits/rejected": -0.6086920499801636, + "logps/chosen": -74.59834289550781, + "logps/ref_chosen": -51.04446029663086, + "logps/ref_rejected": -92.80640411376953, + "logps/rejected": -150.51730346679688, + "loss": 0.4548, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14354148507118225, + "margin_dpo/beta_margin_grad_std": 0.1961897611618042, + "margin_dpo/beta_margin_mean": 3.415701389312744, + "margin_dpo/loss_margin_mean": 34.157012939453125, + "margin_dpo/margin_mean": 34.157012939453125, + "margin_dpo/margin_std": 27.831592559814453, + "step": 574 + }, + { + "epoch": 0.8443465491923642, + "grad_norm": 86.26287078857422, + "learning_rate": 3.677293317363864e-08, + "logits/chosen": -0.5673672556877136, + "logits/rejected": -0.5325363874435425, + "logps/chosen": -97.13941955566406, + "logps/ref_chosen": -71.79014587402344, + "logps/ref_rejected": -95.38619995117188, + "logps/rejected": -157.06790161132812, + "loss": 0.6399, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1683315932750702, + "margin_dpo/beta_margin_grad_std": 0.2584590017795563, + "margin_dpo/beta_margin_mean": 3.6332435607910156, + "margin_dpo/loss_margin_mean": 36.332435607910156, + "margin_dpo/margin_mean": 36.332435607910156, + "margin_dpo/margin_std": 31.414226531982422, + "step": 575 + }, + { + "epoch": 0.8458149779735683, + "grad_norm": 49.230098724365234, + "learning_rate": 3.6105769011194224e-08, + "logits/chosen": -0.5987046957015991, + "logits/rejected": -0.5964124202728271, + "logps/chosen": -77.82243347167969, + "logps/ref_chosen": -54.262969970703125, + "logps/ref_rejected": -100.7542724609375, + "logps/rejected": -159.103515625, + "loss": 0.444, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14397433400154114, + "margin_dpo/beta_margin_grad_std": 0.19858963787555695, + "margin_dpo/beta_margin_mean": 3.4789772033691406, + "margin_dpo/loss_margin_mean": 34.789772033691406, + "margin_dpo/margin_mean": 34.789772033691406, + "margin_dpo/margin_std": 29.601974487304688, + "step": 576 + }, + { + "epoch": 0.8472834067547724, + "grad_norm": 48.130558013916016, + "learning_rate": 3.5444241160659304e-08, + "logits/chosen": -0.6501774787902832, + "logits/rejected": -0.6030783653259277, + "logps/chosen": -81.96438598632812, + "logps/ref_chosen": -61.909706115722656, + "logps/ref_rejected": -84.07069396972656, + "logps/rejected": -142.32540893554688, + "loss": 0.348, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.11993271112442017, + "margin_dpo/beta_margin_grad_std": 0.1745702028274536, + "margin_dpo/beta_margin_mean": 3.8200042247772217, + "margin_dpo/loss_margin_mean": 38.200042724609375, + "margin_dpo/margin_mean": 38.200042724609375, + "margin_dpo/margin_std": 27.837221145629883, + "step": 577 + }, + { + "epoch": 0.8487518355359766, + "grad_norm": 56.244651794433594, + "learning_rate": 3.478836705390808e-08, + "logits/chosen": -0.5622389912605286, + "logits/rejected": -0.5450348854064941, + "logps/chosen": -76.16979217529297, + "logps/ref_chosen": -49.26368713378906, + "logps/ref_rejected": -83.43626403808594, + "logps/rejected": -145.69003295898438, + "loss": 0.3651, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13420046865940094, + "margin_dpo/beta_margin_grad_std": 0.1781080663204193, + "margin_dpo/beta_margin_mean": 3.5347681045532227, + "margin_dpo/loss_margin_mean": 35.347679138183594, + "margin_dpo/margin_mean": 35.347679138183594, + "margin_dpo/margin_std": 26.89832878112793, + "step": 578 + }, + { + "epoch": 0.8502202643171806, + "grad_norm": 54.51181411743164, + "learning_rate": 3.41381639738331e-08, + "logits/chosen": -0.6210640668869019, + "logits/rejected": -0.5942162871360779, + "logps/chosen": -80.40229797363281, + "logps/ref_chosen": -58.88581848144531, + "logps/ref_rejected": -94.78762817382812, + "logps/rejected": -147.00790405273438, + "loss": 0.3713, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14311912655830383, + "margin_dpo/beta_margin_grad_std": 0.1770986020565033, + "margin_dpo/beta_margin_mean": 3.0703792572021484, + "margin_dpo/loss_margin_mean": 30.70379066467285, + "margin_dpo/margin_mean": 30.70379066467285, + "margin_dpo/margin_std": 23.245943069458008, + "step": 579 + }, + { + "epoch": 0.8516886930983847, + "grad_norm": 47.365726470947266, + "learning_rate": 3.349364905389032e-08, + "logits/chosen": -0.6056051254272461, + "logits/rejected": -0.5679988265037537, + "logps/chosen": -67.93992614746094, + "logps/ref_chosen": -48.70684051513672, + "logps/ref_rejected": -81.7583999633789, + "logps/rejected": -141.37774658203125, + "loss": 0.3507, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1173824667930603, + "margin_dpo/beta_margin_grad_std": 0.2006831169128418, + "margin_dpo/beta_margin_mean": 4.0386271476745605, + "margin_dpo/loss_margin_mean": 40.38627243041992, + "margin_dpo/margin_mean": 40.38627243041992, + "margin_dpo/margin_std": 30.28713607788086, + "step": 580 + }, + { + "epoch": 0.8531571218795888, + "grad_norm": 52.634037017822266, + "learning_rate": 3.285483927764726e-08, + "logits/chosen": -0.5740267634391785, + "logits/rejected": -0.5509278774261475, + "logps/chosen": -83.43389892578125, + "logps/ref_chosen": -62.22235107421875, + "logps/ref_rejected": -91.73568725585938, + "logps/rejected": -144.02926635742188, + "loss": 0.4291, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15440425276756287, + "margin_dpo/beta_margin_grad_std": 0.203893780708313, + "margin_dpo/beta_margin_mean": 3.1082029342651367, + "margin_dpo/loss_margin_mean": 31.082029342651367, + "margin_dpo/margin_mean": 31.082029342651367, + "margin_dpo/margin_std": 24.809860229492188, + "step": 581 + }, + { + "epoch": 0.8546255506607929, + "grad_norm": 66.31195068359375, + "learning_rate": 3.222175147833556e-08, + "logits/chosen": -0.605322003364563, + "logits/rejected": -0.6055228114128113, + "logps/chosen": -77.13755798339844, + "logps/ref_chosen": -58.228660583496094, + "logps/ref_rejected": -110.06959533691406, + "logps/rejected": -164.6455078125, + "loss": 0.4097, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14925749599933624, + "margin_dpo/beta_margin_grad_std": 0.20233149826526642, + "margin_dpo/beta_margin_mean": 3.5667009353637695, + "margin_dpo/loss_margin_mean": 35.66700744628906, + "margin_dpo/margin_mean": 35.66700744628906, + "margin_dpo/margin_std": 28.87442398071289, + "step": 582 + }, + { + "epoch": 0.856093979441997, + "grad_norm": 63.78881072998047, + "learning_rate": 3.159440233840763e-08, + "logits/chosen": -0.5751946568489075, + "logits/rejected": -0.5564270615577698, + "logps/chosen": -81.47348022460938, + "logps/ref_chosen": -56.86286163330078, + "logps/ref_rejected": -88.4039306640625, + "logps/rejected": -142.409423828125, + "loss": 0.5765, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18386687338352203, + "margin_dpo/beta_margin_grad_std": 0.2354869246482849, + "margin_dpo/beta_margin_mean": 2.939487934112549, + "margin_dpo/loss_margin_mean": 29.394878387451172, + "margin_dpo/margin_mean": 29.394878387451172, + "margin_dpo/margin_std": 29.66604995727539, + "step": 583 + }, + { + "epoch": 0.8575624082232012, + "grad_norm": 40.81183624267578, + "learning_rate": 3.0972808389096635e-08, + "logits/chosen": -0.6157029271125793, + "logits/rejected": -0.5580540299415588, + "logps/chosen": -74.85009002685547, + "logps/ref_chosen": -56.90068054199219, + "logps/ref_rejected": -97.63606262207031, + "logps/rejected": -154.90313720703125, + "loss": 0.2598, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.10160969197750092, + "margin_dpo/beta_margin_grad_std": 0.1572841852903366, + "margin_dpo/beta_margin_mean": 3.931765556335449, + "margin_dpo/loss_margin_mean": 39.317657470703125, + "margin_dpo/margin_mean": 39.317657470703125, + "margin_dpo/margin_std": 26.282012939453125, + "step": 584 + }, + { + "epoch": 0.8590308370044053, + "grad_norm": 64.71321105957031, + "learning_rate": 3.035698600998121e-08, + "logits/chosen": -0.6199055314064026, + "logits/rejected": -0.5935189723968506, + "logps/chosen": -85.7191162109375, + "logps/ref_chosen": -60.973968505859375, + "logps/ref_rejected": -84.16952514648438, + "logps/rejected": -141.8712921142578, + "loss": 0.4802, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16080693900585175, + "margin_dpo/beta_margin_grad_std": 0.21611681580543518, + "margin_dpo/beta_margin_mean": 3.2956621646881104, + "margin_dpo/loss_margin_mean": 32.95662307739258, + "margin_dpo/margin_mean": 32.95662307739258, + "margin_dpo/margin_std": 28.009883880615234, + "step": 585 + }, + { + "epoch": 0.8604992657856094, + "grad_norm": 64.60726928710938, + "learning_rate": 2.974695142855388e-08, + "logits/chosen": -0.5863425731658936, + "logits/rejected": -0.5751093626022339, + "logps/chosen": -82.08023071289062, + "logps/ref_chosen": -56.85559844970703, + "logps/ref_rejected": -91.8026123046875, + "logps/rejected": -149.82192993164062, + "loss": 0.5533, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16516205668449402, + "margin_dpo/beta_margin_grad_std": 0.2322179675102234, + "margin_dpo/beta_margin_mean": 3.2794694900512695, + "margin_dpo/loss_margin_mean": 32.79469299316406, + "margin_dpo/margin_mean": 32.79469680786133, + "margin_dpo/margin_std": 29.784767150878906, + "step": 586 + }, + { + "epoch": 0.8619676945668135, + "grad_norm": 47.665504455566406, + "learning_rate": 2.9142720719793122e-08, + "logits/chosen": -0.6379122734069824, + "logits/rejected": -0.6201504468917847, + "logps/chosen": -62.888648986816406, + "logps/ref_chosen": -44.69159698486328, + "logps/ref_rejected": -82.62385559082031, + "logps/rejected": -131.4063720703125, + "loss": 0.501, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1841679811477661, + "margin_dpo/beta_margin_grad_std": 0.21286147832870483, + "margin_dpo/beta_margin_mean": 3.058547019958496, + "margin_dpo/loss_margin_mean": 30.58547019958496, + "margin_dpo/margin_mean": 30.585468292236328, + "margin_dpo/margin_std": 27.52269744873047, + "step": 587 + }, + { + "epoch": 0.8634361233480177, + "grad_norm": 63.907814025878906, + "learning_rate": 2.8544309805740018e-08, + "logits/chosen": -0.6512797474861145, + "logits/rejected": -0.6356316804885864, + "logps/chosen": -73.06784057617188, + "logps/ref_chosen": -50.294952392578125, + "logps/ref_rejected": -107.36988067626953, + "logps/rejected": -162.28692626953125, + "loss": 0.4775, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16899925470352173, + "margin_dpo/beta_margin_grad_std": 0.21543928980827332, + "margin_dpo/beta_margin_mean": 3.21441650390625, + "margin_dpo/loss_margin_mean": 32.1441650390625, + "margin_dpo/margin_mean": 32.1441650390625, + "margin_dpo/margin_std": 28.336963653564453, + "step": 588 + }, + { + "epoch": 0.8649045521292217, + "grad_norm": 42.07124710083008, + "learning_rate": 2.7951734455078786e-08, + "logits/chosen": -0.6398344039916992, + "logits/rejected": -0.6136231422424316, + "logps/chosen": -82.24392700195312, + "logps/ref_chosen": -59.929908752441406, + "logps/ref_rejected": -111.65534973144531, + "logps/rejected": -178.29017639160156, + "loss": 0.3172, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.10654419660568237, + "margin_dpo/beta_margin_grad_std": 0.19332361221313477, + "margin_dpo/beta_margin_mean": 4.432080268859863, + "margin_dpo/loss_margin_mean": 44.32080078125, + "margin_dpo/margin_mean": 44.32080078125, + "margin_dpo/margin_std": 32.961795806884766, + "step": 589 + }, + { + "epoch": 0.8663729809104258, + "grad_norm": 38.59159851074219, + "learning_rate": 2.736501028272095e-08, + "logits/chosen": -0.6100300550460815, + "logits/rejected": -0.5847848057746887, + "logps/chosen": -77.71539306640625, + "logps/ref_chosen": -55.80979537963867, + "logps/ref_rejected": -106.06282043457031, + "logps/rejected": -166.61837768554688, + "loss": 0.2745, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.10657128691673279, + "margin_dpo/beta_margin_grad_std": 0.1513645052909851, + "margin_dpo/beta_margin_mean": 3.8649959564208984, + "margin_dpo/loss_margin_mean": 38.64995574951172, + "margin_dpo/margin_mean": 38.64995574951172, + "margin_dpo/margin_std": 28.00396728515625, + "step": 590 + }, + { + "epoch": 0.8678414096916299, + "grad_norm": 63.60677719116211, + "learning_rate": 2.678415274939408e-08, + "logits/chosen": -0.6167633533477783, + "logits/rejected": -0.5552696585655212, + "logps/chosen": -81.19537353515625, + "logps/ref_chosen": -56.24061965942383, + "logps/ref_rejected": -83.78629302978516, + "logps/rejected": -145.19476318359375, + "loss": 0.3925, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12805438041687012, + "margin_dpo/beta_margin_grad_std": 0.21181520819664001, + "margin_dpo/beta_margin_mean": 3.645371913909912, + "margin_dpo/loss_margin_mean": 36.45372009277344, + "margin_dpo/margin_mean": 36.45372009277344, + "margin_dpo/margin_std": 25.672313690185547, + "step": 591 + }, + { + "epoch": 0.869309838472834, + "grad_norm": 86.11256408691406, + "learning_rate": 2.6209177161234442e-08, + "logits/chosen": -0.6086193323135376, + "logits/rejected": -0.5861480236053467, + "logps/chosen": -73.6089096069336, + "logps/ref_chosen": -47.94025421142578, + "logps/ref_rejected": -75.73287963867188, + "logps/rejected": -137.11573791503906, + "loss": 0.5921, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15364328026771545, + "margin_dpo/beta_margin_grad_std": 0.24902772903442383, + "margin_dpo/beta_margin_mean": 3.571420669555664, + "margin_dpo/loss_margin_mean": 35.71420669555664, + "margin_dpo/margin_mean": 35.71420669555664, + "margin_dpo/margin_std": 28.337505340576172, + "step": 592 + }, + { + "epoch": 0.8707782672540382, + "grad_norm": 82.06730651855469, + "learning_rate": 2.564009866938349e-08, + "logits/chosen": -0.5315680503845215, + "logits/rejected": -0.5048198699951172, + "logps/chosen": -72.25209045410156, + "logps/ref_chosen": -48.690757751464844, + "logps/ref_rejected": -60.90800476074219, + "logps/rejected": -114.44489288330078, + "loss": 0.6216, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18850964307785034, + "margin_dpo/beta_margin_grad_std": 0.2500463128089905, + "margin_dpo/beta_margin_mean": 2.9975552558898926, + "margin_dpo/loss_margin_mean": 29.97555160522461, + "margin_dpo/margin_mean": 29.97555160522461, + "margin_dpo/margin_std": 28.288480758666992, + "step": 593 + }, + { + "epoch": 0.8722466960352423, + "grad_norm": 60.2679443359375, + "learning_rate": 2.5076932269588708e-08, + "logits/chosen": -0.6228535175323486, + "logits/rejected": -0.5754865407943726, + "logps/chosen": -76.21943664550781, + "logps/ref_chosen": -54.93488693237305, + "logps/ref_rejected": -86.09967041015625, + "logps/rejected": -147.49954223632812, + "loss": 0.4954, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14612731337547302, + "margin_dpo/beta_margin_grad_std": 0.2071676254272461, + "margin_dpo/beta_margin_mean": 4.011531829833984, + "margin_dpo/loss_margin_mean": 40.11532211303711, + "margin_dpo/margin_mean": 40.115318298339844, + "margin_dpo/margin_std": 34.15007781982422, + "step": 594 + }, + { + "epoch": 0.8737151248164464, + "grad_norm": 43.721248626708984, + "learning_rate": 2.451969280180849e-08, + "logits/chosen": -0.5794812440872192, + "logits/rejected": -0.544758141040802, + "logps/chosen": -72.4796142578125, + "logps/ref_chosen": -49.42041778564453, + "logps/ref_rejected": -80.62731170654297, + "logps/rejected": -135.92617797851562, + "loss": 0.3821, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14666876196861267, + "margin_dpo/beta_margin_grad_std": 0.18497151136398315, + "margin_dpo/beta_margin_mean": 3.223968029022217, + "margin_dpo/loss_margin_mean": 32.23967742919922, + "margin_dpo/margin_mean": 32.23967742919922, + "margin_dpo/margin_std": 26.86066436767578, + "step": 595 + }, + { + "epoch": 0.8751835535976505, + "grad_norm": 64.42137908935547, + "learning_rate": 2.396839494982103e-08, + "logits/chosen": -0.5889699459075928, + "logits/rejected": -0.5423535704612732, + "logps/chosen": -81.71769714355469, + "logps/ref_chosen": -59.791683197021484, + "logps/ref_rejected": -80.09111785888672, + "logps/rejected": -137.01409912109375, + "loss": 0.4742, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16088539361953735, + "margin_dpo/beta_margin_grad_std": 0.20914097130298615, + "margin_dpo/beta_margin_mean": 3.49969744682312, + "margin_dpo/loss_margin_mean": 34.99697494506836, + "margin_dpo/margin_mean": 34.99697494506836, + "margin_dpo/margin_std": 29.85952377319336, + "step": 596 + }, + { + "epoch": 0.8766519823788547, + "grad_norm": 58.71807098388672, + "learning_rate": 2.3423053240837514e-08, + "logits/chosen": -0.5836566686630249, + "logits/rejected": -0.579143762588501, + "logps/chosen": -79.78302001953125, + "logps/ref_chosen": -57.26078796386719, + "logps/ref_rejected": -100.6937255859375, + "logps/rejected": -158.7677459716797, + "loss": 0.5176, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16815660893917084, + "margin_dpo/beta_margin_grad_std": 0.23717570304870605, + "margin_dpo/beta_margin_mean": 3.555178642272949, + "margin_dpo/loss_margin_mean": 35.551788330078125, + "margin_dpo/margin_mean": 35.55178451538086, + "margin_dpo/margin_std": 31.711952209472656, + "step": 597 + }, + { + "epoch": 0.8781204111600588, + "grad_norm": 66.43830871582031, + "learning_rate": 2.2883682045119062e-08, + "logits/chosen": -0.6284604072570801, + "logits/rejected": -0.5999557375907898, + "logps/chosen": -75.705078125, + "logps/ref_chosen": -52.51850509643555, + "logps/ref_rejected": -89.44385528564453, + "logps/rejected": -145.19064331054688, + "loss": 0.4578, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14189483225345612, + "margin_dpo/beta_margin_grad_std": 0.1853117048740387, + "margin_dpo/beta_margin_mean": 3.256021499633789, + "margin_dpo/loss_margin_mean": 32.560211181640625, + "margin_dpo/margin_mean": 32.560211181640625, + "margin_dpo/margin_std": 24.856882095336914, + "step": 598 + }, + { + "epoch": 0.8795888399412628, + "grad_norm": 59.694637298583984, + "learning_rate": 2.2350295575598367e-08, + "logits/chosen": -0.6019773483276367, + "logits/rejected": -0.5818980932235718, + "logps/chosen": -71.63743591308594, + "logps/ref_chosen": -49.802677154541016, + "logps/ref_rejected": -82.978515625, + "logps/rejected": -137.6220245361328, + "loss": 0.4546, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15810009837150574, + "margin_dpo/beta_margin_grad_std": 0.22325119376182556, + "margin_dpo/beta_margin_mean": 3.2808756828308105, + "margin_dpo/loss_margin_mean": 32.80875778198242, + "margin_dpo/margin_mean": 32.80875778198242, + "margin_dpo/margin_std": 25.88229751586914, + "step": 599 + }, + { + "epoch": 0.8810572687224669, + "grad_norm": 73.32799530029297, + "learning_rate": 2.1822907887504932e-08, + "logits/chosen": -0.6534677147865295, + "logits/rejected": -0.6272458434104919, + "logps/chosen": -88.00627899169922, + "logps/ref_chosen": -66.43487548828125, + "logps/ref_rejected": -85.45649719238281, + "logps/rejected": -137.1350555419922, + "loss": 0.4876, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.152080237865448, + "margin_dpo/beta_margin_grad_std": 0.21173834800720215, + "margin_dpo/beta_margin_mean": 3.0107154846191406, + "margin_dpo/loss_margin_mean": 30.107154846191406, + "margin_dpo/margin_mean": 30.107154846191406, + "margin_dpo/margin_std": 25.551097869873047, + "step": 600 + }, + { + "epoch": 0.8810572687224669, + "eval_logits/chosen": -0.6269975304603577, + "eval_logits/rejected": -0.6013357043266296, + "eval_logps/chosen": -105.93721771240234, + "eval_logps/ref_chosen": -79.05104064941406, + "eval_logps/ref_rejected": -86.79793548583984, + "eval_logps/rejected": -135.44046020507812, + "eval_loss": 0.4046096205711365, + "eval_margin_dpo/beta": 0.10000000149011612, + "eval_margin_dpo/beta_margin_grad_mean": -0.25697416067123413, + "eval_margin_dpo/beta_margin_grad_std": 0.25375545024871826, + "eval_margin_dpo/beta_margin_mean": 2.175632953643799, + "eval_margin_dpo/loss_margin_mean": 21.756330490112305, + "eval_margin_dpo/margin_mean": 21.756330490112305, + "eval_margin_dpo/margin_std": 26.337753295898438, + "eval_runtime": 39.8498, + "eval_samples_per_second": 58.695, + "eval_steps_per_second": 1.857, + "step": 600 + }, + { + "epoch": 0.882525697503671, + "grad_norm": 83.50579071044922, + "learning_rate": 2.1301532877994742e-08, + "logits/chosen": -0.6308251619338989, + "logits/rejected": -0.6028087139129639, + "logps/chosen": -85.07262420654297, + "logps/ref_chosen": -59.13360595703125, + "logps/ref_rejected": -94.69093322753906, + "logps/rejected": -154.66099548339844, + "loss": 0.5224, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1595621407032013, + "margin_dpo/beta_margin_grad_std": 0.24474212527275085, + "margin_dpo/beta_margin_mean": 3.403104543685913, + "margin_dpo/loss_margin_mean": 34.031044006347656, + "margin_dpo/margin_mean": 34.031044006347656, + "margin_dpo/margin_std": 28.710695266723633, + "step": 601 + }, + { + "epoch": 0.8839941262848752, + "grad_norm": 67.36071014404297, + "learning_rate": 2.0786184285784298e-08, + "logits/chosen": -0.6085352897644043, + "logits/rejected": -0.6077029705047607, + "logps/chosen": -66.83834838867188, + "logps/ref_chosen": -48.59352111816406, + "logps/ref_rejected": -87.6685562133789, + "logps/rejected": -143.52706909179688, + "loss": 0.3598, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12709340453147888, + "margin_dpo/beta_margin_grad_std": 0.19667461514472961, + "margin_dpo/beta_margin_mean": 3.7613697052001953, + "margin_dpo/loss_margin_mean": 37.61369705200195, + "margin_dpo/margin_mean": 37.61369705200195, + "margin_dpo/margin_std": 27.582782745361328, + "step": 602 + }, + { + "epoch": 0.8854625550660793, + "grad_norm": 65.35578918457031, + "learning_rate": 2.0276875690788204e-08, + "logits/chosen": -0.6445978879928589, + "logits/rejected": -0.6060948371887207, + "logps/chosen": -90.91526794433594, + "logps/ref_chosen": -70.41461944580078, + "logps/ref_rejected": -100.32560729980469, + "logps/rejected": -153.026611328125, + "loss": 0.4681, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16203567385673523, + "margin_dpo/beta_margin_grad_std": 0.22206860780715942, + "margin_dpo/beta_margin_mean": 3.2200357913970947, + "margin_dpo/loss_margin_mean": 32.200355529785156, + "margin_dpo/margin_mean": 32.200355529785156, + "margin_dpo/margin_std": 26.239582061767578, + "step": 603 + }, + { + "epoch": 0.8869309838472834, + "grad_norm": 65.85285949707031, + "learning_rate": 1.977362051376158e-08, + "logits/chosen": -0.6049788594245911, + "logits/rejected": -0.5948315858840942, + "logps/chosen": -65.44568634033203, + "logps/ref_chosen": -46.45808029174805, + "logps/ref_rejected": -91.8544921875, + "logps/rejected": -146.36270141601562, + "loss": 0.4541, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14404089748859406, + "margin_dpo/beta_margin_grad_std": 0.20777881145477295, + "margin_dpo/beta_margin_mean": 3.552060604095459, + "margin_dpo/loss_margin_mean": 35.520606994628906, + "margin_dpo/margin_mean": 35.520606994628906, + "margin_dpo/margin_std": 29.537954330444336, + "step": 604 + }, + { + "epoch": 0.8883994126284875, + "grad_norm": 62.26566696166992, + "learning_rate": 1.9276432015946446e-08, + "logits/chosen": -0.5922667384147644, + "logits/rejected": -0.5747475028038025, + "logps/chosen": -90.86492919921875, + "logps/ref_chosen": -66.24933624267578, + "logps/ref_rejected": -102.30496978759766, + "logps/rejected": -158.65435791015625, + "loss": 0.4596, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1484421193599701, + "margin_dpo/beta_margin_grad_std": 0.19956421852111816, + "margin_dpo/beta_margin_mean": 3.1733784675598145, + "margin_dpo/loss_margin_mean": 31.733783721923828, + "margin_dpo/margin_mean": 31.733783721923828, + "margin_dpo/margin_std": 29.25701141357422, + "step": 605 + }, + { + "epoch": 0.8898678414096917, + "grad_norm": 40.07181167602539, + "learning_rate": 1.8785323298722093e-08, + "logits/chosen": -0.6057391166687012, + "logits/rejected": -0.5747348070144653, + "logps/chosen": -76.91024780273438, + "logps/ref_chosen": -54.819122314453125, + "logps/ref_rejected": -98.37147521972656, + "logps/rejected": -157.16664123535156, + "loss": 0.2922, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.11917038261890411, + "margin_dpo/beta_margin_grad_std": 0.14764106273651123, + "margin_dpo/beta_margin_mean": 3.6704044342041016, + "margin_dpo/loss_margin_mean": 36.704044342041016, + "margin_dpo/margin_mean": 36.704044342041016, + "margin_dpo/margin_std": 25.36406707763672, + "step": 606 + }, + { + "epoch": 0.8913362701908958, + "grad_norm": 52.19849395751953, + "learning_rate": 1.8300307303259904e-08, + "logits/chosen": -0.5950828194618225, + "logits/rejected": -0.560725212097168, + "logps/chosen": -79.23565673828125, + "logps/ref_chosen": -58.08403778076172, + "logps/ref_rejected": -79.777099609375, + "logps/rejected": -133.3597412109375, + "loss": 0.3369, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12986359000205994, + "margin_dpo/beta_margin_grad_std": 0.17091821134090424, + "margin_dpo/beta_margin_mean": 3.243102550506592, + "margin_dpo/loss_margin_mean": 32.43102264404297, + "margin_dpo/margin_mean": 32.43102264404297, + "margin_dpo/margin_std": 23.558351516723633, + "step": 607 + }, + { + "epoch": 0.8928046989720999, + "grad_norm": 59.17472839355469, + "learning_rate": 1.7821396810182437e-08, + "logits/chosen": -0.6549203395843506, + "logits/rejected": -0.6243371367454529, + "logps/chosen": -78.31192016601562, + "logps/ref_chosen": -57.450836181640625, + "logps/ref_rejected": -94.77339172363281, + "logps/rejected": -148.8663330078125, + "loss": 0.4835, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15288802981376648, + "margin_dpo/beta_margin_grad_std": 0.22536322474479675, + "margin_dpo/beta_margin_mean": 3.3231868743896484, + "margin_dpo/loss_margin_mean": 33.231868743896484, + "margin_dpo/margin_mean": 33.23186492919922, + "margin_dpo/margin_std": 26.378620147705078, + "step": 608 + }, + { + "epoch": 0.8942731277533039, + "grad_norm": 66.138427734375, + "learning_rate": 1.7348604439226617e-08, + "logits/chosen": -0.6333421468734741, + "logits/rejected": -0.5963184833526611, + "logps/chosen": -82.09093475341797, + "logps/ref_chosen": -58.805355072021484, + "logps/ref_rejected": -88.81600952148438, + "logps/rejected": -145.73898315429688, + "loss": 0.3546, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12897904217243195, + "margin_dpo/beta_margin_grad_std": 0.18817874789237976, + "margin_dpo/beta_margin_mean": 3.363740921020508, + "margin_dpo/loss_margin_mean": 33.63740539550781, + "margin_dpo/margin_mean": 33.63740539550781, + "margin_dpo/margin_std": 24.00457763671875, + "step": 609 + }, + { + "epoch": 0.895741556534508, + "grad_norm": 75.1207275390625, + "learning_rate": 1.6881942648911074e-08, + "logits/chosen": -0.5928279161453247, + "logits/rejected": -0.5319284200668335, + "logps/chosen": -90.37582397460938, + "logps/ref_chosen": -65.69503784179688, + "logps/ref_rejected": -83.4053955078125, + "logps/rejected": -140.68991088867188, + "loss": 0.4574, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1538185179233551, + "margin_dpo/beta_margin_grad_std": 0.21573176980018616, + "margin_dpo/beta_margin_mean": 3.26037335395813, + "margin_dpo/loss_margin_mean": 32.60373306274414, + "margin_dpo/margin_mean": 32.60373306274414, + "margin_dpo/margin_std": 24.9559326171875, + "step": 610 + }, + { + "epoch": 0.8972099853157122, + "grad_norm": 48.684600830078125, + "learning_rate": 1.6421423736208e-08, + "logits/chosen": -0.6442773342132568, + "logits/rejected": -0.6083732843399048, + "logps/chosen": -74.56732177734375, + "logps/ref_chosen": -52.59947204589844, + "logps/ref_rejected": -86.33099365234375, + "logps/rejected": -144.01742553710938, + "loss": 0.3964, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14588133990764618, + "margin_dpo/beta_margin_grad_std": 0.19904646277427673, + "margin_dpo/beta_margin_mean": 3.5718588829040527, + "margin_dpo/loss_margin_mean": 35.718589782714844, + "margin_dpo/margin_mean": 35.718589782714844, + "margin_dpo/margin_std": 28.007495880126953, + "step": 611 + }, + { + "epoch": 0.8986784140969163, + "grad_norm": 45.877662658691406, + "learning_rate": 1.5967059836219042e-08, + "logits/chosen": -0.6410280466079712, + "logits/rejected": -0.582598090171814, + "logps/chosen": -80.21870422363281, + "logps/ref_chosen": -59.32372283935547, + "logps/ref_rejected": -88.31239318847656, + "logps/rejected": -150.30587768554688, + "loss": 0.2722, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.10045182704925537, + "margin_dpo/beta_margin_grad_std": 0.16613739728927612, + "margin_dpo/beta_margin_mean": 4.109850883483887, + "margin_dpo/loss_margin_mean": 41.0985107421875, + "margin_dpo/margin_mean": 41.0985107421875, + "margin_dpo/margin_std": 27.613842010498047, + "step": 612 + }, + { + "epoch": 0.9001468428781204, + "grad_norm": 50.60771942138672, + "learning_rate": 1.551886292185553e-08, + "logits/chosen": -0.6397769451141357, + "logits/rejected": -0.6355684995651245, + "logps/chosen": -80.78131866455078, + "logps/ref_chosen": -59.72996520996094, + "logps/ref_rejected": -105.10753631591797, + "logps/rejected": -161.82345581054688, + "loss": 0.3682, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13177156448364258, + "margin_dpo/beta_margin_grad_std": 0.19979646801948547, + "margin_dpo/beta_margin_mean": 3.566455841064453, + "margin_dpo/loss_margin_mean": 35.66455841064453, + "margin_dpo/margin_mean": 35.66455841064453, + "margin_dpo/margin_std": 27.262413024902344, + "step": 613 + }, + { + "epoch": 0.9016152716593245, + "grad_norm": 46.63825988769531, + "learning_rate": 1.507684480352292e-08, + "logits/chosen": -0.58838951587677, + "logits/rejected": -0.5823639035224915, + "logps/chosen": -76.70652770996094, + "logps/ref_chosen": -52.93898010253906, + "logps/ref_rejected": -104.67938232421875, + "logps/rejected": -164.14959716796875, + "loss": 0.3003, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.11585717648267746, + "margin_dpo/beta_margin_grad_std": 0.16684673726558685, + "margin_dpo/beta_margin_mean": 3.57026743888855, + "margin_dpo/loss_margin_mean": 35.702674865722656, + "margin_dpo/margin_mean": 35.702674865722656, + "margin_dpo/margin_std": 25.349502563476562, + "step": 614 + }, + { + "epoch": 0.9030837004405287, + "grad_norm": 42.50800323486328, + "learning_rate": 1.4641017128809801e-08, + "logits/chosen": -0.5662412047386169, + "logits/rejected": -0.5331077575683594, + "logps/chosen": -87.12669372558594, + "logps/ref_chosen": -65.81727600097656, + "logps/ref_rejected": -95.17749786376953, + "logps/rejected": -146.67713928222656, + "loss": 0.4066, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1578460931777954, + "margin_dpo/beta_margin_grad_std": 0.1820681393146515, + "margin_dpo/beta_margin_mean": 3.019021987915039, + "margin_dpo/loss_margin_mean": 30.19021987915039, + "margin_dpo/margin_mean": 30.19021987915039, + "margin_dpo/margin_std": 23.05301284790039, + "step": 615 + }, + { + "epoch": 0.9045521292217328, + "grad_norm": 77.02394104003906, + "learning_rate": 1.4211391382180637e-08, + "logits/chosen": -0.5957802534103394, + "logits/rejected": -0.5418244004249573, + "logps/chosen": -88.68885040283203, + "logps/ref_chosen": -65.13285827636719, + "logps/ref_rejected": -74.70050048828125, + "logps/rejected": -130.87673950195312, + "loss": 0.4978, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15978117287158966, + "margin_dpo/beta_margin_grad_std": 0.22819003462791443, + "margin_dpo/beta_margin_mean": 3.26202392578125, + "margin_dpo/loss_margin_mean": 32.6202392578125, + "margin_dpo/margin_mean": 32.6202392578125, + "margin_dpo/margin_std": 29.516948699951172, + "step": 616 + }, + { + "epoch": 0.9060205580029369, + "grad_norm": 49.953460693359375, + "learning_rate": 1.378797888467345e-08, + "logits/chosen": -0.5749341249465942, + "logits/rejected": -0.53103107213974, + "logps/chosen": -87.75241088867188, + "logps/ref_chosen": -63.005550384521484, + "logps/ref_rejected": -64.234130859375, + "logps/rejected": -118.99295043945312, + "loss": 0.3848, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14862868189811707, + "margin_dpo/beta_margin_grad_std": 0.17751744389533997, + "margin_dpo/beta_margin_mean": 3.0011959075927734, + "margin_dpo/loss_margin_mean": 30.011959075927734, + "margin_dpo/margin_mean": 30.011959075927734, + "margin_dpo/margin_std": 23.59270477294922, + "step": 617 + }, + { + "epoch": 0.9074889867841409, + "grad_norm": 66.36804962158203, + "learning_rate": 1.3370790793601371e-08, + "logits/chosen": -0.6147041320800781, + "logits/rejected": -0.5852859616279602, + "logps/chosen": -90.93468475341797, + "logps/ref_chosen": -67.10135650634766, + "logps/ref_rejected": -92.15339660644531, + "logps/rejected": -146.77523803710938, + "loss": 0.4572, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16413499414920807, + "margin_dpo/beta_margin_grad_std": 0.19249024987220764, + "margin_dpo/beta_margin_mean": 3.0788497924804688, + "margin_dpo/loss_margin_mean": 30.78849983215332, + "margin_dpo/margin_mean": 30.788501739501953, + "margin_dpo/margin_std": 26.1810245513916, + "step": 618 + }, + { + "epoch": 0.908957415565345, + "grad_norm": 60.24756622314453, + "learning_rate": 1.2959838102258535e-08, + "logits/chosen": -0.5955780744552612, + "logits/rejected": -0.5634878873825073, + "logps/chosen": -79.19235229492188, + "logps/ref_chosen": -55.978233337402344, + "logps/ref_rejected": -93.1854019165039, + "logps/rejected": -149.55165100097656, + "loss": 0.4659, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1654718816280365, + "margin_dpo/beta_margin_grad_std": 0.21678967773914337, + "margin_dpo/beta_margin_mean": 3.3152127265930176, + "margin_dpo/loss_margin_mean": 33.152130126953125, + "margin_dpo/margin_mean": 33.152130126953125, + "margin_dpo/margin_std": 29.86014175415039, + "step": 619 + }, + { + "epoch": 0.9104258443465492, + "grad_norm": 35.56299591064453, + "learning_rate": 1.2555131639630567e-08, + "logits/chosen": -0.6245037317276001, + "logits/rejected": -0.5862281322479248, + "logps/chosen": -79.94758605957031, + "logps/ref_chosen": -59.79750061035156, + "logps/ref_rejected": -78.41075134277344, + "logps/rejected": -134.01303100585938, + "loss": 0.2607, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1101793646812439, + "margin_dpo/beta_margin_grad_std": 0.12655286490917206, + "margin_dpo/beta_margin_mean": 3.5452189445495605, + "margin_dpo/loss_margin_mean": 35.45219039916992, + "margin_dpo/margin_mean": 35.45219039916992, + "margin_dpo/margin_std": 25.871028900146484, + "step": 620 + }, + { + "epoch": 0.9118942731277533, + "grad_norm": 37.061790466308594, + "learning_rate": 1.2156682070109086e-08, + "logits/chosen": -0.606106698513031, + "logits/rejected": -0.5785382986068726, + "logps/chosen": -72.80471801757812, + "logps/ref_chosen": -53.933753967285156, + "logps/ref_rejected": -88.36952209472656, + "logps/rejected": -143.35723876953125, + "loss": 0.3122, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.10982675105333328, + "margin_dpo/beta_margin_grad_std": 0.17607754468917847, + "margin_dpo/beta_margin_mean": 3.61167573928833, + "margin_dpo/loss_margin_mean": 36.11675262451172, + "margin_dpo/margin_mean": 36.11675262451172, + "margin_dpo/margin_std": 26.981311798095703, + "step": 621 + }, + { + "epoch": 0.9133627019089574, + "grad_norm": 49.68582534790039, + "learning_rate": 1.1764499893210878e-08, + "logits/chosen": -0.5509716272354126, + "logits/rejected": -0.49232321977615356, + "logps/chosen": -82.90745544433594, + "logps/ref_chosen": -60.28582000732422, + "logps/ref_rejected": -85.51873779296875, + "logps/rejected": -144.81259155273438, + "loss": 0.3885, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13941837847232819, + "margin_dpo/beta_margin_grad_std": 0.1975843608379364, + "margin_dpo/beta_margin_mean": 3.6672213077545166, + "margin_dpo/loss_margin_mean": 36.672210693359375, + "margin_dpo/margin_mean": 36.672210693359375, + "margin_dpo/margin_std": 28.6815185546875, + "step": 622 + }, + { + "epoch": 0.9148311306901615, + "grad_norm": 73.91598510742188, + "learning_rate": 1.1378595443300998e-08, + "logits/chosen": -0.632436990737915, + "logits/rejected": -0.5964562892913818, + "logps/chosen": -88.94279479980469, + "logps/ref_chosen": -64.15696716308594, + "logps/ref_rejected": -85.08304595947266, + "logps/rejected": -140.305419921875, + "loss": 0.5623, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18874922394752502, + "margin_dpo/beta_margin_grad_std": 0.23199373483657837, + "margin_dpo/beta_margin_mean": 3.0436534881591797, + "margin_dpo/loss_margin_mean": 30.436534881591797, + "margin_dpo/margin_mean": 30.436534881591797, + "margin_dpo/margin_std": 29.26456069946289, + "step": 623 + }, + { + "epoch": 0.9162995594713657, + "grad_norm": 69.39524841308594, + "learning_rate": 1.0998978889320582e-08, + "logits/chosen": -0.6819274425506592, + "logits/rejected": -0.6118913888931274, + "logps/chosen": -94.78079986572266, + "logps/ref_chosen": -71.91862487792969, + "logps/ref_rejected": -97.13203430175781, + "logps/rejected": -157.30023193359375, + "loss": 0.4915, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14777633547782898, + "margin_dpo/beta_margin_grad_std": 0.2476031631231308, + "margin_dpo/beta_margin_mean": 3.730602741241455, + "margin_dpo/loss_margin_mean": 37.30602264404297, + "margin_dpo/margin_mean": 37.30602264404297, + "margin_dpo/margin_std": 27.589237213134766, + "step": 624 + }, + { + "epoch": 0.9177679882525698, + "grad_norm": 48.65541458129883, + "learning_rate": 1.0625660234518913e-08, + "logits/chosen": -0.5835287570953369, + "logits/rejected": -0.5436596870422363, + "logps/chosen": -81.64682006835938, + "logps/ref_chosen": -58.342071533203125, + "logps/ref_rejected": -86.09038543701172, + "logps/rejected": -145.12460327148438, + "loss": 0.3486, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13256171345710754, + "margin_dpo/beta_margin_grad_std": 0.18188360333442688, + "margin_dpo/beta_margin_mean": 3.5729477405548096, + "margin_dpo/loss_margin_mean": 35.72947692871094, + "margin_dpo/margin_mean": 35.72947692871094, + "margin_dpo/margin_std": 28.54417610168457, + "step": 625 + }, + { + "epoch": 0.9192364170337739, + "grad_norm": 63.34779739379883, + "learning_rate": 1.0258649316189721e-08, + "logits/chosen": -0.5459762811660767, + "logits/rejected": -0.5098272562026978, + "logps/chosen": -99.06941223144531, + "logps/ref_chosen": -75.11260986328125, + "logps/ref_rejected": -99.18872833251953, + "logps/rejected": -153.42007446289062, + "loss": 0.5183, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18455414474010468, + "margin_dpo/beta_margin_grad_std": 0.21206972002983093, + "margin_dpo/beta_margin_mean": 3.0274548530578613, + "margin_dpo/loss_margin_mean": 30.274547576904297, + "margin_dpo/margin_mean": 30.274547576904297, + "margin_dpo/margin_std": 28.38648223876953, + "step": 626 + }, + { + "epoch": 0.920704845814978, + "grad_norm": 79.35140228271484, + "learning_rate": 9.897955805412e-09, + "logits/chosen": -0.5999346971511841, + "logits/rejected": -0.6070972681045532, + "logps/chosen": -69.15809631347656, + "logps/ref_chosen": -47.74314880371094, + "logps/ref_rejected": -106.75448608398438, + "logps/rejected": -162.14773559570312, + "loss": 0.6092, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1894698441028595, + "margin_dpo/beta_margin_grad_std": 0.2491467446088791, + "margin_dpo/beta_margin_mean": 3.397829055786133, + "margin_dpo/loss_margin_mean": 33.97829055786133, + "margin_dpo/margin_mean": 33.97828674316406, + "margin_dpo/margin_std": 34.131160736083984, + "step": 627 + }, + { + "epoch": 0.922173274596182, + "grad_norm": 40.400997161865234, + "learning_rate": 9.543589206795238e-09, + "logits/chosen": -0.5975438356399536, + "logits/rejected": -0.5776046514511108, + "logps/chosen": -82.31864929199219, + "logps/ref_chosen": -60.182945251464844, + "logps/ref_rejected": -101.55467224121094, + "logps/rejected": -159.41123962402344, + "loss": 0.298, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1191520020365715, + "margin_dpo/beta_margin_grad_std": 0.15398246049880981, + "margin_dpo/beta_margin_mean": 3.572086811065674, + "margin_dpo/loss_margin_mean": 35.72086715698242, + "margin_dpo/margin_mean": 35.72086715698242, + "margin_dpo/margin_std": 25.814367294311523, + "step": 628 + }, + { + "epoch": 0.9236417033773862, + "grad_norm": 62.328609466552734, + "learning_rate": 9.19555885822887e-09, + "logits/chosen": -0.6410259008407593, + "logits/rejected": -0.597222089767456, + "logps/chosen": -86.5196533203125, + "logps/ref_chosen": -64.21353912353516, + "logps/ref_rejected": -91.65367126464844, + "logps/rejected": -145.63623046875, + "loss": 0.4052, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14122983813285828, + "margin_dpo/beta_margin_grad_std": 0.1920766830444336, + "margin_dpo/beta_margin_mean": 3.167644500732422, + "margin_dpo/loss_margin_mean": 31.67644500732422, + "margin_dpo/margin_mean": 31.67644500732422, + "margin_dpo/margin_std": 24.94976043701172, + "step": 629 + }, + { + "epoch": 0.9251101321585903, + "grad_norm": 61.22914505004883, + "learning_rate": 8.85387393063622e-09, + "logits/chosen": -0.662344217300415, + "logits/rejected": -0.6153937578201294, + "logps/chosen": -79.80152130126953, + "logps/ref_chosen": -59.29100036621094, + "logps/ref_rejected": -83.59829711914062, + "logps/rejected": -134.3487091064453, + "loss": 0.464, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16477835178375244, + "margin_dpo/beta_margin_grad_std": 0.2072598934173584, + "margin_dpo/beta_margin_mean": 3.023988962173462, + "margin_dpo/loss_margin_mean": 30.23988914489746, + "margin_dpo/margin_mean": 30.23988914489746, + "margin_dpo/margin_std": 25.428054809570312, + "step": 630 + }, + { + "epoch": 0.9265785609397944, + "grad_norm": 93.05696105957031, + "learning_rate": 8.518543427732949e-09, + "logits/chosen": -0.6291791200637817, + "logits/rejected": -0.586702287197113, + "logps/chosen": -84.07586669921875, + "logps/ref_chosen": -59.45360565185547, + "logps/ref_rejected": -80.95157623291016, + "logps/rejected": -133.77267456054688, + "loss": 0.7308, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.19751125574111938, + "margin_dpo/beta_margin_grad_std": 0.26264500617980957, + "margin_dpo/beta_margin_mean": 2.8198840618133545, + "margin_dpo/loss_margin_mean": 28.198841094970703, + "margin_dpo/margin_mean": 28.198841094970703, + "margin_dpo/margin_std": 29.28610610961914, + "step": 631 + }, + { + "epoch": 0.9280469897209985, + "grad_norm": 86.49762725830078, + "learning_rate": 8.189576185789637e-09, + "logits/chosen": -0.6317383050918579, + "logits/rejected": -0.5975475311279297, + "logps/chosen": -85.93399047851562, + "logps/ref_chosen": -61.35155487060547, + "logps/ref_rejected": -86.16017150878906, + "logps/rejected": -143.37826538085938, + "loss": 0.7104, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16609230637550354, + "margin_dpo/beta_margin_grad_std": 0.26753705739974976, + "margin_dpo/beta_margin_mean": 3.263566255569458, + "margin_dpo/loss_margin_mean": 32.63566207885742, + "margin_dpo/margin_mean": 32.63566207885742, + "margin_dpo/margin_std": 29.368499755859375, + "step": 632 + }, + { + "epoch": 0.9295154185022027, + "grad_norm": 59.74918746948242, + "learning_rate": 7.866980873399015e-09, + "logits/chosen": -0.6477575898170471, + "logits/rejected": -0.6343536376953125, + "logps/chosen": -80.77423095703125, + "logps/ref_chosen": -57.278167724609375, + "logps/ref_rejected": -91.58395385742188, + "logps/rejected": -142.47764587402344, + "loss": 0.5478, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1920245885848999, + "margin_dpo/beta_margin_grad_std": 0.21717044711112976, + "margin_dpo/beta_margin_mean": 2.739762783050537, + "margin_dpo/loss_margin_mean": 27.397626876831055, + "margin_dpo/margin_mean": 27.397626876831055, + "margin_dpo/margin_std": 24.44476890563965, + "step": 633 + }, + { + "epoch": 0.9309838472834068, + "grad_norm": 71.1202392578125, + "learning_rate": 7.550765991247654e-09, + "logits/chosen": -0.5574454069137573, + "logits/rejected": -0.539508581161499, + "logps/chosen": -93.28065490722656, + "logps/ref_chosen": -66.61896514892578, + "logps/ref_rejected": -107.12565612792969, + "logps/rejected": -161.80874633789062, + "loss": 0.6534, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.20834138989448547, + "margin_dpo/beta_margin_grad_std": 0.24892690777778625, + "margin_dpo/beta_margin_mean": 2.802140235900879, + "margin_dpo/loss_margin_mean": 28.021400451660156, + "margin_dpo/margin_mean": 28.021400451660156, + "margin_dpo/margin_std": 29.189456939697266, + "step": 634 + }, + { + "epoch": 0.9324522760646109, + "grad_norm": 49.81635665893555, + "learning_rate": 7.240939871891699e-09, + "logits/chosen": -0.627153754234314, + "logits/rejected": -0.5792471170425415, + "logps/chosen": -96.72550201416016, + "logps/ref_chosen": -73.95551300048828, + "logps/ref_rejected": -82.50045776367188, + "logps/rejected": -133.87303161621094, + "loss": 0.412, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.155021071434021, + "margin_dpo/beta_margin_grad_std": 0.18796978890895844, + "margin_dpo/beta_margin_mean": 2.860257387161255, + "margin_dpo/loss_margin_mean": 28.60257339477539, + "margin_dpo/margin_mean": 28.60257339477539, + "margin_dpo/margin_std": 22.55862808227539, + "step": 635 + }, + { + "epoch": 0.933920704845815, + "grad_norm": 49.317588806152344, + "learning_rate": 6.937510679537628e-09, + "logits/chosen": -0.5662115812301636, + "logits/rejected": -0.5381814241409302, + "logps/chosen": -82.45319366455078, + "logps/ref_chosen": -59.628910064697266, + "logps/ref_rejected": -81.97883605957031, + "logps/rejected": -137.88497924804688, + "loss": 0.3993, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13900163769721985, + "margin_dpo/beta_margin_grad_std": 0.21276052296161652, + "margin_dpo/beta_margin_mean": 3.308185577392578, + "margin_dpo/loss_margin_mean": 33.08185958862305, + "margin_dpo/margin_mean": 33.08185577392578, + "margin_dpo/margin_std": 23.71514129638672, + "step": 636 + }, + { + "epoch": 0.9353891336270191, + "grad_norm": 53.16542434692383, + "learning_rate": 6.640486409826785e-09, + "logits/chosen": -0.5961561799049377, + "logits/rejected": -0.5736096501350403, + "logps/chosen": -73.35490417480469, + "logps/ref_chosen": -49.652687072753906, + "logps/ref_rejected": -98.40513610839844, + "logps/rejected": -155.1796112060547, + "loss": 0.3585, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1328592598438263, + "margin_dpo/beta_margin_grad_std": 0.18699194490909576, + "margin_dpo/beta_margin_mean": 3.3072257041931152, + "margin_dpo/loss_margin_mean": 33.0722541809082, + "margin_dpo/margin_mean": 33.0722541809082, + "margin_dpo/margin_std": 25.20583724975586, + "step": 637 + }, + { + "epoch": 0.9368575624082232, + "grad_norm": 42.64322280883789, + "learning_rate": 6.349874889624962e-09, + "logits/chosen": -0.5751190185546875, + "logits/rejected": -0.5282764434814453, + "logps/chosen": -78.66455841064453, + "logps/ref_chosen": -58.156646728515625, + "logps/ref_rejected": -79.3014907836914, + "logps/rejected": -136.99473571777344, + "loss": 0.3226, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12046533823013306, + "margin_dpo/beta_margin_grad_std": 0.1725090891122818, + "margin_dpo/beta_margin_mean": 3.718533515930176, + "margin_dpo/loss_margin_mean": 37.185333251953125, + "margin_dpo/margin_mean": 37.185333251953125, + "margin_dpo/margin_std": 27.281875610351562, + "step": 638 + }, + { + "epoch": 0.9383259911894273, + "grad_norm": 57.3282470703125, + "learning_rate": 6.065683776815933e-09, + "logits/chosen": -0.5723918676376343, + "logits/rejected": -0.5074343681335449, + "logps/chosen": -97.81383514404297, + "logps/ref_chosen": -72.32319641113281, + "logps/ref_rejected": -74.2749252319336, + "logps/rejected": -130.6868896484375, + "loss": 0.4425, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14578017592430115, + "margin_dpo/beta_margin_grad_std": 0.1988290250301361, + "margin_dpo/beta_margin_mean": 3.092132568359375, + "margin_dpo/loss_margin_mean": 30.921327590942383, + "margin_dpo/margin_mean": 30.921327590942383, + "margin_dpo/margin_std": 24.539897918701172, + "step": 639 + }, + { + "epoch": 0.9397944199706314, + "grad_norm": 45.74781036376953, + "learning_rate": 5.7879205600998296e-09, + "logits/chosen": -0.5912868976593018, + "logits/rejected": -0.5555776357650757, + "logps/chosen": -78.59037780761719, + "logps/ref_chosen": -56.13436508178711, + "logps/ref_rejected": -108.60014343261719, + "logps/rejected": -167.8488006591797, + "loss": 0.3056, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12402527034282684, + "margin_dpo/beta_margin_grad_std": 0.15019166469573975, + "margin_dpo/beta_margin_mean": 3.6792640686035156, + "margin_dpo/loss_margin_mean": 36.792640686035156, + "margin_dpo/margin_mean": 36.792640686035156, + "margin_dpo/margin_std": 29.762346267700195, + "step": 640 + }, + { + "epoch": 0.9412628487518355, + "grad_norm": 51.42761993408203, + "learning_rate": 5.516592558795746e-09, + "logits/chosen": -0.6235780715942383, + "logits/rejected": -0.5653523206710815, + "logps/chosen": -88.91046142578125, + "logps/ref_chosen": -64.99689483642578, + "logps/ref_rejected": -86.99232482910156, + "logps/rejected": -142.96499633789062, + "loss": 0.3758, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14689452946186066, + "margin_dpo/beta_margin_grad_std": 0.16710370779037476, + "margin_dpo/beta_margin_mean": 3.2059097290039062, + "margin_dpo/loss_margin_mean": 32.05909729003906, + "margin_dpo/margin_mean": 32.05909729003906, + "margin_dpo/margin_std": 29.564998626708984, + "step": 641 + }, + { + "epoch": 0.9427312775330396, + "grad_norm": 79.5660629272461, + "learning_rate": 5.251706922648868e-09, + "logits/chosen": -0.5625093579292297, + "logits/rejected": -0.5258715152740479, + "logps/chosen": -90.38214111328125, + "logps/ref_chosen": -65.68924713134766, + "logps/ref_rejected": -110.24205017089844, + "logps/rejected": -170.61810302734375, + "loss": 0.4846, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15378239750862122, + "margin_dpo/beta_margin_grad_std": 0.2159113883972168, + "margin_dpo/beta_margin_mean": 3.5683140754699707, + "margin_dpo/loss_margin_mean": 35.683143615722656, + "margin_dpo/margin_mean": 35.68313980102539, + "margin_dpo/margin_std": 30.738298416137695, + "step": 642 + }, + { + "epoch": 0.9441997063142438, + "grad_norm": 50.391510009765625, + "learning_rate": 4.993270631642038e-09, + "logits/chosen": -0.6333717107772827, + "logits/rejected": -0.6052130460739136, + "logps/chosen": -71.46492004394531, + "logps/ref_chosen": -51.94999694824219, + "logps/ref_rejected": -87.46833801269531, + "logps/rejected": -137.68312072753906, + "loss": 0.4285, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14480799436569214, + "margin_dpo/beta_margin_grad_std": 0.19451884925365448, + "margin_dpo/beta_margin_mean": 3.069986581802368, + "margin_dpo/loss_margin_mean": 30.699865341186523, + "margin_dpo/margin_mean": 30.699865341186523, + "margin_dpo/margin_std": 23.991680145263672, + "step": 643 + }, + { + "epoch": 0.9456681350954479, + "grad_norm": 77.25302124023438, + "learning_rate": 4.741290495811873e-09, + "logits/chosen": -0.562663197517395, + "logits/rejected": -0.5326156616210938, + "logps/chosen": -79.98289489746094, + "logps/ref_chosen": -59.017662048339844, + "logps/ref_rejected": -87.13668823242188, + "logps/rejected": -138.28848266601562, + "loss": 0.5598, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1840367615222931, + "margin_dpo/beta_margin_grad_std": 0.2329137921333313, + "margin_dpo/beta_margin_mean": 3.018655776977539, + "margin_dpo/loss_margin_mean": 30.18655776977539, + "margin_dpo/margin_mean": 30.18655776977539, + "margin_dpo/margin_std": 28.526702880859375, + "step": 644 + }, + { + "epoch": 0.947136563876652, + "grad_norm": 75.70096588134766, + "learning_rate": 4.495773155069299e-09, + "logits/chosen": -0.5764358043670654, + "logits/rejected": -0.5558615922927856, + "logps/chosen": -79.81644439697266, + "logps/ref_chosen": -55.87602233886719, + "logps/ref_rejected": -97.78080749511719, + "logps/rejected": -150.7290496826172, + "loss": 0.5337, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18957525491714478, + "margin_dpo/beta_margin_grad_std": 0.21459272503852844, + "margin_dpo/beta_margin_mean": 2.9007816314697266, + "margin_dpo/loss_margin_mean": 29.007814407348633, + "margin_dpo/margin_mean": 29.007816314697266, + "margin_dpo/margin_std": 27.730712890625, + "step": 645 + }, + { + "epoch": 0.9486049926578561, + "grad_norm": 50.839752197265625, + "learning_rate": 4.256725079024553e-09, + "logits/chosen": -0.6054178476333618, + "logits/rejected": -0.5551047325134277, + "logps/chosen": -84.0325927734375, + "logps/ref_chosen": -61.275787353515625, + "logps/ref_rejected": -77.50580596923828, + "logps/rejected": -133.4116668701172, + "loss": 0.3167, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.11927846819162369, + "margin_dpo/beta_margin_grad_std": 0.160105362534523, + "margin_dpo/beta_margin_mean": 3.3149056434631348, + "margin_dpo/loss_margin_mean": 33.14905548095703, + "margin_dpo/margin_mean": 33.14905548095703, + "margin_dpo/margin_std": 22.489887237548828, + "step": 646 + }, + { + "epoch": 0.9500734214390602, + "grad_norm": 81.40752410888672, + "learning_rate": 4.024152566816791e-09, + "logits/chosen": -0.5593730807304382, + "logits/rejected": -0.5357339382171631, + "logps/chosen": -78.91641235351562, + "logps/ref_chosen": -54.852413177490234, + "logps/ref_rejected": -93.5194091796875, + "logps/rejected": -150.42044067382812, + "loss": 0.4999, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16081605851650238, + "margin_dpo/beta_margin_grad_std": 0.2314681112766266, + "margin_dpo/beta_margin_mean": 3.2837038040161133, + "margin_dpo/loss_margin_mean": 32.837039947509766, + "margin_dpo/margin_mean": 32.837039947509766, + "margin_dpo/margin_std": 26.819320678710938, + "step": 647 + }, + { + "epoch": 0.9515418502202643, + "grad_norm": 48.46821212768555, + "learning_rate": 3.798061746947995e-09, + "logits/chosen": -0.61167973279953, + "logits/rejected": -0.6027648448944092, + "logps/chosen": -74.04869842529297, + "logps/ref_chosen": -54.17146682739258, + "logps/ref_rejected": -98.71279907226562, + "logps/rejected": -159.05592346191406, + "loss": 0.3695, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1377037763595581, + "margin_dpo/beta_margin_grad_std": 0.1926315426826477, + "margin_dpo/beta_margin_mean": 4.046590328216553, + "margin_dpo/loss_margin_mean": 40.465904235839844, + "margin_dpo/margin_mean": 40.465904235839844, + "margin_dpo/margin_std": 34.20042037963867, + "step": 648 + }, + { + "epoch": 0.9530102790014684, + "grad_norm": 50.36833572387695, + "learning_rate": 3.5784585771215235e-09, + "logits/chosen": -0.6533620357513428, + "logits/rejected": -0.6218982934951782, + "logps/chosen": -83.10621643066406, + "logps/ref_chosen": -62.4803466796875, + "logps/ref_rejected": -80.07717895507812, + "logps/rejected": -129.4200897216797, + "loss": 0.5299, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1947104036808014, + "margin_dpo/beta_margin_grad_std": 0.2096787691116333, + "margin_dpo/beta_margin_mean": 2.871704339981079, + "margin_dpo/loss_margin_mean": 28.717042922973633, + "margin_dpo/margin_mean": 28.717044830322266, + "margin_dpo/margin_std": 28.58915138244629, + "step": 649 + }, + { + "epoch": 0.9544787077826725, + "grad_norm": 59.41923522949219, + "learning_rate": 3.3653488440851253e-09, + "logits/chosen": -0.5570046901702881, + "logits/rejected": -0.5465147495269775, + "logps/chosen": -80.50581359863281, + "logps/ref_chosen": -56.09281921386719, + "logps/ref_rejected": -98.26483917236328, + "logps/rejected": -159.12442016601562, + "loss": 0.3573, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1336677372455597, + "margin_dpo/beta_margin_grad_std": 0.1835484653711319, + "margin_dpo/beta_margin_mean": 3.6446590423583984, + "margin_dpo/loss_margin_mean": 36.44658660888672, + "margin_dpo/margin_mean": 36.44658660888672, + "margin_dpo/margin_std": 28.654094696044922, + "step": 650 + }, + { + "epoch": 0.9559471365638766, + "grad_norm": 38.3771858215332, + "learning_rate": 3.158738163478475e-09, + "logits/chosen": -0.607953667640686, + "logits/rejected": -0.6089369058609009, + "logps/chosen": -63.09129333496094, + "logps/ref_chosen": -43.42544937133789, + "logps/ref_rejected": -99.9579086303711, + "logps/rejected": -155.23358154296875, + "loss": 0.3196, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1269427090883255, + "margin_dpo/beta_margin_grad_std": 0.16582195460796356, + "margin_dpo/beta_margin_mean": 3.560983657836914, + "margin_dpo/loss_margin_mean": 35.609832763671875, + "margin_dpo/margin_mean": 35.609832763671875, + "margin_dpo/margin_std": 27.03875732421875, + "step": 651 + }, + { + "epoch": 0.9574155653450808, + "grad_norm": 42.76301956176758, + "learning_rate": 2.9586319796851555e-09, + "logits/chosen": -0.641417384147644, + "logits/rejected": -0.6177515983581543, + "logps/chosen": -78.98847961425781, + "logps/ref_chosen": -62.57680892944336, + "logps/ref_rejected": -111.76779174804688, + "logps/rejected": -163.6732177734375, + "loss": 0.3394, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1356583833694458, + "margin_dpo/beta_margin_grad_std": 0.16708874702453613, + "margin_dpo/beta_margin_mean": 3.5493762493133545, + "margin_dpo/loss_margin_mean": 35.49375915527344, + "margin_dpo/margin_mean": 35.49375915527344, + "margin_dpo/margin_std": 28.135786056518555, + "step": 652 + }, + { + "epoch": 0.9588839941262849, + "grad_norm": 52.41037368774414, + "learning_rate": 2.7650355656892166e-09, + "logits/chosen": -0.6357418298721313, + "logits/rejected": -0.6146754026412964, + "logps/chosen": -84.56446838378906, + "logps/ref_chosen": -61.11295700073242, + "logps/ref_rejected": -103.24960327148438, + "logps/rejected": -162.56381225585938, + "loss": 0.3193, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.11913929879665375, + "margin_dpo/beta_margin_grad_std": 0.17990511655807495, + "margin_dpo/beta_margin_mean": 3.5862698554992676, + "margin_dpo/loss_margin_mean": 35.86269760131836, + "margin_dpo/margin_mean": 35.862701416015625, + "margin_dpo/margin_std": 25.911727905273438, + "step": 653 + }, + { + "epoch": 0.960352422907489, + "grad_norm": 70.10198974609375, + "learning_rate": 2.577954022936174e-09, + "logits/chosen": -0.6373894810676575, + "logits/rejected": -0.6333979368209839, + "logps/chosen": -87.05609130859375, + "logps/ref_chosen": -61.7281379699707, + "logps/ref_rejected": -98.7738037109375, + "logps/rejected": -153.52032470703125, + "loss": 0.5245, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17562022805213928, + "margin_dpo/beta_margin_grad_std": 0.2261282503604889, + "margin_dpo/beta_margin_mean": 2.9418554306030273, + "margin_dpo/loss_margin_mean": 29.418556213378906, + "margin_dpo/margin_mean": 29.418556213378906, + "margin_dpo/margin_std": 28.580772399902344, + "step": 654 + }, + { + "epoch": 0.9618208516886931, + "grad_norm": 69.47087860107422, + "learning_rate": 2.397392281198729e-09, + "logits/chosen": -0.6162744760513306, + "logits/rejected": -0.6169338226318359, + "logps/chosen": -71.07903289794922, + "logps/ref_chosen": -49.576812744140625, + "logps/ref_rejected": -98.29183197021484, + "logps/rejected": -150.40371704101562, + "loss": 0.5102, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.18301187455654144, + "margin_dpo/beta_margin_grad_std": 0.21452929079532623, + "margin_dpo/beta_margin_mean": 3.060966968536377, + "margin_dpo/loss_margin_mean": 30.609668731689453, + "margin_dpo/margin_mean": 30.609668731689453, + "margin_dpo/margin_std": 29.15388298034668, + "step": 655 + }, + { + "epoch": 0.9632892804698973, + "grad_norm": 40.41490936279297, + "learning_rate": 2.223355098446622e-09, + "logits/chosen": -0.5592731237411499, + "logits/rejected": -0.5631238222122192, + "logps/chosen": -73.43663024902344, + "logps/ref_chosen": -52.54943084716797, + "logps/ref_rejected": -113.67464447021484, + "logps/rejected": -176.38644409179688, + "loss": 0.2391, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.0935378447175026, + "margin_dpo/beta_margin_grad_std": 0.15634706616401672, + "margin_dpo/beta_margin_mean": 4.182460784912109, + "margin_dpo/loss_margin_mean": 41.824607849121094, + "margin_dpo/margin_mean": 41.824607849121094, + "margin_dpo/margin_std": 25.468910217285156, + "step": 656 + }, + { + "epoch": 0.9647577092511013, + "grad_norm": 45.6422233581543, + "learning_rate": 2.055847060721566e-09, + "logits/chosen": -0.5981370210647583, + "logits/rejected": -0.5761264562606812, + "logps/chosen": -68.69126892089844, + "logps/ref_chosen": -46.700538635253906, + "logps/ref_rejected": -97.91487121582031, + "logps/rejected": -157.28271484375, + "loss": 0.3403, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.11615358293056488, + "margin_dpo/beta_margin_grad_std": 0.175856813788414, + "margin_dpo/beta_margin_mean": 3.737710952758789, + "margin_dpo/loss_margin_mean": 37.377105712890625, + "margin_dpo/margin_mean": 37.377105712890625, + "margin_dpo/margin_std": 28.65097427368164, + "step": 657 + }, + { + "epoch": 0.9662261380323054, + "grad_norm": 57.90339660644531, + "learning_rate": 1.8948725820160662e-09, + "logits/chosen": -0.6264636516571045, + "logits/rejected": -0.5879380702972412, + "logps/chosen": -86.60824584960938, + "logps/ref_chosen": -60.958213806152344, + "logps/ref_rejected": -95.93949127197266, + "logps/rejected": -156.69830322265625, + "loss": 0.4473, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1491597294807434, + "margin_dpo/beta_margin_grad_std": 0.21673035621643066, + "margin_dpo/beta_margin_mean": 3.5108790397644043, + "margin_dpo/loss_margin_mean": 35.10879135131836, + "margin_dpo/margin_mean": 35.108787536621094, + "margin_dpo/margin_std": 29.642911911010742, + "step": 658 + }, + { + "epoch": 0.9676945668135095, + "grad_norm": 56.96932601928711, + "learning_rate": 1.7404359041573723e-09, + "logits/chosen": -0.5769931077957153, + "logits/rejected": -0.5069276690483093, + "logps/chosen": -96.17684936523438, + "logps/ref_chosen": -76.74298095703125, + "logps/ref_rejected": -87.4709701538086, + "logps/rejected": -141.29806518554688, + "loss": 0.4928, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16625207662582397, + "margin_dpo/beta_margin_grad_std": 0.23161795735359192, + "margin_dpo/beta_margin_mean": 3.4393229484558105, + "margin_dpo/loss_margin_mean": 34.39323043823242, + "margin_dpo/margin_mean": 34.39323043823242, + "margin_dpo/margin_std": 29.151775360107422, + "step": 659 + }, + { + "epoch": 0.9691629955947136, + "grad_norm": 51.775634765625, + "learning_rate": 1.592541096695571e-09, + "logits/chosen": -0.6120225191116333, + "logits/rejected": -0.5639553070068359, + "logps/chosen": -80.46331787109375, + "logps/ref_chosen": -59.047882080078125, + "logps/ref_rejected": -75.96005249023438, + "logps/rejected": -135.1595458984375, + "loss": 0.2938, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.11358514428138733, + "margin_dpo/beta_margin_grad_std": 0.17192693054676056, + "margin_dpo/beta_margin_mean": 3.778407096862793, + "margin_dpo/loss_margin_mean": 37.7840690612793, + "margin_dpo/margin_mean": 37.7840690612793, + "margin_dpo/margin_std": 27.653093338012695, + "step": 660 + }, + { + "epoch": 0.9706314243759178, + "grad_norm": 64.97528839111328, + "learning_rate": 1.4511920567963908e-09, + "logits/chosen": -0.5847969055175781, + "logits/rejected": -0.5379676818847656, + "logps/chosen": -71.49467468261719, + "logps/ref_chosen": -50.673973083496094, + "logps/ref_rejected": -86.00569152832031, + "logps/rejected": -141.72848510742188, + "loss": 0.4565, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.14623390138149261, + "margin_dpo/beta_margin_grad_std": 0.21268951892852783, + "margin_dpo/beta_margin_mean": 3.4902100563049316, + "margin_dpo/loss_margin_mean": 34.902099609375, + "margin_dpo/margin_mean": 34.902099609375, + "margin_dpo/margin_std": 29.28716278076172, + "step": 661 + }, + { + "epoch": 0.9720998531571219, + "grad_norm": 51.50436782836914, + "learning_rate": 1.3163925091384532e-09, + "logits/chosen": -0.6173335313796997, + "logits/rejected": -0.5659915208816528, + "logps/chosen": -93.51567077636719, + "logps/ref_chosen": -69.26106262207031, + "logps/ref_rejected": -89.05593872070312, + "logps/rejected": -144.10789489746094, + "loss": 0.3806, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1421985775232315, + "margin_dpo/beta_margin_grad_std": 0.1747194081544876, + "margin_dpo/beta_margin_mean": 3.0797348022460938, + "margin_dpo/loss_margin_mean": 30.797348022460938, + "margin_dpo/margin_mean": 30.797348022460938, + "margin_dpo/margin_std": 25.398778915405273, + "step": 662 + }, + { + "epoch": 0.973568281938326, + "grad_norm": 38.44890594482422, + "learning_rate": 1.1881460058152382e-09, + "logits/chosen": -0.647502064704895, + "logits/rejected": -0.6262093782424927, + "logps/chosen": -83.27813720703125, + "logps/ref_chosen": -64.87891387939453, + "logps/ref_rejected": -113.92536926269531, + "logps/rejected": -165.39273071289062, + "loss": 0.3283, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12441954016685486, + "margin_dpo/beta_margin_grad_std": 0.1589188277721405, + "margin_dpo/beta_margin_mean": 3.306814670562744, + "margin_dpo/loss_margin_mean": 33.068145751953125, + "margin_dpo/margin_mean": 33.068145751953125, + "margin_dpo/margin_std": 24.52547264099121, + "step": 663 + }, + { + "epoch": 0.9750367107195301, + "grad_norm": 69.16484832763672, + "learning_rate": 1.066455926241383e-09, + "logits/chosen": -0.5968215465545654, + "logits/rejected": -0.5680118799209595, + "logps/chosen": -84.52749633789062, + "logps/ref_chosen": -60.88847351074219, + "logps/ref_rejected": -105.521728515625, + "logps/rejected": -166.35784912109375, + "loss": 0.426, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.11887051165103912, + "margin_dpo/beta_margin_grad_std": 0.19583344459533691, + "margin_dpo/beta_margin_mean": 3.7197093963623047, + "margin_dpo/loss_margin_mean": 37.19709014892578, + "margin_dpo/margin_mean": 37.19709014892578, + "margin_dpo/margin_std": 27.019386291503906, + "step": 664 + }, + { + "epoch": 0.9765051395007343, + "grad_norm": 42.6618537902832, + "learning_rate": 9.513254770636137e-10, + "logits/chosen": -0.6599475145339966, + "logits/rejected": -0.618366003036499, + "logps/chosen": -81.63275146484375, + "logps/ref_chosen": -60.56413269042969, + "logps/ref_rejected": -84.8088150024414, + "logps/rejected": -137.7115478515625, + "loss": 0.3485, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13592152297496796, + "margin_dpo/beta_margin_grad_std": 0.1685512661933899, + "margin_dpo/beta_margin_mean": 3.183411121368408, + "margin_dpo/loss_margin_mean": 31.834110260009766, + "margin_dpo/margin_mean": 31.834110260009766, + "margin_dpo/margin_std": 23.29065704345703, + "step": 665 + }, + { + "epoch": 0.9779735682819384, + "grad_norm": 60.117515563964844, + "learning_rate": 8.427576920763956e-10, + "logits/chosen": -0.5868717432022095, + "logits/rejected": -0.5477631688117981, + "logps/chosen": -88.28842163085938, + "logps/ref_chosen": -64.41996002197266, + "logps/ref_rejected": -95.89163208007812, + "logps/rejected": -154.99008178710938, + "loss": 0.4224, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1320444643497467, + "margin_dpo/beta_margin_grad_std": 0.18931497633457184, + "margin_dpo/beta_margin_mean": 3.522998809814453, + "margin_dpo/loss_margin_mean": 35.22998809814453, + "margin_dpo/margin_mean": 35.22998809814453, + "margin_dpo/margin_std": 25.960124969482422, + "step": 666 + }, + { + "epoch": 0.9794419970631424, + "grad_norm": 56.65580749511719, + "learning_rate": 7.407554321417764e-10, + "logits/chosen": -0.6088787317276001, + "logits/rejected": -0.5590524673461914, + "logps/chosen": -94.58509063720703, + "logps/ref_chosen": -69.27703094482422, + "logps/ref_rejected": -87.83549499511719, + "logps/rejected": -147.4503173828125, + "loss": 0.3265, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12345196306705475, + "margin_dpo/beta_margin_grad_std": 0.16755497455596924, + "margin_dpo/beta_margin_mean": 3.4306764602661133, + "margin_dpo/loss_margin_mean": 34.3067626953125, + "margin_dpo/margin_mean": 34.3067626953125, + "margin_dpo/margin_std": 24.47415542602539, + "step": 667 + }, + { + "epoch": 0.9809104258443465, + "grad_norm": 70.59870910644531, + "learning_rate": 6.453213851142225e-10, + "logits/chosen": -0.6269364356994629, + "logits/rejected": -0.5885031819343567, + "logps/chosen": -96.2440185546875, + "logps/ref_chosen": -72.60400390625, + "logps/ref_rejected": -103.73905181884766, + "logps/rejected": -160.45053100585938, + "loss": 0.4488, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1537022888660431, + "margin_dpo/beta_margin_grad_std": 0.21864807605743408, + "margin_dpo/beta_margin_mean": 3.3071470260620117, + "margin_dpo/loss_margin_mean": 33.07147216796875, + "margin_dpo/margin_mean": 33.07147216796875, + "margin_dpo/margin_std": 25.884002685546875, + "step": 668 + }, + { + "epoch": 0.9823788546255506, + "grad_norm": 68.7284927368164, + "learning_rate": 5.564580657695939e-10, + "logits/chosen": -0.6374561786651611, + "logits/rejected": -0.5934668183326721, + "logps/chosen": -65.82669067382812, + "logps/ref_chosen": -46.116416931152344, + "logps/ref_rejected": -77.92434692382812, + "logps/rejected": -135.83099365234375, + "loss": 0.5109, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.15599049627780914, + "margin_dpo/beta_margin_grad_std": 0.2399427890777588, + "margin_dpo/beta_margin_mean": 3.8196370601654053, + "margin_dpo/loss_margin_mean": 38.19636917114258, + "margin_dpo/margin_mean": 38.19636917114258, + "margin_dpo/margin_std": 32.66187286376953, + "step": 669 + }, + { + "epoch": 0.9838472834067548, + "grad_norm": 44.16929626464844, + "learning_rate": 4.741678157389739e-10, + "logits/chosen": -0.6017849445343018, + "logits/rejected": -0.5695161819458008, + "logps/chosen": -83.34823608398438, + "logps/ref_chosen": -62.34575653076172, + "logps/ref_rejected": -96.9405517578125, + "logps/rejected": -156.87435913085938, + "loss": 0.2739, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.10903677344322205, + "margin_dpo/beta_margin_grad_std": 0.15663883090019226, + "margin_dpo/beta_margin_mean": 3.8931329250335693, + "margin_dpo/loss_margin_mean": 38.93132781982422, + "margin_dpo/margin_mean": 38.93132781982422, + "margin_dpo/margin_std": 25.873010635375977, + "step": 670 + }, + { + "epoch": 0.9853157121879589, + "grad_norm": 55.043914794921875, + "learning_rate": 3.9845280344705245e-10, + "logits/chosen": -0.6250673532485962, + "logits/rejected": -0.5933674573898315, + "logps/chosen": -72.50352478027344, + "logps/ref_chosen": -48.00010681152344, + "logps/ref_rejected": -83.81932067871094, + "logps/rejected": -143.89370727539062, + "loss": 0.3551, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13739252090454102, + "margin_dpo/beta_margin_grad_std": 0.1742577999830246, + "margin_dpo/beta_margin_mean": 3.5570971965789795, + "margin_dpo/loss_margin_mean": 35.57096862792969, + "margin_dpo/margin_mean": 35.57096862792969, + "margin_dpo/margin_std": 28.424989700317383, + "step": 671 + }, + { + "epoch": 0.986784140969163, + "grad_norm": 66.3819351196289, + "learning_rate": 3.293150240547549e-10, + "logits/chosen": -0.6075701117515564, + "logits/rejected": -0.5688859820365906, + "logps/chosen": -82.87799072265625, + "logps/ref_chosen": -58.583290100097656, + "logps/ref_rejected": -93.14014434814453, + "logps/rejected": -149.91152954101562, + "loss": 0.479, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.17206090688705444, + "margin_dpo/beta_margin_grad_std": 0.21605950593948364, + "margin_dpo/beta_margin_mean": 3.2476677894592285, + "margin_dpo/loss_margin_mean": 32.47667694091797, + "margin_dpo/margin_mean": 32.47667694091797, + "margin_dpo/margin_std": 29.792341232299805, + "step": 672 + }, + { + "epoch": 0.9882525697503671, + "grad_norm": 41.944400787353516, + "learning_rate": 2.6675629940689504e-10, + "logits/chosen": -0.6093118786811829, + "logits/rejected": -0.5795783996582031, + "logps/chosen": -68.00104522705078, + "logps/ref_chosen": -46.72320556640625, + "logps/ref_rejected": -85.29623413085938, + "logps/rejected": -143.71388244628906, + "loss": 0.3057, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.12082840502262115, + "margin_dpo/beta_margin_grad_std": 0.16586080193519592, + "margin_dpo/beta_margin_mean": 3.7139804363250732, + "margin_dpo/loss_margin_mean": 37.139801025390625, + "margin_dpo/margin_mean": 37.13980484008789, + "margin_dpo/margin_std": 27.277408599853516, + "step": 673 + }, + { + "epoch": 0.9897209985315712, + "grad_norm": 37.851444244384766, + "learning_rate": 2.1077827798404725e-10, + "logits/chosen": -0.56818687915802, + "logits/rejected": -0.5401608943939209, + "logps/chosen": -67.67996215820312, + "logps/ref_chosen": -45.445526123046875, + "logps/ref_rejected": -70.04593658447266, + "logps/rejected": -130.27374267578125, + "loss": 0.2841, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.11634500324726105, + "margin_dpo/beta_margin_grad_std": 0.15289074182510376, + "margin_dpo/beta_margin_mean": 3.799337387084961, + "margin_dpo/loss_margin_mean": 37.993370056152344, + "margin_dpo/margin_mean": 37.993370056152344, + "margin_dpo/margin_std": 28.400920867919922, + "step": 674 + }, + { + "epoch": 0.9911894273127754, + "grad_norm": 61.00739669799805, + "learning_rate": 1.6138243485910863e-10, + "logits/chosen": -0.5657342672348022, + "logits/rejected": -0.5385361909866333, + "logps/chosen": -65.09626770019531, + "logps/ref_chosen": -44.17628479003906, + "logps/ref_rejected": -74.09197998046875, + "logps/rejected": -134.56707763671875, + "loss": 0.3851, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.11753740161657333, + "margin_dpo/beta_margin_grad_std": 0.20709985494613647, + "margin_dpo/beta_margin_mean": 3.9555118083953857, + "margin_dpo/loss_margin_mean": 39.555118560791016, + "margin_dpo/margin_mean": 39.555118560791016, + "margin_dpo/margin_std": 27.490642547607422, + "step": 675 + }, + { + "epoch": 0.9926578560939795, + "grad_norm": 76.93489837646484, + "learning_rate": 1.1857007165852472e-10, + "logits/chosen": -0.639100968837738, + "logits/rejected": -0.6050753593444824, + "logps/chosen": -96.79466247558594, + "logps/ref_chosen": -71.39852142333984, + "logps/ref_rejected": -88.3587646484375, + "logps/rejected": -150.10198974609375, + "loss": 0.4104, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13633592426776886, + "margin_dpo/beta_margin_grad_std": 0.18744832277297974, + "margin_dpo/beta_margin_mean": 3.6347103118896484, + "margin_dpo/loss_margin_mean": 36.34709930419922, + "margin_dpo/margin_mean": 36.34709930419922, + "margin_dpo/margin_std": 28.50853729248047, + "step": 676 + }, + { + "epoch": 0.9941262848751835, + "grad_norm": 66.0276107788086, + "learning_rate": 8.23423165278725e-11, + "logits/chosen": -0.6146172285079956, + "logits/rejected": -0.565468966960907, + "logps/chosen": -79.82300567626953, + "logps/ref_chosen": -56.52743911743164, + "logps/ref_rejected": -78.22654724121094, + "logps/rejected": -138.8525390625, + "loss": 0.4429, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1386735886335373, + "margin_dpo/beta_margin_grad_std": 0.21394288539886475, + "margin_dpo/beta_margin_mean": 3.733041286468506, + "margin_dpo/loss_margin_mean": 37.330413818359375, + "margin_dpo/margin_mean": 37.330413818359375, + "margin_dpo/margin_std": 28.455005645751953, + "step": 677 + }, + { + "epoch": 0.9955947136563876, + "grad_norm": 50.60527801513672, + "learning_rate": 5.270012410216185e-11, + "logits/chosen": -0.5987369418144226, + "logits/rejected": -0.5752243995666504, + "logps/chosen": -68.12297058105469, + "logps/ref_chosen": -46.13447570800781, + "logps/ref_rejected": -80.60462951660156, + "logps/rejected": -139.23318481445312, + "loss": 0.4505, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.1656286120414734, + "margin_dpo/beta_margin_grad_std": 0.21251779794692993, + "margin_dpo/beta_margin_mean": 3.6640052795410156, + "margin_dpo/loss_margin_mean": 36.640052795410156, + "margin_dpo/margin_mean": 36.640052795410156, + "margin_dpo/margin_std": 31.15386390686035, + "step": 678 + }, + { + "epoch": 0.9970631424375918, + "grad_norm": 47.600643157958984, + "learning_rate": 2.9644275480772416e-11, + "logits/chosen": -0.6062077283859253, + "logits/rejected": -0.5733453035354614, + "logps/chosen": -72.9251937866211, + "logps/ref_chosen": -50.294921875, + "logps/ref_rejected": -76.59813690185547, + "logps/rejected": -136.07611083984375, + "loss": 0.3291, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.112078458070755, + "margin_dpo/beta_margin_grad_std": 0.17374977469444275, + "margin_dpo/beta_margin_mean": 3.6847691535949707, + "margin_dpo/loss_margin_mean": 36.84769058227539, + "margin_dpo/margin_mean": 36.84769058227539, + "margin_dpo/margin_std": 26.915252685546875, + "step": 679 + }, + { + "epoch": 0.9985315712187959, + "grad_norm": 55.98041915893555, + "learning_rate": 1.31753782067201e-11, + "logits/chosen": -0.6170350313186646, + "logits/rejected": -0.5841037034988403, + "logps/chosen": -99.70010375976562, + "logps/ref_chosen": -76.91569519042969, + "logps/ref_rejected": -112.384765625, + "logps/rejected": -171.31031799316406, + "loss": 0.3861, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.13496683537960052, + "margin_dpo/beta_margin_grad_std": 0.1988377571105957, + "margin_dpo/beta_margin_mean": 3.6141152381896973, + "margin_dpo/loss_margin_mean": 36.141151428222656, + "margin_dpo/margin_mean": 36.141151428222656, + "margin_dpo/margin_std": 29.207448959350586, + "step": 680 + }, + { + "epoch": 1.0, + "grad_norm": 52.392547607421875, + "learning_rate": 3.2938662507808745e-12, + "logits/chosen": -0.6597200632095337, + "logits/rejected": -0.6327718496322632, + "logps/chosen": -84.40997314453125, + "logps/ref_chosen": -60.957279205322266, + "logps/ref_rejected": -88.5579833984375, + "logps/rejected": -143.79640197753906, + "loss": 0.4602, + "margin_dpo/beta": 0.10000000149011612, + "margin_dpo/beta_margin_grad_mean": -0.16065430641174316, + "margin_dpo/beta_margin_grad_std": 0.20822513103485107, + "margin_dpo/beta_margin_mean": 3.1785736083984375, + "margin_dpo/loss_margin_mean": 31.785736083984375, + "margin_dpo/margin_mean": 31.785736083984375, + "margin_dpo/margin_std": 28.091190338134766, + "step": 681 + }, + { + "epoch": 1.0, + "step": 681, + "total_flos": 0.0, + "train_loss": 0.572698849610295, + "train_runtime": 1998.3785, + "train_samples_per_second": 21.817, + "train_steps_per_second": 0.341 + } + ], + "logging_steps": 1, + "max_steps": 681, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}