From 8ec22ea0ed0eb1f60d3f9c81fe5405f6c1dba300 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Sat, 2 May 2026 03:26:59 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: EleutherAI/PinkElephants-OpenHermes-13B-DPO Source: Original Platform --- .gitattributes | 66 + README.md | 48 + config.json | 28 + configuration.json | 1 + generation_config.json | 8 + latest | 1 + model-00001-of-00006.safetensors | 3 + model-00002-of-00006.safetensors | 3 + model-00003-of-00006.safetensors | 3 + model-00004-of-00006.safetensors | 3 + model-00005-of-00006.safetensors | 3 + model-00006-of-00006.safetensors | 3 + model.safetensors.index.json | 370 ++ rng_state_0.pth | 3 + rng_state_1.pth | 3 + rng_state_2.pth | 3 + rng_state_3.pth | 3 + rng_state_4.pth | 3 + rng_state_5.pth | 3 + rng_state_6.pth | 3 + rng_state_7.pth | 3 + scheduler.pt | 3 + special_tokens_map.json | 30 + tokenizer.json | 3 + tokenizer.model | 3 + tokenizer_config.json | 44 + trainer_state.json | 10173 +++++++++++++++++++++++++++++ training_args.bin | 3 + zero_to_fp32.py | 587 ++ 29 files changed, 11410 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 config.json create mode 100644 configuration.json create mode 100644 generation_config.json create mode 100644 latest create mode 100644 model-00001-of-00006.safetensors create mode 100644 model-00002-of-00006.safetensors create mode 100644 model-00003-of-00006.safetensors create mode 100644 model-00004-of-00006.safetensors create mode 100644 model-00005-of-00006.safetensors create mode 100644 model-00006-of-00006.safetensors create mode 100644 model.safetensors.index.json create mode 100644 rng_state_0.pth create mode 100644 rng_state_1.pth create mode 100644 rng_state_2.pth create mode 
100644 rng_state_3.pth create mode 100644 rng_state_4.pth create mode 100644 rng_state_5.pth create mode 100644 rng_state_6.pth create mode 100644 rng_state_7.pth create mode 100644 scheduler.pt create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer.model create mode 100644 tokenizer_config.json create mode 100644 trainer_state.json create mode 100644 training_args.bin create mode 100644 zero_to_fp32.py diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..55140f0 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,66 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text + + +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text + +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text + + +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs 
diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +rng_state_5.pth filter=lfs diff=lfs merge=lfs -text +rng_state_6.pth filter=lfs diff=lfs merge=lfs -text +rng_state_2.pth filter=lfs diff=lfs merge=lfs -text +rng_state_1.pth filter=lfs diff=lfs merge=lfs -text +rng_state_0.pth filter=lfs diff=lfs merge=lfs -text +rng_state_4.pth filter=lfs diff=lfs merge=lfs -text +model-00004-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +model-00003-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +model-00002-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +training_args.bin filter=lfs diff=lfs merge=lfs -text +model-00001-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +scheduler.pt filter=lfs diff=lfs merge=lfs -text +rng_state_7.pth filter=lfs diff=lfs merge=lfs -text +model-00006-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.model filter=lfs diff=lfs merge=lfs -text +rng_state_3.pth filter=lfs diff=lfs merge=lfs -text +model-00005-of-00006.safetensors filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..64197d8 --- /dev/null +++ b/README.md @@ -0,0 +1,48 @@ +--- +license: Apache License 2.0 +tags: [] + +#model-type: +##如 gpt、phi、llama、chatglm、baichuan 等 +#- gpt + +#domain: +##如 nlp、cv、audio、multi-modal +#- nlp + +#language: +##语言代码列表 https://help.aliyun.com/document_detail/215387.html?spm=a2c4g.11186623.0.0.9f8d7467kni6Aa +#- cn + +#metrics: +##如 CIDEr、Blue、ROUGE 等 +#- 
CIDEr + +#tags: +##各种自定义,包括 pretrained、fine-tuned、instruction-tuned、RL-tuned 等训练方法和其他 +#- pretrained + +#tools: +##如 vllm、fastchat、llamacpp、AdaSeq 等 +#- vllm +--- +### 当前模型的贡献者未提供更加详细的模型介绍。模型文件和权重,可浏览“模型文件”页面获取。 +#### 您可以通过如下git clone命令,或者ModelScope SDK来下载模型 + +SDK下载 +```bash +#安装ModelScope +pip install modelscope +``` +```python +#SDK模型下载 +from modelscope import snapshot_download +model_dir = snapshot_download('EleutherAI/PinkElephants-OpenHermes-13B-DPO') +``` +Git下载 +``` +#Git模型下载 +git clone https://www.modelscope.cn/EleutherAI/PinkElephants-OpenHermes-13B-DPO.git +``` + +

如果您是本模型的贡献者,我们邀请您根据模型贡献文档,及时完善模型卡片内容。

\ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..8512451 --- /dev/null +++ b/config.json @@ -0,0 +1,28 @@ +{ + "_name_or_path": "teknium/OpenHermes-13B", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "intermediate_size": 13824, + "max_position_embeddings": 4096, + "model_type": "llama", + "num_attention_heads": 40, + "num_hidden_layers": 40, + "num_key_value_heads": 40, + "pad_token_id": 0, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.35.0", + "use_cache": false, + "vocab_size": 32000 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..972c9af --- /dev/null +++ b/generation_config.json @@ -0,0 +1,8 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "pad_token_id": 0, + "transformers_version": "4.35.0", + "use_cache": false +} diff --git a/latest b/latest new file mode 100644 index 0000000..4745bfb --- /dev/null +++ b/latest @@ -0,0 +1 @@ +global_step6500 \ No newline at end of file diff --git a/model-00001-of-00006.safetensors b/model-00001-of-00006.safetensors new file mode 100644 index 0000000..fdcceea --- /dev/null +++ b/model-00001-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9494f81611b38277e07f1e5e93c63e89c6ef4717a94f9be941500e303f4e81e +size 4978265800 diff --git a/model-00002-of-00006.safetensors b/model-00002-of-00006.safetensors new file mode 
100644 index 0000000..b802155 --- /dev/null +++ b/model-00002-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a2a212aa2b1b7492dc54f42273312820889a9ebd9f590a030c87637854e3606 +size 4970422232 diff --git a/model-00003-of-00006.safetensors b/model-00003-of-00006.safetensors new file mode 100644 index 0000000..d3a878b --- /dev/null +++ b/model-00003-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaa9b88f73398e139ca9c55826b3de777f110416328b963ec0637860aa2cbe22 +size 4970422256 diff --git a/model-00004-of-00006.safetensors b/model-00004-of-00006.safetensors new file mode 100644 index 0000000..03e78cf --- /dev/null +++ b/model-00004-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82e91ffdcf62872f0f872bc3a9661c8837f09c95a8a3f672b3c6b0f1009e8dbf +size 4933701504 diff --git a/model-00005-of-00006.safetensors b/model-00005-of-00006.safetensors new file mode 100644 index 0000000..8818393 --- /dev/null +++ b/model-00005-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1db400dbf365123c3c41f32ff50f9d380983e67c5f155eaafc82d2bb8be5016 +size 4933722216 diff --git a/model-00006-of-00006.safetensors b/model-00006-of-00006.safetensors new file mode 100644 index 0000000..68d78de --- /dev/null +++ b/model-00006-of-00006.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05949d39a9931faee77308f43e0208c9011d70aaa1716be622561b4df8fd35a9 +size 1245236920 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..ef4ed90 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,370 @@ +{ + "metadata": { + "total_size": 26031728640 + }, + "weight_map": { + "lm_head.weight": "model-00006-of-00006.safetensors", + "model.embed_tokens.weight": "model-00001-of-00006.safetensors", + "model.layers.0.input_layernorm.weight": 
"model-00001-of-00006.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.10.self_attn.q_proj.weight": 
"model-00002-of-00006.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + 
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.15.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.16.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.16.mlp.gate_proj.weight": 
"model-00003-of-00006.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.17.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.18.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + 
"model.layers.19.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.20.self_attn.o_proj.weight": 
"model-00003-of-00006.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00006.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00006.safetensors", + "model.layers.23.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + 
"model.layers.23.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.24.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.25.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.26.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.26.mlp.down_proj.weight": 
"model-00004-of-00006.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.27.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.28.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + 
"model.layers.28.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.29.input_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00004-of-00006.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.30.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.30.self_attn.k_proj.weight": 
"model-00004-of-00006.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00004-of-00006.safetensors", + "model.layers.31.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.32.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.33.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + 
"model.layers.33.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.34.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.35.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.36.input_layernorm.weight": 
"model-00005-of-00006.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.37.input_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00005-of-00006.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.38.input_layernorm.weight": "model-00006-of-00006.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00005-of-00006.safetensors", + 
"model.layers.38.self_attn.q_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00005-of-00006.safetensors", + "model.layers.39.input_layernorm.weight": "model-00006-of-00006.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00006-of-00006.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00006-of-00006.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + 
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00006.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00006.safetensors", + "model.layers.8.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + 
"model.layers.8.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00006.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00006.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00006.safetensors", + "model.norm.weight": "model-00006-of-00006.safetensors" + } +} diff --git a/rng_state_0.pth b/rng_state_0.pth new file mode 100644 index 0000000..0d826d3 --- /dev/null +++ b/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d10fe450773f1bfc442c051433c46b75203beb4f261ea106640dd02b730dfcd +size 15920 diff --git a/rng_state_1.pth b/rng_state_1.pth new file mode 100644 index 0000000..d40d722 --- /dev/null +++ b/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d6913afebee5ea7c0b375d24c5aae84d15d4e69523b629695d83987d9c5e484 +size 15920 diff --git a/rng_state_2.pth b/rng_state_2.pth new file mode 100644 index 0000000..f0275ae --- /dev/null +++ b/rng_state_2.pth @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:4b16e7360977fc3a7be9740450e17584b42a77ed49def679c85413fb84041e74 +size 15920 diff --git a/rng_state_3.pth b/rng_state_3.pth new file mode 100644 index 0000000..04f10f3 --- /dev/null +++ b/rng_state_3.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36cd5127e4b1946476f3a1ce696a6802d13f9c062c3eb0f460c5ad2b1002c155 +size 15920 diff --git a/rng_state_4.pth b/rng_state_4.pth new file mode 100644 index 0000000..8797e2c --- /dev/null +++ b/rng_state_4.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:978b5131b1fe3a9610265d74480f807116bb044fedb28283e0029567ed596060 +size 15920 diff --git a/rng_state_5.pth b/rng_state_5.pth new file mode 100644 index 0000000..895ef8c --- /dev/null +++ b/rng_state_5.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e4b0e66c15eda4ee7435a0783459f2a9bb3c769d9de8389f0fae2c15e37404f +size 15920 diff --git a/rng_state_6.pth b/rng_state_6.pth new file mode 100644 index 0000000..40d99e3 --- /dev/null +++ b/rng_state_6.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8db72bb0c9f2d3c97cfc7b20b301a71ebc503c80a578361050df420e046dbed +size 15920 diff --git a/rng_state_7.pth b/rng_state_7.pth new file mode 100644 index 0000000..91b90ed --- /dev/null +++ b/rng_state_7.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e97f53acf6df19e9c1160129575dd0e091690f405ec63cd48a44ad5453020c35 +size 15920 diff --git a/scheduler.pt b/scheduler.pt new file mode 100644 index 0000000..8f09598 --- /dev/null +++ b/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bfbe4d211503feff2ea8dbc44623bc1d327cbbf36eed4024007c27047a437a8 +size 1064 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..8bedc05 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..21779d6 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcd04f0eadf90287bd26e1a183ac487d8a141b09b06aecb7725bbdd343640f2e +size 1842767 diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000..6c00c74 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..67e4172 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,44 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% 
endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": false, + "model_max_length": 2048, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "trust_remote_code": false, + "unk_token": "", + "use_default_system_prompt": true, + "use_fast": true +} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..4d8d8c7 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,10173 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.967020426794477, + "eval_steps": 100, + "global_step": 6500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.5662100456621e-10, + "logits/chosen": -2.4946703910827637, + "logits/rejected": -2.335416316986084, + "logps/chosen": -85.90689086914062, + "logps/rejected": -62.35003662109375, + "loss": 0.6931, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 4.5662100456621e-09, + "logits/chosen": -2.267062187194824, + "logits/rejected": -1.9090423583984375, + "logps/chosen": -88.730712890625, + "logps/rejected": -68.25267028808594, + "loss": 0.6976, + "rewards/accuracies": 0.5138888955116272, + "rewards/chosen": 0.0035505560226738453, + "rewards/margins": 0.019558124244213104, + "rewards/rejected": -0.016007568687200546, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 9.1324200913242e-09, + "logits/chosen": -2.2504024505615234, + "logits/rejected": -1.9175926446914673, + "logps/chosen": -94.94258880615234, + "logps/rejected": -70.55738067626953, + "loss": 0.6949, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.02249746397137642, + "rewards/margins": 
0.018389523029327393, + "rewards/rejected": 0.004107940010726452, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 1.36986301369863e-08, + "logits/chosen": -2.2938480377197266, + "logits/rejected": -1.9231021404266357, + "logps/chosen": -89.45714569091797, + "logps/rejected": -70.13645935058594, + "loss": 0.6899, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": 0.025471080094575882, + "rewards/margins": 0.030790437012910843, + "rewards/rejected": -0.005319356918334961, + "step": 30 + }, + { + "epoch": 0.02, + "learning_rate": 1.82648401826484e-08, + "logits/chosen": -2.323638439178467, + "logits/rejected": -1.9527565240859985, + "logps/chosen": -92.12770080566406, + "logps/rejected": -68.82365417480469, + "loss": 0.6845, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": 0.022075748071074486, + "rewards/margins": 0.03521919250488281, + "rewards/rejected": -0.013143444433808327, + "step": 40 + }, + { + "epoch": 0.02, + "learning_rate": 2.28310502283105e-08, + "logits/chosen": -2.302112340927124, + "logits/rejected": -1.9624712467193604, + "logps/chosen": -89.31111907958984, + "logps/rejected": -67.56095123291016, + "loss": 0.6716, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.02959294244647026, + "rewards/margins": 0.04926164075732231, + "rewards/rejected": -0.01966869831085205, + "step": 50 + }, + { + "epoch": 0.03, + "learning_rate": 2.73972602739726e-08, + "logits/chosen": -2.3690874576568604, + "logits/rejected": -2.003035068511963, + "logps/chosen": -94.92839050292969, + "logps/rejected": -72.64738464355469, + "loss": 0.6588, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.028924476355314255, + "rewards/margins": 0.0910693034529686, + "rewards/rejected": -0.06214482709765434, + "step": 60 + }, + { + "epoch": 0.03, + "learning_rate": 3.19634703196347e-08, + "logits/chosen": -2.251183032989502, + "logits/rejected": -1.8819090127944946, + "logps/chosen": -91.9073257446289, + "logps/rejected": 
-69.90731811523438, + "loss": 0.6357, + "rewards/accuracies": 0.8374999761581421, + "rewards/chosen": 0.06686282157897949, + "rewards/margins": 0.17646007239818573, + "rewards/rejected": -0.10959725081920624, + "step": 70 + }, + { + "epoch": 0.04, + "learning_rate": 3.65296803652968e-08, + "logits/chosen": -2.2135210037231445, + "logits/rejected": -1.8786967992782593, + "logps/chosen": -96.14964294433594, + "logps/rejected": -70.20441436767578, + "loss": 0.5988, + "rewards/accuracies": 0.8125, + "rewards/chosen": 0.11508840322494507, + "rewards/margins": 0.2152937352657318, + "rewards/rejected": -0.10020533949136734, + "step": 80 + }, + { + "epoch": 0.04, + "learning_rate": 4.10958904109589e-08, + "logits/chosen": -2.2189507484436035, + "logits/rejected": -1.9139974117279053, + "logps/chosen": -83.40045928955078, + "logps/rejected": -64.86124420166016, + "loss": 0.5451, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.12101010233163834, + "rewards/margins": 0.3227211534976959, + "rewards/rejected": -0.2017110288143158, + "step": 90 + }, + { + "epoch": 0.05, + "learning_rate": 4.5662100456621e-08, + "logits/chosen": -2.3013663291931152, + "logits/rejected": -1.9398345947265625, + "logps/chosen": -89.51341247558594, + "logps/rejected": -67.42799377441406, + "loss": 0.5166, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1514771282672882, + "rewards/margins": 0.42826494574546814, + "rewards/rejected": -0.27678781747817993, + "step": 100 + }, + { + "epoch": 0.05, + "eval_logits/chosen": -2.261293411254883, + "eval_logits/rejected": -1.9334125518798828, + "eval_logps/chosen": -88.47880554199219, + "eval_logps/rejected": -67.38658905029297, + "eval_loss": 0.5020039081573486, + "eval_rewards/accuracies": 0.9497206807136536, + "eval_rewards/chosen": 0.17589102685451508, + "eval_rewards/margins": 0.4537213146686554, + "eval_rewards/rejected": -0.2778303027153015, + "eval_runtime": 310.5221, + "eval_samples_per_second": 9.217, + "eval_steps_per_second": 
0.576, + "step": 100 + }, + { + "epoch": 0.05, + "learning_rate": 5.02283105022831e-08, + "logits/chosen": -2.3438382148742676, + "logits/rejected": -1.9776771068572998, + "logps/chosen": -84.6239013671875, + "logps/rejected": -64.25120544433594, + "loss": 0.4884, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.23512157797813416, + "rewards/margins": 0.4826792776584625, + "rewards/rejected": -0.24755771458148956, + "step": 110 + }, + { + "epoch": 0.05, + "learning_rate": 5.47945205479452e-08, + "logits/chosen": -2.305763006210327, + "logits/rejected": -1.9242515563964844, + "logps/chosen": -93.78631591796875, + "logps/rejected": -71.03646087646484, + "loss": 0.452, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.25619563460350037, + "rewards/margins": 0.7088645100593567, + "rewards/rejected": -0.45266884565353394, + "step": 120 + }, + { + "epoch": 0.06, + "learning_rate": 5.93607305936073e-08, + "logits/chosen": -2.1994524002075195, + "logits/rejected": -1.8348830938339233, + "logps/chosen": -93.8993911743164, + "logps/rejected": -67.64842224121094, + "loss": 0.3697, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.37854477763175964, + "rewards/margins": 0.9585170745849609, + "rewards/rejected": -0.5799722671508789, + "step": 130 + }, + { + "epoch": 0.06, + "learning_rate": 6.39269406392694e-08, + "logits/chosen": -2.339900493621826, + "logits/rejected": -1.9880163669586182, + "logps/chosen": -91.68956756591797, + "logps/rejected": -70.57650756835938, + "loss": 0.3139, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4103256165981293, + "rewards/margins": 1.110944390296936, + "rewards/rejected": -0.7006188631057739, + "step": 140 + }, + { + "epoch": 0.07, + "learning_rate": 6.84931506849315e-08, + "logits/chosen": -2.3096134662628174, + "logits/rejected": -1.9507678747177124, + "logps/chosen": -88.14106750488281, + "logps/rejected": -69.87593841552734, + "loss": 0.2834, + "rewards/accuracies": 1.0, + "rewards/chosen": 
0.5618699789047241, + "rewards/margins": 1.299116849899292, + "rewards/rejected": -0.7372468113899231, + "step": 150 + }, + { + "epoch": 0.07, + "learning_rate": 7.30593607305936e-08, + "logits/chosen": -2.2230865955352783, + "logits/rejected": -1.8755073547363281, + "logps/chosen": -93.29566955566406, + "logps/rejected": -71.75144958496094, + "loss": 0.2673, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.5255166292190552, + "rewards/margins": 1.4965600967407227, + "rewards/rejected": -0.9710434079170227, + "step": 160 + }, + { + "epoch": 0.08, + "learning_rate": 7.76255707762557e-08, + "logits/chosen": -2.1584527492523193, + "logits/rejected": -1.9142663478851318, + "logps/chosen": -84.84449768066406, + "logps/rejected": -71.8950424194336, + "loss": 0.2645, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.43970975279808044, + "rewards/margins": 1.3599587678909302, + "rewards/rejected": -0.9202489852905273, + "step": 170 + }, + { + "epoch": 0.08, + "learning_rate": 8.21917808219178e-08, + "logits/chosen": -2.317610740661621, + "logits/rejected": -1.8754488229751587, + "logps/chosen": -94.67909240722656, + "logps/rejected": -69.68304443359375, + "loss": 0.2408, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.7285944819450378, + "rewards/margins": 1.5651540756225586, + "rewards/rejected": -0.8365596532821655, + "step": 180 + }, + { + "epoch": 0.09, + "learning_rate": 8.67579908675799e-08, + "logits/chosen": -2.3835504055023193, + "logits/rejected": -2.005877733230591, + "logps/chosen": -89.22205352783203, + "logps/rejected": -72.03932189941406, + "loss": 0.2142, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7577417492866516, + "rewards/margins": 1.8872630596160889, + "rewards/rejected": -1.1295212507247925, + "step": 190 + }, + { + "epoch": 0.09, + "learning_rate": 9.1324200913242e-08, + "logits/chosen": -2.2697577476501465, + "logits/rejected": -1.892371416091919, + "logps/chosen": -89.17048645019531, + 
"logps/rejected": -69.54945373535156, + "loss": 0.1947, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7742541432380676, + "rewards/margins": 1.9032939672470093, + "rewards/rejected": -1.1290397644042969, + "step": 200 + }, + { + "epoch": 0.09, + "eval_logits/chosen": -2.2516348361968994, + "eval_logits/rejected": -1.9177496433258057, + "eval_logps/chosen": -87.51004028320312, + "eval_logps/rejected": -69.3072738647461, + "eval_loss": 0.19914411008358002, + "eval_rewards/accuracies": 0.9748603105545044, + "eval_rewards/chosen": 0.6602736115455627, + "eval_rewards/margins": 1.898452877998352, + "eval_rewards/rejected": -1.2381792068481445, + "eval_runtime": 222.9167, + "eval_samples_per_second": 12.839, + "eval_steps_per_second": 0.803, + "step": 200 + }, + { + "epoch": 0.1, + "learning_rate": 9.58904109589041e-08, + "logits/chosen": -2.25944185256958, + "logits/rejected": -1.9700183868408203, + "logps/chosen": -84.00562286376953, + "logps/rejected": -68.82413482666016, + "loss": 0.1838, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.6912348866462708, + "rewards/margins": 1.9548532962799072, + "rewards/rejected": -1.2636187076568604, + "step": 210 + }, + { + "epoch": 0.1, + "learning_rate": 1.004566210045662e-07, + "logits/chosen": -2.2063567638397217, + "logits/rejected": -1.9247395992279053, + "logps/chosen": -83.2778091430664, + "logps/rejected": -68.1301498413086, + "loss": 0.1654, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.7392381429672241, + "rewards/margins": 2.103783130645752, + "rewards/rejected": -1.3645451068878174, + "step": 220 + }, + { + "epoch": 0.1, + "learning_rate": 1.050228310502283e-07, + "logits/chosen": -2.196837902069092, + "logits/rejected": -1.8360687494277954, + "logps/chosen": -90.70558166503906, + "logps/rejected": -71.97917175292969, + "loss": 0.138, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.6917696595191956, + "rewards/margins": 2.567314386367798, + "rewards/rejected": 
-1.875544786453247, + "step": 230 + }, + { + "epoch": 0.11, + "learning_rate": 1.095890410958904e-07, + "logits/chosen": -2.269440174102783, + "logits/rejected": -1.8552277088165283, + "logps/chosen": -91.484130859375, + "logps/rejected": -67.85257720947266, + "loss": 0.1148, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.0015159845352173, + "rewards/margins": 2.8821816444396973, + "rewards/rejected": -1.8806654214859009, + "step": 240 + }, + { + "epoch": 0.11, + "learning_rate": 1.141552511415525e-07, + "logits/chosen": -2.2505125999450684, + "logits/rejected": -1.8530277013778687, + "logps/chosen": -91.98687744140625, + "logps/rejected": -76.97599792480469, + "loss": 0.0979, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.9848436117172241, + "rewards/margins": 3.093383550643921, + "rewards/rejected": -2.1085400581359863, + "step": 250 + }, + { + "epoch": 0.12, + "learning_rate": 1.187214611872146e-07, + "logits/chosen": -2.3471381664276123, + "logits/rejected": -1.9955825805664062, + "logps/chosen": -89.78094482421875, + "logps/rejected": -75.41387939453125, + "loss": 0.0891, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.1348358392715454, + "rewards/margins": 3.3899853229522705, + "rewards/rejected": -2.2551493644714355, + "step": 260 + }, + { + "epoch": 0.12, + "learning_rate": 1.232876712328767e-07, + "logits/chosen": -2.2680814266204834, + "logits/rejected": -1.8932344913482666, + "logps/chosen": -90.12066650390625, + "logps/rejected": -74.62397766113281, + "loss": 0.0802, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1657724380493164, + "rewards/margins": 3.604767322540283, + "rewards/rejected": -2.438995122909546, + "step": 270 + }, + { + "epoch": 0.13, + "learning_rate": 1.278538812785388e-07, + "logits/chosen": -2.276060104370117, + "logits/rejected": -1.9517319202423096, + "logps/chosen": -91.58979797363281, + "logps/rejected": -80.03755187988281, + "loss": 0.067, + "rewards/accuracies": 1.0, + 
"rewards/chosen": 1.0883013010025024, + "rewards/margins": 4.193854808807373, + "rewards/rejected": -3.1055538654327393, + "step": 280 + }, + { + "epoch": 0.13, + "learning_rate": 1.324200913242009e-07, + "logits/chosen": -2.288156509399414, + "logits/rejected": -1.8913819789886475, + "logps/chosen": -92.2828598022461, + "logps/rejected": -77.56204986572266, + "loss": 0.0674, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.2855546474456787, + "rewards/margins": 3.8462796211242676, + "rewards/rejected": -2.560725212097168, + "step": 290 + }, + { + "epoch": 0.14, + "learning_rate": 1.36986301369863e-07, + "logits/chosen": -2.221187114715576, + "logits/rejected": -1.8750922679901123, + "logps/chosen": -84.05517578125, + "logps/rejected": -73.2014389038086, + "loss": 0.0662, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3618335723876953, + "rewards/margins": 4.3457865715026855, + "rewards/rejected": -2.9839529991149902, + "step": 300 + }, + { + "epoch": 0.14, + "eval_logits/chosen": -2.245551586151123, + "eval_logits/rejected": -1.9041495323181152, + "eval_logps/chosen": -86.4066162109375, + "eval_logps/rejected": -73.35151672363281, + "eval_loss": 0.06390678137540817, + "eval_rewards/accuracies": 0.9804469347000122, + "eval_rewards/chosen": 1.2119877338409424, + "eval_rewards/margins": 4.47228479385376, + "eval_rewards/rejected": -3.260296106338501, + "eval_runtime": 197.1717, + "eval_samples_per_second": 14.515, + "eval_steps_per_second": 0.908, + "step": 300 + }, + { + "epoch": 0.14, + "learning_rate": 1.415525114155251e-07, + "logits/chosen": -2.24013090133667, + "logits/rejected": -1.8544371128082275, + "logps/chosen": -88.1822280883789, + "logps/rejected": -72.43907928466797, + "loss": 0.0602, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3825366497039795, + "rewards/margins": 4.868518829345703, + "rewards/rejected": -3.4859824180603027, + "step": 310 + }, + { + "epoch": 0.15, + "learning_rate": 1.461187214611872e-07, + "logits/chosen": 
-2.2278833389282227, + "logits/rejected": -1.8756189346313477, + "logps/chosen": -93.53022003173828, + "logps/rejected": -76.30734252929688, + "loss": 0.0517, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.2675020694732666, + "rewards/margins": 4.621975898742676, + "rewards/rejected": -3.35447359085083, + "step": 320 + }, + { + "epoch": 0.15, + "learning_rate": 1.506849315068493e-07, + "logits/chosen": -2.2458999156951904, + "logits/rejected": -1.9483330249786377, + "logps/chosen": -84.49108123779297, + "logps/rejected": -74.85401153564453, + "loss": 0.0488, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4710012674331665, + "rewards/margins": 5.068787574768066, + "rewards/rejected": -3.5977866649627686, + "step": 330 + }, + { + "epoch": 0.16, + "learning_rate": 1.552511415525114e-07, + "logits/chosen": -2.2143704891204834, + "logits/rejected": -1.8886018991470337, + "logps/chosen": -85.12834167480469, + "logps/rejected": -73.5596694946289, + "loss": 0.0535, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3029371500015259, + "rewards/margins": 5.119471073150635, + "rewards/rejected": -3.8165335655212402, + "step": 340 + }, + { + "epoch": 0.16, + "learning_rate": 1.598173515981735e-07, + "logits/chosen": -2.2415523529052734, + "logits/rejected": -1.9033533334732056, + "logps/chosen": -81.90745544433594, + "logps/rejected": -78.7509536743164, + "loss": 0.0511, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.247246503829956, + "rewards/margins": 5.400745391845703, + "rewards/rejected": -4.153499126434326, + "step": 350 + }, + { + "epoch": 0.16, + "learning_rate": 1.643835616438356e-07, + "logits/chosen": -2.198545455932617, + "logits/rejected": -1.8948042392730713, + "logps/chosen": -85.43161010742188, + "logps/rejected": -76.58003997802734, + "loss": 0.0389, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.3219624757766724, + "rewards/margins": 5.153651237487793, + "rewards/rejected": -3.8316891193389893, + 
"step": 360 + }, + { + "epoch": 0.17, + "learning_rate": 1.689497716894977e-07, + "logits/chosen": -2.288311004638672, + "logits/rejected": -1.879122018814087, + "logps/chosen": -88.61750793457031, + "logps/rejected": -73.41519927978516, + "loss": 0.0448, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.605739951133728, + "rewards/margins": 5.448731899261475, + "rewards/rejected": -3.8429923057556152, + "step": 370 + }, + { + "epoch": 0.17, + "learning_rate": 1.735159817351598e-07, + "logits/chosen": -2.2476701736450195, + "logits/rejected": -1.837656021118164, + "logps/chosen": -88.90941619873047, + "logps/rejected": -76.27139282226562, + "loss": 0.027, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.609424352645874, + "rewards/margins": 6.268812656402588, + "rewards/rejected": -4.659388065338135, + "step": 380 + }, + { + "epoch": 0.18, + "learning_rate": 1.780821917808219e-07, + "logits/chosen": -2.256505012512207, + "logits/rejected": -1.9018818140029907, + "logps/chosen": -87.40512084960938, + "logps/rejected": -78.49671936035156, + "loss": 0.0365, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3038495779037476, + "rewards/margins": 5.88823938369751, + "rewards/rejected": -4.584390163421631, + "step": 390 + }, + { + "epoch": 0.18, + "learning_rate": 1.82648401826484e-07, + "logits/chosen": -2.2472777366638184, + "logits/rejected": -1.9034316539764404, + "logps/chosen": -87.68411254882812, + "logps/rejected": -83.73296356201172, + "loss": 0.0284, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.4865331649780273, + "rewards/margins": 6.618298530578613, + "rewards/rejected": -5.131765842437744, + "step": 400 + }, + { + "epoch": 0.18, + "eval_logits/chosen": -2.2571330070495605, + "eval_logits/rejected": -1.9162304401397705, + "eval_logps/chosen": -86.22880554199219, + "eval_logps/rejected": -77.06842803955078, + "eval_loss": 0.034718479961156845, + "eval_rewards/accuracies": 0.9832402467727661, + "eval_rewards/chosen": 1.300887942314148, + 
"eval_rewards/margins": 6.419642925262451, + "eval_rewards/rejected": -5.118754863739014, + "eval_runtime": 283.8557, + "eval_samples_per_second": 10.083, + "eval_steps_per_second": 0.631, + "step": 400 + }, + { + "epoch": 0.19, + "learning_rate": 1.872146118721461e-07, + "logits/chosen": -2.230461597442627, + "logits/rejected": -1.7934761047363281, + "logps/chosen": -95.64036560058594, + "logps/rejected": -79.05517578125, + "loss": 0.028, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 1.7179588079452515, + "rewards/margins": 6.512901306152344, + "rewards/rejected": -4.7949419021606445, + "step": 410 + }, + { + "epoch": 0.19, + "learning_rate": 1.917808219178082e-07, + "logits/chosen": -2.286062240600586, + "logits/rejected": -1.9140151739120483, + "logps/chosen": -85.09687042236328, + "logps/rejected": -79.01274871826172, + "loss": 0.0303, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2032204866409302, + "rewards/margins": 6.1523003578186035, + "rewards/rejected": -4.949079990386963, + "step": 420 + }, + { + "epoch": 0.2, + "learning_rate": 1.963470319634703e-07, + "logits/chosen": -2.296107053756714, + "logits/rejected": -2.0091071128845215, + "logps/chosen": -82.93006896972656, + "logps/rejected": -80.10784912109375, + "loss": 0.0268, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 1.1454616785049438, + "rewards/margins": 6.973064422607422, + "rewards/rejected": -5.827603340148926, + "step": 430 + }, + { + "epoch": 0.2, + "learning_rate": 2.009132420091324e-07, + "logits/chosen": -2.2207303047180176, + "logits/rejected": -1.8763904571533203, + "logps/chosen": -82.83280181884766, + "logps/rejected": -79.92660522460938, + "loss": 0.0291, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.1346409320831299, + "rewards/margins": 7.1732892990112305, + "rewards/rejected": -6.03864860534668, + "step": 440 + }, + { + "epoch": 0.21, + "learning_rate": 2.054794520547945e-07, + "logits/chosen": -2.164816379547119, + 
"logits/rejected": -1.7991693019866943, + "logps/chosen": -91.07575988769531, + "logps/rejected": -82.17328643798828, + "loss": 0.0295, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.567754864692688, + "rewards/margins": 8.110410690307617, + "rewards/rejected": -6.542654991149902, + "step": 450 + }, + { + "epoch": 0.21, + "learning_rate": 2.100456621004566e-07, + "logits/chosen": -2.278972864151001, + "logits/rejected": -1.9182837009429932, + "logps/chosen": -91.42366790771484, + "logps/rejected": -88.95767974853516, + "loss": 0.0314, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.1288248300552368, + "rewards/margins": 7.684876441955566, + "rewards/rejected": -6.556051731109619, + "step": 460 + }, + { + "epoch": 0.21, + "learning_rate": 2.146118721461187e-07, + "logits/chosen": -2.3190386295318604, + "logits/rejected": -1.961912751197815, + "logps/chosen": -86.6361312866211, + "logps/rejected": -81.09564971923828, + "loss": 0.0202, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.5480232238769531, + "rewards/margins": 7.930706024169922, + "rewards/rejected": -6.382682800292969, + "step": 470 + }, + { + "epoch": 0.22, + "learning_rate": 2.191780821917808e-07, + "logits/chosen": -2.3331542015075684, + "logits/rejected": -1.9054124355316162, + "logps/chosen": -91.41481018066406, + "logps/rejected": -83.68817138671875, + "loss": 0.0218, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3129695653915405, + "rewards/margins": 8.34068775177002, + "rewards/rejected": -7.027717590332031, + "step": 480 + }, + { + "epoch": 0.22, + "learning_rate": 2.237442922374429e-07, + "logits/chosen": -2.2767910957336426, + "logits/rejected": -1.91777765750885, + "logps/chosen": -90.29415130615234, + "logps/rejected": -84.94145965576172, + "loss": 0.017, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.661865234375, + "rewards/margins": 8.428221702575684, + "rewards/rejected": -6.766357421875, + "step": 490 + }, + { + "epoch": 0.23, + 
"learning_rate": 2.28310502283105e-07, + "logits/chosen": -2.2446134090423584, + "logits/rejected": -1.8638511896133423, + "logps/chosen": -88.12340545654297, + "logps/rejected": -83.9640884399414, + "loss": 0.0164, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.3303194046020508, + "rewards/margins": 8.892141342163086, + "rewards/rejected": -7.561821937561035, + "step": 500 + }, + { + "epoch": 0.23, + "eval_logits/chosen": -2.269374132156372, + "eval_logits/rejected": -1.9262473583221436, + "eval_logps/chosen": -86.5768814086914, + "eval_logps/rejected": -82.61319732666016, + "eval_loss": 0.018967095762491226, + "eval_rewards/accuracies": 0.9860334992408752, + "eval_rewards/chosen": 1.126853346824646, + "eval_rewards/margins": 9.017987251281738, + "eval_rewards/rejected": -7.891134262084961, + "eval_runtime": 180.5741, + "eval_samples_per_second": 15.849, + "eval_steps_per_second": 0.991, + "step": 500 + }, + { + "epoch": 0.23, + "learning_rate": 2.328767123287671e-07, + "logits/chosen": -2.2802085876464844, + "logits/rejected": -1.9185457229614258, + "logps/chosen": -81.77207946777344, + "logps/rejected": -80.97681427001953, + "loss": 0.0197, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.417055368423462, + "rewards/margins": 9.611557960510254, + "rewards/rejected": -8.194501876831055, + "step": 510 + }, + { + "epoch": 0.24, + "learning_rate": 2.374429223744292e-07, + "logits/chosen": -2.312295436859131, + "logits/rejected": -1.9153810739517212, + "logps/chosen": -91.66407775878906, + "logps/rejected": -87.04930877685547, + "loss": 0.0133, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.3187003135681152, + "rewards/margins": 9.435572624206543, + "rewards/rejected": -8.11687183380127, + "step": 520 + }, + { + "epoch": 0.24, + "learning_rate": 2.420091324200913e-07, + "logits/chosen": -2.347360134124756, + "logits/rejected": -1.9737800359725952, + "logps/chosen": -92.10830688476562, + "logps/rejected": -89.24776458740234, + 
"loss": 0.016, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5000791549682617, + "rewards/margins": 10.65346622467041, + "rewards/rejected": -9.153387069702148, + "step": 530 + }, + { + "epoch": 0.25, + "learning_rate": 2.465753424657534e-07, + "logits/chosen": -2.2161240577697754, + "logits/rejected": -1.819411039352417, + "logps/chosen": -89.9305648803711, + "logps/rejected": -87.90519714355469, + "loss": 0.0117, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.882406234741211, + "rewards/margins": 10.416455268859863, + "rewards/rejected": -8.534049034118652, + "step": 540 + }, + { + "epoch": 0.25, + "learning_rate": 2.511415525114155e-07, + "logits/chosen": -2.3072893619537354, + "logits/rejected": -1.928308129310608, + "logps/chosen": -88.01408386230469, + "logps/rejected": -83.06895446777344, + "loss": 0.0133, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1510813236236572, + "rewards/margins": 10.02571964263916, + "rewards/rejected": -8.87463665008545, + "step": 550 + }, + { + "epoch": 0.26, + "learning_rate": 2.557077625570776e-07, + "logits/chosen": -2.2718453407287598, + "logits/rejected": -1.8856366872787476, + "logps/chosen": -90.56663513183594, + "logps/rejected": -86.86662292480469, + "loss": 0.0137, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.4503662586212158, + "rewards/margins": 11.014634132385254, + "rewards/rejected": -9.5642671585083, + "step": 560 + }, + { + "epoch": 0.26, + "learning_rate": 2.602739726027397e-07, + "logits/chosen": -2.2816548347473145, + "logits/rejected": -1.8435026407241821, + "logps/chosen": -94.76255798339844, + "logps/rejected": -88.23798370361328, + "loss": 0.0171, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4174392223358154, + "rewards/margins": 10.936891555786133, + "rewards/rejected": -9.519450187683105, + "step": 570 + }, + { + "epoch": 0.26, + "learning_rate": 2.648401826484018e-07, + "logits/chosen": -2.298529863357544, + "logits/rejected": -1.8752195835113525, + 
"logps/chosen": -88.09405517578125, + "logps/rejected": -88.1880874633789, + "loss": 0.0106, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.359735131263733, + "rewards/margins": 11.649232864379883, + "rewards/rejected": -10.289498329162598, + "step": 580 + }, + { + "epoch": 0.27, + "learning_rate": 2.694063926940639e-07, + "logits/chosen": -2.2168471813201904, + "logits/rejected": -1.908395528793335, + "logps/chosen": -89.10639953613281, + "logps/rejected": -90.54367065429688, + "loss": 0.0091, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3336710929870605, + "rewards/margins": 12.000402450561523, + "rewards/rejected": -10.666732788085938, + "step": 590 + }, + { + "epoch": 0.27, + "learning_rate": 2.73972602739726e-07, + "logits/chosen": -2.3078534603118896, + "logits/rejected": -1.9278669357299805, + "logps/chosen": -85.4991683959961, + "logps/rejected": -89.22947692871094, + "loss": 0.0115, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5495243072509766, + "rewards/margins": 12.214988708496094, + "rewards/rejected": -10.665464401245117, + "step": 600 + }, + { + "epoch": 0.27, + "eval_logits/chosen": -2.258920431137085, + "eval_logits/rejected": -1.9018107652664185, + "eval_logps/chosen": -86.94231414794922, + "eval_logps/rejected": -88.5583267211914, + "eval_loss": 0.01211754884570837, + "eval_rewards/accuracies": 0.9860334992408752, + "eval_rewards/chosen": 0.9441364407539368, + "eval_rewards/margins": 11.807838439941406, + "eval_rewards/rejected": -10.863702774047852, + "eval_runtime": 220.3515, + "eval_samples_per_second": 12.988, + "eval_steps_per_second": 0.812, + "step": 600 + }, + { + "epoch": 0.28, + "learning_rate": 2.785388127853881e-07, + "logits/chosen": -2.3034229278564453, + "logits/rejected": -1.9104959964752197, + "logps/chosen": -89.28177642822266, + "logps/rejected": -89.79473114013672, + "loss": 0.0112, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.2887121438980103, + "rewards/margins": 
11.897204399108887, + "rewards/rejected": -10.608492851257324, + "step": 610 + }, + { + "epoch": 0.28, + "learning_rate": 2.831050228310502e-07, + "logits/chosen": -2.3329930305480957, + "logits/rejected": -1.9547516107559204, + "logps/chosen": -87.00670623779297, + "logps/rejected": -91.48796081542969, + "loss": 0.0116, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.2360261678695679, + "rewards/margins": 12.600028991699219, + "rewards/rejected": -11.364001274108887, + "step": 620 + }, + { + "epoch": 0.29, + "learning_rate": 2.876712328767123e-07, + "logits/chosen": -2.298924446105957, + "logits/rejected": -1.964695930480957, + "logps/chosen": -91.54945373535156, + "logps/rejected": -96.8219985961914, + "loss": 0.0097, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5358260869979858, + "rewards/margins": 13.666036605834961, + "rewards/rejected": -12.130212783813477, + "step": 630 + }, + { + "epoch": 0.29, + "learning_rate": 2.922374429223744e-07, + "logits/chosen": -2.3074567317962646, + "logits/rejected": -1.9056323766708374, + "logps/chosen": -89.87752532958984, + "logps/rejected": -90.3302230834961, + "loss": 0.0129, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.6936411261558533, + "rewards/margins": 11.324541091918945, + "rewards/rejected": -10.630899429321289, + "step": 640 + }, + { + "epoch": 0.3, + "learning_rate": 2.968036529680365e-07, + "logits/chosen": -2.310959815979004, + "logits/rejected": -1.8844079971313477, + "logps/chosen": -89.85198974609375, + "logps/rejected": -88.82755279541016, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.92197585105896, + "rewards/margins": 13.281471252441406, + "rewards/rejected": -11.359495162963867, + "step": 650 + }, + { + "epoch": 0.3, + "learning_rate": 2.998477929984779e-07, + "logits/chosen": -2.326324462890625, + "logits/rejected": -1.9201898574829102, + "logps/chosen": -86.74824523925781, + "logps/rejected": -93.16512298583984, + "loss": 0.0083, + 
"rewards/accuracies": 1.0, + "rewards/chosen": 1.1350452899932861, + "rewards/margins": 13.680140495300293, + "rewards/rejected": -12.545095443725586, + "step": 660 + }, + { + "epoch": 0.31, + "learning_rate": 2.993404363267377e-07, + "logits/chosen": -2.307976484298706, + "logits/rejected": -1.9049756526947021, + "logps/chosen": -91.18403625488281, + "logps/rejected": -93.76152801513672, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3413939476013184, + "rewards/margins": 14.000112533569336, + "rewards/rejected": -12.658717155456543, + "step": 670 + }, + { + "epoch": 0.31, + "learning_rate": 2.9883307965499743e-07, + "logits/chosen": -2.2899010181427, + "logits/rejected": -1.9124386310577393, + "logps/chosen": -91.78596496582031, + "logps/rejected": -97.47313690185547, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.643972635269165, + "rewards/margins": 14.497393608093262, + "rewards/rejected": -12.853422164916992, + "step": 680 + }, + { + "epoch": 0.31, + "learning_rate": 2.983257229832572e-07, + "logits/chosen": -2.278542995452881, + "logits/rejected": -1.8497650623321533, + "logps/chosen": -89.2038803100586, + "logps/rejected": -89.34355926513672, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.009284496307373, + "rewards/margins": 14.792158126831055, + "rewards/rejected": -12.782875061035156, + "step": 690 + }, + { + "epoch": 0.32, + "learning_rate": 2.9781836631151696e-07, + "logits/chosen": -2.255171060562134, + "logits/rejected": -1.9044491052627563, + "logps/chosen": -89.71915435791016, + "logps/rejected": -97.62041473388672, + "loss": 0.006, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.1625474691390991, + "rewards/margins": 13.750628471374512, + "rewards/rejected": -12.588080406188965, + "step": 700 + }, + { + "epoch": 0.32, + "eval_logits/chosen": -2.2662999629974365, + "eval_logits/rejected": -1.9091564416885376, + "eval_logps/chosen": -86.36480712890625, + 
"eval_logps/rejected": -93.07726287841797, + "eval_loss": 0.00890163704752922, + "eval_rewards/accuracies": 0.9888268113136292, + "eval_rewards/chosen": 1.2328906059265137, + "eval_rewards/margins": 14.356060981750488, + "eval_rewards/rejected": -13.123170852661133, + "eval_runtime": 201.1096, + "eval_samples_per_second": 14.231, + "eval_steps_per_second": 0.89, + "step": 700 + }, + { + "epoch": 0.32, + "learning_rate": 2.9731100963977676e-07, + "logits/chosen": -2.2573180198669434, + "logits/rejected": -1.8857762813568115, + "logps/chosen": -86.557861328125, + "logps/rejected": -94.78699493408203, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.315459132194519, + "rewards/margins": 14.209829330444336, + "rewards/rejected": -12.894371032714844, + "step": 710 + }, + { + "epoch": 0.33, + "learning_rate": 2.968036529680365e-07, + "logits/chosen": -2.325437068939209, + "logits/rejected": -1.954026460647583, + "logps/chosen": -85.6775131225586, + "logps/rejected": -95.70713806152344, + "loss": 0.0078, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7716138362884521, + "rewards/margins": 15.646145820617676, + "rewards/rejected": -13.874531745910645, + "step": 720 + }, + { + "epoch": 0.33, + "learning_rate": 2.962962962962963e-07, + "logits/chosen": -2.3093421459198, + "logits/rejected": -1.8865350484848022, + "logps/chosen": -86.63150787353516, + "logps/rejected": -93.17677307128906, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0186963081359863, + "rewards/margins": 15.363825798034668, + "rewards/rejected": -13.345129013061523, + "step": 730 + }, + { + "epoch": 0.34, + "learning_rate": 2.9578893962455603e-07, + "logits/chosen": -2.2457454204559326, + "logits/rejected": -1.8727748394012451, + "logps/chosen": -84.77641296386719, + "logps/rejected": -90.2676010131836, + "loss": 0.0081, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4204827547073364, + "rewards/margins": 14.20788288116455, + "rewards/rejected": 
-12.787399291992188, + "step": 740 + }, + { + "epoch": 0.34, + "learning_rate": 2.952815829528158e-07, + "logits/chosen": -2.308354616165161, + "logits/rejected": -1.9623510837554932, + "logps/chosen": -83.89095306396484, + "logps/rejected": -93.6553955078125, + "loss": 0.0102, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9429199695587158, + "rewards/margins": 14.841961860656738, + "rewards/rejected": -12.899042129516602, + "step": 750 + }, + { + "epoch": 0.35, + "learning_rate": 2.9477422628107556e-07, + "logits/chosen": -2.3661086559295654, + "logits/rejected": -1.9924736022949219, + "logps/chosen": -87.33646392822266, + "logps/rejected": -95.00645446777344, + "loss": 0.0145, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.3574576377868652, + "rewards/margins": 14.781530380249023, + "rewards/rejected": -12.424072265625, + "step": 760 + }, + { + "epoch": 0.35, + "learning_rate": 2.9426686960933536e-07, + "logits/chosen": -2.2825839519500732, + "logits/rejected": -1.8536045551300049, + "logps/chosen": -93.97390747070312, + "logps/rejected": -94.27307891845703, + "loss": 0.008, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.272422194480896, + "rewards/margins": 14.426435470581055, + "rewards/rejected": -13.154012680053711, + "step": 770 + }, + { + "epoch": 0.36, + "learning_rate": 2.937595129375951e-07, + "logits/chosen": -2.228724956512451, + "logits/rejected": -1.8590246438980103, + "logps/chosen": -86.09639739990234, + "logps/rejected": -92.32320404052734, + "loss": 0.0058, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.6447618007659912, + "rewards/margins": 14.473971366882324, + "rewards/rejected": -12.82921028137207, + "step": 780 + }, + { + "epoch": 0.36, + "learning_rate": 2.932521562658549e-07, + "logits/chosen": -2.298750400543213, + "logits/rejected": -1.9722936153411865, + "logps/chosen": -88.01515197753906, + "logps/rejected": -96.683349609375, + "loss": 0.0066, + "rewards/accuracies": 
0.987500011920929, + "rewards/chosen": 2.2021758556365967, + "rewards/margins": 15.400471687316895, + "rewards/rejected": -13.198295593261719, + "step": 790 + }, + { + "epoch": 0.37, + "learning_rate": 2.9274479959411463e-07, + "logits/chosen": -2.2984695434570312, + "logits/rejected": -1.9286425113677979, + "logps/chosen": -84.20467376708984, + "logps/rejected": -96.1876449584961, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.15755558013916, + "rewards/margins": 15.103589057922363, + "rewards/rejected": -12.946032524108887, + "step": 800 + }, + { + "epoch": 0.37, + "eval_logits/chosen": -2.2589728832244873, + "eval_logits/rejected": -1.9111113548278809, + "eval_logps/chosen": -85.02589416503906, + "eval_logps/rejected": -92.66677856445312, + "eval_loss": 0.00817781314253807, + "eval_rewards/accuracies": 0.9888268113136292, + "eval_rewards/chosen": 1.902347207069397, + "eval_rewards/margins": 14.820280075073242, + "eval_rewards/rejected": -12.917930603027344, + "eval_runtime": 235.1892, + "eval_samples_per_second": 12.169, + "eval_steps_per_second": 0.761, + "step": 800 + }, + { + "epoch": 0.37, + "learning_rate": 2.922374429223744e-07, + "logits/chosen": -2.246121883392334, + "logits/rejected": -1.878603219985962, + "logps/chosen": -84.80223083496094, + "logps/rejected": -96.83042907714844, + "loss": 0.0154, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4696574211120605, + "rewards/margins": 15.41651439666748, + "rewards/rejected": -12.946856498718262, + "step": 810 + }, + { + "epoch": 0.37, + "learning_rate": 2.9173008625063416e-07, + "logits/chosen": -2.2706353664398193, + "logits/rejected": -1.885000467300415, + "logps/chosen": -88.46263122558594, + "logps/rejected": -95.60710144042969, + "loss": 0.0061, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.5691077709198, + "rewards/margins": 16.42782211303711, + "rewards/rejected": -13.858716011047363, + "step": 820 + }, + { + "epoch": 0.38, + "learning_rate": 
2.9122272957889396e-07, + "logits/chosen": -2.2218751907348633, + "logits/rejected": -1.8037551641464233, + "logps/chosen": -89.38809204101562, + "logps/rejected": -99.0447006225586, + "loss": 0.0107, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.3626868724823, + "rewards/margins": 15.406193733215332, + "rewards/rejected": -13.04350757598877, + "step": 830 + }, + { + "epoch": 0.38, + "learning_rate": 2.907153729071537e-07, + "logits/chosen": -2.2497596740722656, + "logits/rejected": -1.7971522808074951, + "logps/chosen": -91.6142807006836, + "logps/rejected": -95.75736236572266, + "loss": 0.0077, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9224580526351929, + "rewards/margins": 16.020370483398438, + "rewards/rejected": -14.09791088104248, + "step": 840 + }, + { + "epoch": 0.39, + "learning_rate": 2.902080162354135e-07, + "logits/chosen": -2.2165656089782715, + "logits/rejected": -1.902951955795288, + "logps/chosen": -79.86293029785156, + "logps/rejected": -97.59088134765625, + "loss": 0.0073, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5969226360321045, + "rewards/margins": 15.788568496704102, + "rewards/rejected": -14.191644668579102, + "step": 850 + }, + { + "epoch": 0.39, + "learning_rate": 2.8970065956367323e-07, + "logits/chosen": -2.272202968597412, + "logits/rejected": -1.8873332738876343, + "logps/chosen": -87.25418853759766, + "logps/rejected": -99.46211242675781, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7722752094268799, + "rewards/margins": 16.73154640197754, + "rewards/rejected": -14.959269523620605, + "step": 860 + }, + { + "epoch": 0.4, + "learning_rate": 2.89193302891933e-07, + "logits/chosen": -2.255979061126709, + "logits/rejected": -1.9493907690048218, + "logps/chosen": -86.21080017089844, + "logps/rejected": -106.2105941772461, + "loss": 0.008, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.9701576232910156, + "rewards/margins": 18.300521850585938, + 
"rewards/rejected": -17.330366134643555, + "step": 870 + }, + { + "epoch": 0.4, + "learning_rate": 2.8868594622019276e-07, + "logits/chosen": -2.2402877807617188, + "logits/rejected": -1.801593542098999, + "logps/chosen": -91.86135864257812, + "logps/rejected": -99.11498260498047, + "loss": 0.0049, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.0486648082733154, + "rewards/margins": 17.628559112548828, + "rewards/rejected": -15.579893112182617, + "step": 880 + }, + { + "epoch": 0.41, + "learning_rate": 2.8817858954845256e-07, + "logits/chosen": -2.2116286754608154, + "logits/rejected": -1.8907750844955444, + "logps/chosen": -86.34685516357422, + "logps/rejected": -105.34354400634766, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.269397735595703, + "rewards/margins": 18.360477447509766, + "rewards/rejected": -16.091083526611328, + "step": 890 + }, + { + "epoch": 0.41, + "learning_rate": 2.876712328767123e-07, + "logits/chosen": -2.2881391048431396, + "logits/rejected": -1.9034688472747803, + "logps/chosen": -85.5262222290039, + "logps/rejected": -103.30674743652344, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0433995723724365, + "rewards/margins": 19.068531036376953, + "rewards/rejected": -17.025129318237305, + "step": 900 + }, + { + "epoch": 0.41, + "eval_logits/chosen": -2.2466113567352295, + "eval_logits/rejected": -1.893018364906311, + "eval_logps/chosen": -85.62364959716797, + "eval_logps/rejected": -100.12027740478516, + "eval_loss": 0.007255914621055126, + "eval_rewards/accuracies": 0.9888268113136292, + "eval_rewards/chosen": 1.6034660339355469, + "eval_rewards/margins": 18.248144149780273, + "eval_rewards/rejected": -16.644678115844727, + "eval_runtime": 217.5149, + "eval_samples_per_second": 13.158, + "eval_steps_per_second": 0.823, + "step": 900 + }, + { + "epoch": 0.42, + "learning_rate": 2.871638762049721e-07, + "logits/chosen": -2.2870774269104004, + "logits/rejected": 
-1.9350963830947876, + "logps/chosen": -92.18787384033203, + "logps/rejected": -107.0401611328125, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6550416946411133, + "rewards/margins": 18.991727828979492, + "rewards/rejected": -17.336687088012695, + "step": 910 + }, + { + "epoch": 0.42, + "learning_rate": 2.8665651953323183e-07, + "logits/chosen": -2.281367063522339, + "logits/rejected": -1.8545904159545898, + "logps/chosen": -85.82614135742188, + "logps/rejected": -100.07356262207031, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7310631275177, + "rewards/margins": 19.34614372253418, + "rewards/rejected": -16.615079879760742, + "step": 920 + }, + { + "epoch": 0.42, + "learning_rate": 2.861491628614916e-07, + "logits/chosen": -2.313906192779541, + "logits/rejected": -1.910033941268921, + "logps/chosen": -85.05974578857422, + "logps/rejected": -101.42959594726562, + "loss": 0.0071, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.475045919418335, + "rewards/margins": 17.93846893310547, + "rewards/rejected": -16.463422775268555, + "step": 930 + }, + { + "epoch": 0.43, + "learning_rate": 2.8564180618975136e-07, + "logits/chosen": -2.234340190887451, + "logits/rejected": -1.8709052801132202, + "logps/chosen": -90.11567687988281, + "logps/rejected": -107.35555267333984, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.179823398590088, + "rewards/margins": 19.71477699279785, + "rewards/rejected": -17.53495216369629, + "step": 940 + }, + { + "epoch": 0.43, + "learning_rate": 2.8513444951801116e-07, + "logits/chosen": -2.2462267875671387, + "logits/rejected": -1.8993927240371704, + "logps/chosen": -86.99429321289062, + "logps/rejected": -108.23291015625, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4878250360488892, + "rewards/margins": 20.112104415893555, + "rewards/rejected": -18.624279022216797, + "step": 950 + }, + { + "epoch": 0.44, + "learning_rate": 2.846270928462709e-07, + 
"logits/chosen": -2.196100950241089, + "logits/rejected": -1.8292429447174072, + "logps/chosen": -89.53839874267578, + "logps/rejected": -103.94474029541016, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.670432686805725, + "rewards/margins": 18.502605438232422, + "rewards/rejected": -16.832172393798828, + "step": 960 + }, + { + "epoch": 0.44, + "learning_rate": 2.841197361745307e-07, + "logits/chosen": -2.222852945327759, + "logits/rejected": -1.827455759048462, + "logps/chosen": -87.63113403320312, + "logps/rejected": -101.9697265625, + "loss": 0.0086, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.7835509777069092, + "rewards/margins": 19.11504364013672, + "rewards/rejected": -17.331493377685547, + "step": 970 + }, + { + "epoch": 0.45, + "learning_rate": 2.8361237950279043e-07, + "logits/chosen": -2.178133726119995, + "logits/rejected": -1.8010485172271729, + "logps/chosen": -87.01272583007812, + "logps/rejected": -104.30177307128906, + "loss": 0.0123, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2377567291259766, + "rewards/margins": 18.971233367919922, + "rewards/rejected": -16.733478546142578, + "step": 980 + }, + { + "epoch": 0.45, + "learning_rate": 2.831050228310502e-07, + "logits/chosen": -2.2163853645324707, + "logits/rejected": -1.8224786520004272, + "logps/chosen": -85.6602554321289, + "logps/rejected": -103.28083801269531, + "loss": 0.006, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.684990882873535, + "rewards/margins": 18.973764419555664, + "rewards/rejected": -16.288774490356445, + "step": 990 + }, + { + "epoch": 0.46, + "learning_rate": 2.8259766615930996e-07, + "logits/chosen": -2.2510056495666504, + "logits/rejected": -1.7727775573730469, + "logps/chosen": -87.45413970947266, + "logps/rejected": -97.86138916015625, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.894382953643799, + "rewards/margins": 19.050537109375, + "rewards/rejected": -16.15615463256836, + "step": 1000 + 
}, + { + "epoch": 0.46, + "eval_logits/chosen": -2.2001523971557617, + "eval_logits/rejected": -1.8469951152801514, + "eval_logps/chosen": -84.38460540771484, + "eval_logps/rejected": -99.33098602294922, + "eval_loss": 0.0070889778435230255, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 2.222993850708008, + "eval_rewards/margins": 18.473024368286133, + "eval_rewards/rejected": -16.250030517578125, + "eval_runtime": 165.1867, + "eval_samples_per_second": 17.326, + "eval_steps_per_second": 1.084, + "step": 1000 + }, + { + "epoch": 0.46, + "learning_rate": 2.8209030948756976e-07, + "logits/chosen": -2.2496349811553955, + "logits/rejected": -1.8998152017593384, + "logps/chosen": -84.53297424316406, + "logps/rejected": -103.37425231933594, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.839362621307373, + "rewards/margins": 18.76103401184082, + "rewards/rejected": -15.921670913696289, + "step": 1010 + }, + { + "epoch": 0.47, + "learning_rate": 2.815829528158295e-07, + "logits/chosen": -2.176231861114502, + "logits/rejected": -1.815509557723999, + "logps/chosen": -83.98689270019531, + "logps/rejected": -102.61014556884766, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5853066444396973, + "rewards/margins": 19.177194595336914, + "rewards/rejected": -16.591888427734375, + "step": 1020 + }, + { + "epoch": 0.47, + "learning_rate": 2.810755961440893e-07, + "logits/chosen": -2.2483859062194824, + "logits/rejected": -1.837993860244751, + "logps/chosen": -85.09949493408203, + "logps/rejected": -105.0072021484375, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.028395175933838, + "rewards/margins": 19.074552536010742, + "rewards/rejected": -17.046157836914062, + "step": 1030 + }, + { + "epoch": 0.47, + "learning_rate": 2.8056823947234903e-07, + "logits/chosen": -2.1420371532440186, + "logits/rejected": -1.7370542287826538, + "logps/chosen": -87.30947875976562, + "logps/rejected": 
-97.69217681884766, + "loss": 0.0078, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 3.3234381675720215, + "rewards/margins": 18.487682342529297, + "rewards/rejected": -15.16424560546875, + "step": 1040 + }, + { + "epoch": 0.48, + "learning_rate": 2.800608828006088e-07, + "logits/chosen": -2.2500481605529785, + "logits/rejected": -1.8463836908340454, + "logps/chosen": -90.0582275390625, + "logps/rejected": -103.7062759399414, + "loss": 0.0053, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3003077507019043, + "rewards/margins": 19.799549102783203, + "rewards/rejected": -17.49924087524414, + "step": 1050 + }, + { + "epoch": 0.48, + "learning_rate": 2.7955352612886856e-07, + "logits/chosen": -2.208354949951172, + "logits/rejected": -1.8978168964385986, + "logps/chosen": -77.57411193847656, + "logps/rejected": -103.87590026855469, + "loss": 0.007, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.4448658227920532, + "rewards/margins": 20.587467193603516, + "rewards/rejected": -19.142602920532227, + "step": 1060 + }, + { + "epoch": 0.49, + "learning_rate": 2.7904616945712836e-07, + "logits/chosen": -2.1659793853759766, + "logits/rejected": -1.7798793315887451, + "logps/chosen": -83.9134521484375, + "logps/rejected": -107.64317321777344, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1447739601135254, + "rewards/margins": 21.296142578125, + "rewards/rejected": -19.151369094848633, + "step": 1070 + }, + { + "epoch": 0.49, + "learning_rate": 2.785388127853881e-07, + "logits/chosen": -2.2320501804351807, + "logits/rejected": -1.8187439441680908, + "logps/chosen": -88.16758728027344, + "logps/rejected": -105.4022445678711, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6898410320281982, + "rewards/margins": 21.423625946044922, + "rewards/rejected": -18.733787536621094, + "step": 1080 + }, + { + "epoch": 0.5, + "learning_rate": 2.780314561136479e-07, + "logits/chosen": -2.150700092315674, + 
"logits/rejected": -1.8009631633758545, + "logps/chosen": -88.3268051147461, + "logps/rejected": -107.76560974121094, + "loss": 0.0057, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.4143996238708496, + "rewards/margins": 20.30838394165039, + "rewards/rejected": -18.893983840942383, + "step": 1090 + }, + { + "epoch": 0.5, + "learning_rate": 2.7752409944190763e-07, + "logits/chosen": -2.2560830116271973, + "logits/rejected": -1.8415091037750244, + "logps/chosen": -88.06275939941406, + "logps/rejected": -104.66233825683594, + "loss": 0.0082, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9098188877105713, + "rewards/margins": 20.39904022216797, + "rewards/rejected": -18.489221572875977, + "step": 1100 + }, + { + "epoch": 0.5, + "eval_logits/chosen": -2.208942413330078, + "eval_logits/rejected": -1.8563501834869385, + "eval_logps/chosen": -84.56623840332031, + "eval_logps/rejected": -100.85079956054688, + "eval_loss": 0.007256262004375458, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 2.132176637649536, + "eval_rewards/margins": 19.14211654663086, + "eval_rewards/rejected": -17.00994110107422, + "eval_runtime": 179.5596, + "eval_samples_per_second": 15.939, + "eval_steps_per_second": 0.997, + "step": 1100 + }, + { + "epoch": 0.51, + "learning_rate": 2.770167427701674e-07, + "logits/chosen": -2.148176431655884, + "logits/rejected": -1.811261534690857, + "logps/chosen": -90.28803253173828, + "logps/rejected": -105.98939514160156, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.97528874874115, + "rewards/margins": 18.622446060180664, + "rewards/rejected": -16.647159576416016, + "step": 1110 + }, + { + "epoch": 0.51, + "learning_rate": 2.7650938609842716e-07, + "logits/chosen": -2.226710796356201, + "logits/rejected": -1.8027299642562866, + "logps/chosen": -88.32550048828125, + "logps/rejected": -98.92311096191406, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.893244504928589, + 
"rewards/margins": 20.122953414916992, + "rewards/rejected": -17.22970962524414, + "step": 1120 + }, + { + "epoch": 0.52, + "learning_rate": 2.7600202942668696e-07, + "logits/chosen": -2.259479284286499, + "logits/rejected": -1.845926284790039, + "logps/chosen": -90.93453216552734, + "logps/rejected": -101.05252075195312, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7073707580566406, + "rewards/margins": 19.56781578063965, + "rewards/rejected": -16.860445022583008, + "step": 1130 + }, + { + "epoch": 0.52, + "learning_rate": 2.754946727549467e-07, + "logits/chosen": -2.253920793533325, + "logits/rejected": -1.9071108102798462, + "logps/chosen": -89.0732650756836, + "logps/rejected": -106.84078216552734, + "loss": 0.0072, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.451443672180176, + "rewards/margins": 19.57352638244629, + "rewards/rejected": -17.122081756591797, + "step": 1140 + }, + { + "epoch": 0.52, + "learning_rate": 2.749873160832065e-07, + "logits/chosen": -2.217794418334961, + "logits/rejected": -1.8264172077178955, + "logps/chosen": -84.98257446289062, + "logps/rejected": -103.6314926147461, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.75927734375, + "rewards/margins": 20.883607864379883, + "rewards/rejected": -18.12432861328125, + "step": 1150 + }, + { + "epoch": 0.53, + "learning_rate": 2.7447995941146623e-07, + "logits/chosen": -2.23350191116333, + "logits/rejected": -1.8721933364868164, + "logps/chosen": -88.2120361328125, + "logps/rejected": -107.16935729980469, + "loss": 0.0196, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 2.1735239028930664, + "rewards/margins": 19.927579879760742, + "rewards/rejected": -17.75405502319336, + "step": 1160 + }, + { + "epoch": 0.53, + "learning_rate": 2.73972602739726e-07, + "logits/chosen": -2.191521167755127, + "logits/rejected": -1.8549985885620117, + "logps/chosen": -88.76947784423828, + "logps/rejected": -109.21342468261719, + 
"loss": 0.0061, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.1197288036346436, + "rewards/margins": 21.133193969726562, + "rewards/rejected": -19.013463973999023, + "step": 1170 + }, + { + "epoch": 0.54, + "learning_rate": 2.7346524606798576e-07, + "logits/chosen": -2.223162889480591, + "logits/rejected": -1.8733783960342407, + "logps/chosen": -88.88787841796875, + "logps/rejected": -108.9013671875, + "loss": 0.0062, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.40854811668396, + "rewards/margins": 22.095478057861328, + "rewards/rejected": -19.686931610107422, + "step": 1180 + }, + { + "epoch": 0.54, + "learning_rate": 2.7295788939624556e-07, + "logits/chosen": -2.197874069213867, + "logits/rejected": -1.7851364612579346, + "logps/chosen": -85.47915649414062, + "logps/rejected": -101.67639923095703, + "loss": 0.0049, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8164796829223633, + "rewards/margins": 20.382543563842773, + "rewards/rejected": -17.566064834594727, + "step": 1190 + }, + { + "epoch": 0.55, + "learning_rate": 2.724505327245053e-07, + "logits/chosen": -2.2712106704711914, + "logits/rejected": -1.9540023803710938, + "logps/chosen": -88.18670654296875, + "logps/rejected": -107.9418716430664, + "loss": 0.0243, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.7379881143569946, + "rewards/margins": 19.269012451171875, + "rewards/rejected": -17.531024932861328, + "step": 1200 + }, + { + "epoch": 0.55, + "eval_logits/chosen": -2.1799449920654297, + "eval_logits/rejected": -1.832864761352539, + "eval_logps/chosen": -83.93651580810547, + "eval_logps/rejected": -97.79573822021484, + "eval_loss": 0.007186249829828739, + "eval_rewards/accuracies": 0.9888268113136292, + "eval_rewards/chosen": 2.4470374584198, + "eval_rewards/margins": 17.92945098876953, + "eval_rewards/rejected": -15.482412338256836, + "eval_runtime": 229.0536, + "eval_samples_per_second": 12.495, + "eval_steps_per_second": 0.781, + "step": 1200 + }, + { 
+ "epoch": 0.55, + "learning_rate": 2.719431760527651e-07, + "logits/chosen": -2.1936728954315186, + "logits/rejected": -1.8306325674057007, + "logps/chosen": -84.01075744628906, + "logps/rejected": -96.85215759277344, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3379154205322266, + "rewards/margins": 16.724027633666992, + "rewards/rejected": -14.38611125946045, + "step": 1210 + }, + { + "epoch": 0.56, + "learning_rate": 2.7143581938102483e-07, + "logits/chosen": -2.2218477725982666, + "logits/rejected": -1.848719835281372, + "logps/chosen": -81.31999969482422, + "logps/rejected": -99.01549530029297, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9159164428710938, + "rewards/margins": 18.738767623901367, + "rewards/rejected": -15.822851181030273, + "step": 1220 + }, + { + "epoch": 0.56, + "learning_rate": 2.709284627092846e-07, + "logits/chosen": -2.1815645694732666, + "logits/rejected": -1.8792476654052734, + "logps/chosen": -81.18501281738281, + "logps/rejected": -99.95542907714844, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6159422397613525, + "rewards/margins": 18.44463348388672, + "rewards/rejected": -15.828694343566895, + "step": 1230 + }, + { + "epoch": 0.57, + "learning_rate": 2.7042110603754436e-07, + "logits/chosen": -2.199174642562866, + "logits/rejected": -1.7883743047714233, + "logps/chosen": -86.32429504394531, + "logps/rejected": -99.67549133300781, + "loss": 0.0066, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 3.1772170066833496, + "rewards/margins": 19.528255462646484, + "rewards/rejected": -16.35103988647461, + "step": 1240 + }, + { + "epoch": 0.57, + "learning_rate": 2.6991374936580416e-07, + "logits/chosen": -2.1487929821014404, + "logits/rejected": -1.789044737815857, + "logps/chosen": -85.71726989746094, + "logps/rejected": -104.7240219116211, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.469191312789917, + "rewards/margins": 
19.757835388183594, + "rewards/rejected": -17.28864288330078, + "step": 1250 + }, + { + "epoch": 0.58, + "learning_rate": 2.694063926940639e-07, + "logits/chosen": -2.1753883361816406, + "logits/rejected": -1.798710823059082, + "logps/chosen": -87.18423461914062, + "logps/rejected": -101.5936279296875, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.384445905685425, + "rewards/margins": 19.821514129638672, + "rewards/rejected": -17.43706512451172, + "step": 1260 + }, + { + "epoch": 0.58, + "learning_rate": 2.688990360223237e-07, + "logits/chosen": -2.170189619064331, + "logits/rejected": -1.8727480173110962, + "logps/chosen": -84.32581329345703, + "logps/rejected": -109.3947982788086, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.941753625869751, + "rewards/margins": 20.492143630981445, + "rewards/rejected": -18.55038833618164, + "step": 1270 + }, + { + "epoch": 0.58, + "learning_rate": 2.6839167935058343e-07, + "logits/chosen": -2.334543466567993, + "logits/rejected": -1.9388000965118408, + "logps/chosen": -83.55552673339844, + "logps/rejected": -104.6715316772461, + "loss": 0.0091, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.3667588233947754, + "rewards/margins": 21.2448787689209, + "rewards/rejected": -18.87812042236328, + "step": 1280 + }, + { + "epoch": 0.59, + "learning_rate": 2.678843226788432e-07, + "logits/chosen": -2.1919655799865723, + "logits/rejected": -1.857973337173462, + "logps/chosen": -89.53315734863281, + "logps/rejected": -108.6443099975586, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.031522274017334, + "rewards/margins": 21.71250343322754, + "rewards/rejected": -19.680980682373047, + "step": 1290 + }, + { + "epoch": 0.59, + "learning_rate": 2.6737696600710296e-07, + "logits/chosen": -2.2731220722198486, + "logits/rejected": -1.894622802734375, + "logps/chosen": -80.3070297241211, + "logps/rejected": -106.09849548339844, + "loss": 0.0053, + 
"rewards/accuracies": 0.987500011920929, + "rewards/chosen": 3.022993564605713, + "rewards/margins": 22.860183715820312, + "rewards/rejected": -19.837190628051758, + "step": 1300 + }, + { + "epoch": 0.59, + "eval_logits/chosen": -2.197847366333008, + "eval_logits/rejected": -1.8479573726654053, + "eval_logps/chosen": -85.01256561279297, + "eval_logps/rejected": -106.13009643554688, + "eval_loss": 0.007005217019468546, + "eval_rewards/accuracies": 0.994413435459137, + "eval_rewards/chosen": 1.9090105295181274, + "eval_rewards/margins": 21.558591842651367, + "eval_rewards/rejected": -19.64958381652832, + "eval_runtime": 244.0952, + "eval_samples_per_second": 11.725, + "eval_steps_per_second": 0.733, + "step": 1300 + }, + { + "epoch": 0.6, + "learning_rate": 2.6686960933536276e-07, + "logits/chosen": -2.257427215576172, + "logits/rejected": -1.9010261297225952, + "logps/chosen": -85.78707885742188, + "logps/rejected": -110.6099853515625, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4400665760040283, + "rewards/margins": 22.056560516357422, + "rewards/rejected": -19.616491317749023, + "step": 1310 + }, + { + "epoch": 0.6, + "learning_rate": 2.663622526636225e-07, + "logits/chosen": -2.1522250175476074, + "logits/rejected": -1.8013957738876343, + "logps/chosen": -85.6783447265625, + "logps/rejected": -111.1842269897461, + "loss": 0.0088, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.6612367630004883, + "rewards/margins": 22.052217483520508, + "rewards/rejected": -20.390979766845703, + "step": 1320 + }, + { + "epoch": 0.61, + "learning_rate": 2.658548959918823e-07, + "logits/chosen": -2.207953691482544, + "logits/rejected": -1.8331537246704102, + "logps/chosen": -89.45965576171875, + "logps/rejected": -109.67543029785156, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7361319065093994, + "rewards/margins": 22.78788185119629, + "rewards/rejected": -20.051748275756836, + "step": 1330 + }, + { + "epoch": 0.61, 
+ "learning_rate": 2.6534753932014203e-07, + "logits/chosen": -2.213942050933838, + "logits/rejected": -1.8521515130996704, + "logps/chosen": -85.9124526977539, + "logps/rejected": -104.89479064941406, + "loss": 0.0096, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.157468557357788, + "rewards/margins": 21.968364715576172, + "rewards/rejected": -19.810897827148438, + "step": 1340 + }, + { + "epoch": 0.62, + "learning_rate": 2.648401826484018e-07, + "logits/chosen": -2.2531790733337402, + "logits/rejected": -1.883050560951233, + "logps/chosen": -83.91947937011719, + "logps/rejected": -117.13653564453125, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.926668405532837, + "rewards/margins": 24.423267364501953, + "rewards/rejected": -22.496599197387695, + "step": 1350 + }, + { + "epoch": 0.62, + "learning_rate": 2.6433282597666156e-07, + "logits/chosen": -2.1308257579803467, + "logits/rejected": -1.7693058252334595, + "logps/chosen": -89.39836120605469, + "logps/rejected": -115.6990737915039, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7298908233642578, + "rewards/margins": 23.361858367919922, + "rewards/rejected": -21.631967544555664, + "step": 1360 + }, + { + "epoch": 0.63, + "learning_rate": 2.6382546930492135e-07, + "logits/chosen": -2.2151737213134766, + "logits/rejected": -1.8560287952423096, + "logps/chosen": -87.30859375, + "logps/rejected": -111.68087005615234, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5496299266815186, + "rewards/margins": 22.752437591552734, + "rewards/rejected": -21.202808380126953, + "step": 1370 + }, + { + "epoch": 0.63, + "learning_rate": 2.633181126331811e-07, + "logits/chosen": -2.2534372806549072, + "logits/rejected": -1.9297653436660767, + "logps/chosen": -83.9007568359375, + "logps/rejected": -111.0853500366211, + "loss": 0.0061, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.8405357599258423, + "rewards/margins": 22.654870986938477, + 
"rewards/rejected": -20.814334869384766, + "step": 1380 + }, + { + "epoch": 0.63, + "learning_rate": 2.628107559614409e-07, + "logits/chosen": -2.269160747528076, + "logits/rejected": -1.8955166339874268, + "logps/chosen": -85.63101196289062, + "logps/rejected": -112.61944580078125, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2250313758850098, + "rewards/margins": 22.996959686279297, + "rewards/rejected": -20.771930694580078, + "step": 1390 + }, + { + "epoch": 0.64, + "learning_rate": 2.6230339928970063e-07, + "logits/chosen": -2.2950210571289062, + "logits/rejected": -1.9462471008300781, + "logps/chosen": -88.02154541015625, + "logps/rejected": -110.0986557006836, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7983818054199219, + "rewards/margins": 22.378314971923828, + "rewards/rejected": -20.57993507385254, + "step": 1400 + }, + { + "epoch": 0.64, + "eval_logits/chosen": -2.209514856338501, + "eval_logits/rejected": -1.8633878231048584, + "eval_logps/chosen": -85.35096740722656, + "eval_logps/rejected": -108.35541534423828, + "eval_loss": 0.007250170689076185, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.7398098707199097, + "eval_rewards/margins": 22.50205421447754, + "eval_rewards/rejected": -20.762245178222656, + "eval_runtime": 186.4436, + "eval_samples_per_second": 15.35, + "eval_steps_per_second": 0.96, + "step": 1400 + }, + { + "epoch": 0.64, + "learning_rate": 2.617960426179604e-07, + "logits/chosen": -2.224945306777954, + "logits/rejected": -1.8185851573944092, + "logps/chosen": -91.3012466430664, + "logps/rejected": -112.8142318725586, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3502469062805176, + "rewards/margins": 24.02829933166504, + "rewards/rejected": -21.678050994873047, + "step": 1410 + }, + { + "epoch": 0.65, + "learning_rate": 2.6128868594622016e-07, + "logits/chosen": -2.163133144378662, + "logits/rejected": -1.816178560256958, + 
"logps/chosen": -85.406494140625, + "logps/rejected": -105.5617904663086, + "loss": 0.0044, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.699741005897522, + "rewards/margins": 21.171184539794922, + "rewards/rejected": -19.47144317626953, + "step": 1420 + }, + { + "epoch": 0.65, + "learning_rate": 2.6078132927447995e-07, + "logits/chosen": -2.240720748901367, + "logits/rejected": -1.8999313116073608, + "logps/chosen": -86.69328308105469, + "logps/rejected": -114.80928802490234, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3369460105895996, + "rewards/margins": 24.97016143798828, + "rewards/rejected": -22.633216857910156, + "step": 1430 + }, + { + "epoch": 0.66, + "learning_rate": 2.602739726027397e-07, + "logits/chosen": -2.2251267433166504, + "logits/rejected": -1.8492538928985596, + "logps/chosen": -85.41273498535156, + "logps/rejected": -113.72379302978516, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7388935089111328, + "rewards/margins": 24.013940811157227, + "rewards/rejected": -22.27504539489746, + "step": 1440 + }, + { + "epoch": 0.66, + "learning_rate": 2.597666159309995e-07, + "logits/chosen": -2.196199655532837, + "logits/rejected": -1.8432044982910156, + "logps/chosen": -84.84178924560547, + "logps/rejected": -113.02108001708984, + "loss": 0.0143, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.910146713256836, + "rewards/margins": 22.436695098876953, + "rewards/rejected": -20.52655029296875, + "step": 1450 + }, + { + "epoch": 0.67, + "learning_rate": 2.5925925925925923e-07, + "logits/chosen": -2.2136662006378174, + "logits/rejected": -1.8691514730453491, + "logps/chosen": -87.48178100585938, + "logps/rejected": -113.52275085449219, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.627357840538025, + "rewards/margins": 21.706161499023438, + "rewards/rejected": -20.07880210876465, + "step": 1460 + }, + { + "epoch": 0.67, + "learning_rate": 2.58751902587519e-07, + 
"logits/chosen": -2.1794159412384033, + "logits/rejected": -1.8281217813491821, + "logps/chosen": -91.26468658447266, + "logps/rejected": -111.00882720947266, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3432388305664062, + "rewards/margins": 22.417926788330078, + "rewards/rejected": -20.074687957763672, + "step": 1470 + }, + { + "epoch": 0.68, + "learning_rate": 2.5824454591577876e-07, + "logits/chosen": -2.2314205169677734, + "logits/rejected": -1.8225589990615845, + "logps/chosen": -88.60221862792969, + "logps/rejected": -109.89363098144531, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.573441982269287, + "rewards/margins": 24.366378784179688, + "rewards/rejected": -21.79293441772461, + "step": 1480 + }, + { + "epoch": 0.68, + "learning_rate": 2.5773718924403855e-07, + "logits/chosen": -2.1291966438293457, + "logits/rejected": -1.784570336341858, + "logps/chosen": -87.0352554321289, + "logps/rejected": -114.33548736572266, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9369707107543945, + "rewards/margins": 24.22989845275879, + "rewards/rejected": -22.292926788330078, + "step": 1490 + }, + { + "epoch": 0.68, + "learning_rate": 2.572298325722983e-07, + "logits/chosen": -2.148015260696411, + "logits/rejected": -1.8161808252334595, + "logps/chosen": -87.71135711669922, + "logps/rejected": -112.76509094238281, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6630229949951172, + "rewards/margins": 23.89804458618164, + "rewards/rejected": -22.235023498535156, + "step": 1500 + }, + { + "epoch": 0.68, + "eval_logits/chosen": -2.1937787532806396, + "eval_logits/rejected": -1.8467158079147339, + "eval_logps/chosen": -85.40876007080078, + "eval_logps/rejected": -109.89164733886719, + "eval_loss": 0.007178114727139473, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.7109133005142212, + "eval_rewards/margins": 23.241273880004883, + "eval_rewards/rejected": 
-21.530363082885742, + "eval_runtime": 183.2701, + "eval_samples_per_second": 15.616, + "eval_steps_per_second": 0.977, + "step": 1500 + }, + { + "epoch": 0.69, + "learning_rate": 2.567224759005581e-07, + "logits/chosen": -2.22023344039917, + "logits/rejected": -1.8317985534667969, + "logps/chosen": -81.38858795166016, + "logps/rejected": -102.5715560913086, + "loss": 0.0068, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.5601766109466553, + "rewards/margins": 20.727497100830078, + "rewards/rejected": -19.167320251464844, + "step": 1510 + }, + { + "epoch": 0.69, + "learning_rate": 2.5621511922881783e-07, + "logits/chosen": -2.194122791290283, + "logits/rejected": -1.7103767395019531, + "logps/chosen": -86.33879089355469, + "logps/rejected": -105.571533203125, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.7159111499786377, + "rewards/margins": 23.524215698242188, + "rewards/rejected": -19.808303833007812, + "step": 1520 + }, + { + "epoch": 0.7, + "learning_rate": 2.557077625570776e-07, + "logits/chosen": -2.1951489448547363, + "logits/rejected": -1.881566047668457, + "logps/chosen": -86.02349853515625, + "logps/rejected": -113.3117446899414, + "loss": 0.0061, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.7853310108184814, + "rewards/margins": 21.609209060668945, + "rewards/rejected": -19.823875427246094, + "step": 1530 + }, + { + "epoch": 0.7, + "learning_rate": 2.5520040588533736e-07, + "logits/chosen": -2.233779191970825, + "logits/rejected": -1.821406602859497, + "logps/chosen": -89.24073028564453, + "logps/rejected": -110.7870101928711, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8013709783554077, + "rewards/margins": 23.02773666381836, + "rewards/rejected": -21.226367950439453, + "step": 1540 + }, + { + "epoch": 0.71, + "learning_rate": 2.5469304921359715e-07, + "logits/chosen": -2.2160837650299072, + "logits/rejected": -1.8794729709625244, + "logps/chosen": -85.04659271240234, 
+ "logps/rejected": -111.51092529296875, + "loss": 0.0047, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.755566358566284, + "rewards/margins": 22.784481048583984, + "rewards/rejected": -20.02891731262207, + "step": 1550 + }, + { + "epoch": 0.71, + "learning_rate": 2.541856925418569e-07, + "logits/chosen": -2.1777358055114746, + "logits/rejected": -1.827588438987732, + "logps/chosen": -83.27925109863281, + "logps/rejected": -109.00111389160156, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.335489511489868, + "rewards/margins": 22.536785125732422, + "rewards/rejected": -20.201297760009766, + "step": 1560 + }, + { + "epoch": 0.72, + "learning_rate": 2.536783358701167e-07, + "logits/chosen": -2.227769374847412, + "logits/rejected": -1.8616615533828735, + "logps/chosen": -83.50282287597656, + "logps/rejected": -108.1055908203125, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6894235610961914, + "rewards/margins": 23.30007553100586, + "rewards/rejected": -20.610652923583984, + "step": 1570 + }, + { + "epoch": 0.72, + "learning_rate": 2.5317097919837643e-07, + "logits/chosen": -2.2765088081359863, + "logits/rejected": -1.8756946325302124, + "logps/chosen": -86.42229461669922, + "logps/rejected": -113.13868713378906, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7841994762420654, + "rewards/margins": 23.93669319152832, + "rewards/rejected": -22.152494430541992, + "step": 1580 + }, + { + "epoch": 0.73, + "learning_rate": 2.526636225266362e-07, + "logits/chosen": -2.2187094688415527, + "logits/rejected": -1.8117910623550415, + "logps/chosen": -90.1899185180664, + "logps/rejected": -115.62690734863281, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5328062176704407, + "rewards/margins": 23.20620346069336, + "rewards/rejected": -22.673397064208984, + "step": 1590 + }, + { + "epoch": 0.73, + "learning_rate": 2.5215626585489596e-07, + "logits/chosen": 
-2.2092316150665283, + "logits/rejected": -1.886833906173706, + "logps/chosen": -83.89598083496094, + "logps/rejected": -113.24908447265625, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.40551048517227173, + "rewards/margins": 22.66997528076172, + "rewards/rejected": -22.264461517333984, + "step": 1600 + }, + { + "epoch": 0.73, + "eval_logits/chosen": -2.203066110610962, + "eval_logits/rejected": -1.853004813194275, + "eval_logps/chosen": -87.39132690429688, + "eval_logps/rejected": -111.6284408569336, + "eval_loss": 0.006391549948602915, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 0.7196283340454102, + "eval_rewards/margins": 23.118391036987305, + "eval_rewards/rejected": -22.398759841918945, + "eval_runtime": 187.7477, + "eval_samples_per_second": 15.244, + "eval_steps_per_second": 0.953, + "step": 1600 + }, + { + "epoch": 0.73, + "learning_rate": 2.5164890918315575e-07, + "logits/chosen": -2.2956223487854004, + "logits/rejected": -1.944941759109497, + "logps/chosen": -89.06498718261719, + "logps/rejected": -114.08534240722656, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5906304121017456, + "rewards/margins": 23.316919326782227, + "rewards/rejected": -22.726289749145508, + "step": 1610 + }, + { + "epoch": 0.74, + "learning_rate": 2.511415525114155e-07, + "logits/chosen": -2.2368171215057373, + "logits/rejected": -1.9505856037139893, + "logps/chosen": -89.01737213134766, + "logps/rejected": -112.30778503417969, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.34434378147125244, + "rewards/margins": 21.64006996154785, + "rewards/rejected": -21.295726776123047, + "step": 1620 + }, + { + "epoch": 0.74, + "learning_rate": 2.506341958396753e-07, + "logits/chosen": -2.1280384063720703, + "logits/rejected": -1.7620325088500977, + "logps/chosen": -89.35337829589844, + "logps/rejected": -113.1550521850586, + "loss": 0.0077, + "rewards/accuracies": 0.987500011920929, + 
"rewards/chosen": 0.42981046438217163, + "rewards/margins": 23.936683654785156, + "rewards/rejected": -23.506874084472656, + "step": 1630 + }, + { + "epoch": 0.75, + "learning_rate": 2.5012683916793503e-07, + "logits/chosen": -2.142151355743408, + "logits/rejected": -1.7131553888320923, + "logps/chosen": -92.11378479003906, + "logps/rejected": -114.9017562866211, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4623570442199707, + "rewards/margins": 24.9403133392334, + "rewards/rejected": -22.47795867919922, + "step": 1640 + }, + { + "epoch": 0.75, + "learning_rate": 2.496194824961948e-07, + "logits/chosen": -2.2092690467834473, + "logits/rejected": -1.8049709796905518, + "logps/chosen": -87.24295806884766, + "logps/rejected": -112.48702239990234, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.603916645050049, + "rewards/margins": 24.936174392700195, + "rewards/rejected": -22.332256317138672, + "step": 1650 + }, + { + "epoch": 0.76, + "learning_rate": 2.4911212582445456e-07, + "logits/chosen": -2.184044599533081, + "logits/rejected": -1.7883659601211548, + "logps/chosen": -84.82064056396484, + "logps/rejected": -112.11551666259766, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.598346710205078, + "rewards/margins": 24.60263442993164, + "rewards/rejected": -22.004289627075195, + "step": 1660 + }, + { + "epoch": 0.76, + "learning_rate": 2.4860476915271435e-07, + "logits/chosen": -2.2834110260009766, + "logits/rejected": -1.8969223499298096, + "logps/chosen": -84.3614273071289, + "logps/rejected": -114.11153411865234, + "loss": 0.0108, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1697537899017334, + "rewards/margins": 25.301849365234375, + "rewards/rejected": -23.132095336914062, + "step": 1670 + }, + { + "epoch": 0.77, + "learning_rate": 2.480974124809741e-07, + "logits/chosen": -2.228867769241333, + "logits/rejected": -1.776439905166626, + "logps/chosen": -92.27311706542969, + "logps/rejected": 
-115.7362289428711, + "loss": 0.0031, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.5189244747161865, + "rewards/margins": 23.18109703063965, + "rewards/rejected": -20.662174224853516, + "step": 1680 + }, + { + "epoch": 0.77, + "learning_rate": 2.475900558092339e-07, + "logits/chosen": -2.2656030654907227, + "logits/rejected": -1.8951988220214844, + "logps/chosen": -87.36007690429688, + "logps/rejected": -105.27301025390625, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5274977684020996, + "rewards/margins": 22.20186424255371, + "rewards/rejected": -19.674365997314453, + "step": 1690 + }, + { + "epoch": 0.78, + "learning_rate": 2.4708269913749363e-07, + "logits/chosen": -2.2209548950195312, + "logits/rejected": -1.8660656213760376, + "logps/chosen": -84.46993255615234, + "logps/rejected": -111.65545654296875, + "loss": 0.0034, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.7445751428604126, + "rewards/margins": 22.078369140625, + "rewards/rejected": -20.333797454833984, + "step": 1700 + }, + { + "epoch": 0.78, + "eval_logits/chosen": -2.1876187324523926, + "eval_logits/rejected": -1.8381489515304565, + "eval_logps/chosen": -85.6910171508789, + "eval_logps/rejected": -108.63225555419922, + "eval_loss": 0.0056876870803534985, + "eval_rewards/accuracies": 0.9972066879272461, + "eval_rewards/chosen": 1.5697858333587646, + "eval_rewards/margins": 22.470449447631836, + "eval_rewards/rejected": -20.900663375854492, + "eval_runtime": 262.3348, + "eval_samples_per_second": 10.91, + "eval_steps_per_second": 0.682, + "step": 1700 + }, + { + "epoch": 0.78, + "learning_rate": 2.465753424657534e-07, + "logits/chosen": -2.1472651958465576, + "logits/rejected": -1.737198829650879, + "logps/chosen": -85.45567321777344, + "logps/rejected": -110.42171478271484, + "loss": 0.0016, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.159968614578247, + "rewards/margins": 23.5798397064209, + "rewards/rejected": 
-21.419872283935547, + "step": 1710 + }, + { + "epoch": 0.79, + "learning_rate": 2.4606798579401316e-07, + "logits/chosen": -2.1188554763793945, + "logits/rejected": -1.82101571559906, + "logps/chosen": -85.29725646972656, + "logps/rejected": -111.76287841796875, + "loss": 0.0033, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.8330132961273193, + "rewards/margins": 21.782207489013672, + "rewards/rejected": -19.949195861816406, + "step": 1720 + }, + { + "epoch": 0.79, + "learning_rate": 2.4556062912227295e-07, + "logits/chosen": -2.1919398307800293, + "logits/rejected": -1.777989149093628, + "logps/chosen": -89.14543151855469, + "logps/rejected": -109.812744140625, + "loss": 0.004, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.5910837650299072, + "rewards/margins": 22.191823959350586, + "rewards/rejected": -19.60074234008789, + "step": 1730 + }, + { + "epoch": 0.79, + "learning_rate": 2.450532724505327e-07, + "logits/chosen": -2.2214889526367188, + "logits/rejected": -1.8154666423797607, + "logps/chosen": -84.61921691894531, + "logps/rejected": -109.73350524902344, + "loss": 0.0047, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.7251133918762207, + "rewards/margins": 23.308391571044922, + "rewards/rejected": -20.58327865600586, + "step": 1740 + }, + { + "epoch": 0.8, + "learning_rate": 2.445459157787925e-07, + "logits/chosen": -2.266470432281494, + "logits/rejected": -1.869768500328064, + "logps/chosen": -90.2508316040039, + "logps/rejected": -113.48309326171875, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.311676502227783, + "rewards/margins": 23.228078842163086, + "rewards/rejected": -20.916400909423828, + "step": 1750 + }, + { + "epoch": 0.8, + "learning_rate": 2.4403855910705223e-07, + "logits/chosen": -2.214456081390381, + "logits/rejected": -1.8925609588623047, + "logps/chosen": -82.62592315673828, + "logps/rejected": -108.47261047363281, + "loss": 0.0042, + "rewards/accuracies": 
1.0, + "rewards/chosen": 1.60259211063385, + "rewards/margins": 21.090885162353516, + "rewards/rejected": -19.488292694091797, + "step": 1760 + }, + { + "epoch": 0.81, + "learning_rate": 2.43531202435312e-07, + "logits/chosen": -2.18231463432312, + "logits/rejected": -1.7262542247772217, + "logps/chosen": -94.32820892333984, + "logps/rejected": -114.97127532958984, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.17089581489563, + "rewards/margins": 24.209266662597656, + "rewards/rejected": -22.038372039794922, + "step": 1770 + }, + { + "epoch": 0.81, + "learning_rate": 2.4302384576357176e-07, + "logits/chosen": -2.221064329147339, + "logits/rejected": -1.8696062564849854, + "logps/chosen": -90.65652465820312, + "logps/rejected": -123.33599853515625, + "loss": 0.0012, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.8242883682250977, + "rewards/margins": 25.92755126953125, + "rewards/rejected": -24.103261947631836, + "step": 1780 + }, + { + "epoch": 0.82, + "learning_rate": 2.4251648909183155e-07, + "logits/chosen": -2.2197823524475098, + "logits/rejected": -1.8690903186798096, + "logps/chosen": -87.0594482421875, + "logps/rejected": -113.86370849609375, + "loss": 0.0062, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 3.1697230339050293, + "rewards/margins": 24.501766204833984, + "rewards/rejected": -21.332040786743164, + "step": 1790 + }, + { + "epoch": 0.82, + "learning_rate": 2.420091324200913e-07, + "logits/chosen": -2.231548309326172, + "logits/rejected": -1.9493480920791626, + "logps/chosen": -86.16682434082031, + "logps/rejected": -116.98976135253906, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1170125007629395, + "rewards/margins": 23.978233337402344, + "rewards/rejected": -22.861225128173828, + "step": 1800 + }, + { + "epoch": 0.82, + "eval_logits/chosen": -2.1833367347717285, + "eval_logits/rejected": -1.8318486213684082, + "eval_logps/chosen": -85.54547119140625, + 
"eval_logps/rejected": -111.36595153808594, + "eval_loss": 0.005670672748237848, + "eval_rewards/accuracies": 0.994413435459137, + "eval_rewards/chosen": 1.6425559520721436, + "eval_rewards/margins": 23.910072326660156, + "eval_rewards/rejected": -22.267513275146484, + "eval_runtime": 194.2178, + "eval_samples_per_second": 14.736, + "eval_steps_per_second": 0.922, + "step": 1800 + }, + { + "epoch": 0.83, + "learning_rate": 2.415017757483511e-07, + "logits/chosen": -2.2005953788757324, + "logits/rejected": -1.7761281728744507, + "logps/chosen": -94.50230407714844, + "logps/rejected": -119.65816497802734, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.642756700515747, + "rewards/margins": 27.622684478759766, + "rewards/rejected": -24.979928970336914, + "step": 1810 + }, + { + "epoch": 0.83, + "learning_rate": 2.409944190766108e-07, + "logits/chosen": -2.2361111640930176, + "logits/rejected": -1.850996732711792, + "logps/chosen": -85.87068176269531, + "logps/rejected": -119.11030578613281, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5628911852836609, + "rewards/margins": 25.55295181274414, + "rewards/rejected": -24.99005699157715, + "step": 1820 + }, + { + "epoch": 0.84, + "learning_rate": 2.404870624048706e-07, + "logits/chosen": -2.179274559020996, + "logits/rejected": -1.809653878211975, + "logps/chosen": -83.79485321044922, + "logps/rejected": -107.97098541259766, + "loss": 0.0061, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.3543555736541748, + "rewards/margins": 23.6602840423584, + "rewards/rejected": -22.305927276611328, + "step": 1830 + }, + { + "epoch": 0.84, + "learning_rate": 2.3997970573313036e-07, + "logits/chosen": -2.2063956260681152, + "logits/rejected": -1.8312809467315674, + "logps/chosen": -82.93327331542969, + "logps/rejected": -108.3280029296875, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.580742835998535, + "rewards/margins": 
24.954925537109375, + "rewards/rejected": -22.374181747436523, + "step": 1840 + }, + { + "epoch": 0.84, + "learning_rate": 2.3947234906139015e-07, + "logits/chosen": -2.2583651542663574, + "logits/rejected": -1.8435817956924438, + "logps/chosen": -87.89311981201172, + "logps/rejected": -114.3637466430664, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6345696449279785, + "rewards/margins": 26.21219825744629, + "rewards/rejected": -23.577627182006836, + "step": 1850 + }, + { + "epoch": 0.85, + "learning_rate": 2.389649923896499e-07, + "logits/chosen": -2.1602165699005127, + "logits/rejected": -1.8217239379882812, + "logps/chosen": -86.65802001953125, + "logps/rejected": -116.1138916015625, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0264811515808105, + "rewards/margins": 24.80233383178711, + "rewards/rejected": -21.77585220336914, + "step": 1860 + }, + { + "epoch": 0.85, + "learning_rate": 2.384576357179097e-07, + "logits/chosen": -2.1380181312561035, + "logits/rejected": -1.7056655883789062, + "logps/chosen": -90.73863220214844, + "logps/rejected": -117.328125, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.132702350616455, + "rewards/margins": 26.685169219970703, + "rewards/rejected": -23.552465438842773, + "step": 1870 + }, + { + "epoch": 0.86, + "learning_rate": 2.3795027904616943e-07, + "logits/chosen": -2.2756571769714355, + "logits/rejected": -1.9350929260253906, + "logps/chosen": -90.4540786743164, + "logps/rejected": -118.94252014160156, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2916797399520874, + "rewards/margins": 23.93198013305664, + "rewards/rejected": -22.640300750732422, + "step": 1880 + }, + { + "epoch": 0.86, + "learning_rate": 2.374429223744292e-07, + "logits/chosen": -2.2293753623962402, + "logits/rejected": -1.8584178686141968, + "logps/chosen": -83.38288879394531, + "logps/rejected": -109.228759765625, + "loss": 0.0038, + "rewards/accuracies": 1.0, + 
"rewards/chosen": 2.3209164142608643, + "rewards/margins": 24.548112869262695, + "rewards/rejected": -22.227197647094727, + "step": 1890 + }, + { + "epoch": 0.87, + "learning_rate": 2.3693556570268896e-07, + "logits/chosen": -2.28892183303833, + "logits/rejected": -1.9423027038574219, + "logps/chosen": -86.03301239013672, + "logps/rejected": -118.62422180175781, + "loss": 0.0038, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0244522094726562, + "rewards/margins": 25.199527740478516, + "rewards/rejected": -23.17507553100586, + "step": 1900 + }, + { + "epoch": 0.87, + "eval_logits/chosen": -2.2072012424468994, + "eval_logits/rejected": -1.8581523895263672, + "eval_logps/chosen": -85.27046203613281, + "eval_logps/rejected": -113.4885025024414, + "eval_loss": 0.006078703328967094, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.780059814453125, + "eval_rewards/margins": 25.108850479125977, + "eval_rewards/rejected": -23.32879066467285, + "eval_runtime": 195.9406, + "eval_samples_per_second": 14.606, + "eval_steps_per_second": 0.914, + "step": 1900 + }, + { + "epoch": 0.87, + "learning_rate": 2.3642820903094873e-07, + "logits/chosen": -2.218658208847046, + "logits/rejected": -1.8969409465789795, + "logps/chosen": -82.16979217529297, + "logps/rejected": -116.5958023071289, + "loss": 0.0043, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.3920204639434814, + "rewards/margins": 26.6087646484375, + "rewards/rejected": -24.21674156188965, + "step": 1910 + }, + { + "epoch": 0.88, + "learning_rate": 2.359208523592085e-07, + "logits/chosen": -2.2358078956604004, + "logits/rejected": -1.8991063833236694, + "logps/chosen": -85.44503021240234, + "logps/rejected": -115.95997619628906, + "loss": 0.0071, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.7518513202667236, + "rewards/margins": 25.580867767333984, + "rewards/rejected": -23.829017639160156, + "step": 1920 + }, + { + "epoch": 0.88, + "learning_rate": 
2.3541349568746826e-07, + "logits/chosen": -2.23246169090271, + "logits/rejected": -1.8101627826690674, + "logps/chosen": -88.6487045288086, + "logps/rejected": -115.31546783447266, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0985748767852783, + "rewards/margins": 25.528507232666016, + "rewards/rejected": -23.429927825927734, + "step": 1930 + }, + { + "epoch": 0.89, + "learning_rate": 2.3490613901572803e-07, + "logits/chosen": -2.1937997341156006, + "logits/rejected": -1.899291753768921, + "logps/chosen": -82.33692169189453, + "logps/rejected": -116.04981994628906, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9832813739776611, + "rewards/margins": 25.707172393798828, + "rewards/rejected": -23.72389030456543, + "step": 1940 + }, + { + "epoch": 0.89, + "learning_rate": 2.343987823439878e-07, + "logits/chosen": -2.19626522064209, + "logits/rejected": -1.8097482919692993, + "logps/chosen": -81.79765319824219, + "logps/rejected": -115.43753814697266, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6907103061676025, + "rewards/margins": 26.663219451904297, + "rewards/rejected": -23.972511291503906, + "step": 1950 + }, + { + "epoch": 0.89, + "learning_rate": 2.3389142567224756e-07, + "logits/chosen": -2.2777841091156006, + "logits/rejected": -1.932579755783081, + "logps/chosen": -93.16586303710938, + "logps/rejected": -125.8462905883789, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5280853509902954, + "rewards/margins": 27.09100914001465, + "rewards/rejected": -25.562923431396484, + "step": 1960 + }, + { + "epoch": 0.9, + "learning_rate": 2.3338406900050733e-07, + "logits/chosen": -2.2580790519714355, + "logits/rejected": -1.8621156215667725, + "logps/chosen": -85.24269104003906, + "logps/rejected": -114.29624938964844, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0481958389282227, + "rewards/margins": 28.0736141204834, + "rewards/rejected": 
-25.025419235229492, + "step": 1970 + }, + { + "epoch": 0.9, + "learning_rate": 2.328767123287671e-07, + "logits/chosen": -2.237919569015503, + "logits/rejected": -1.9384901523590088, + "logps/chosen": -85.32179260253906, + "logps/rejected": -126.17274475097656, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1654307842254639, + "rewards/margins": 29.019460678100586, + "rewards/rejected": -27.854028701782227, + "step": 1980 + }, + { + "epoch": 0.91, + "learning_rate": 2.3236935565702686e-07, + "logits/chosen": -2.2402701377868652, + "logits/rejected": -1.9076135158538818, + "logps/chosen": -84.37796783447266, + "logps/rejected": -119.68265533447266, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9258636236190796, + "rewards/margins": 26.976810455322266, + "rewards/rejected": -25.050945281982422, + "step": 1990 + }, + { + "epoch": 0.91, + "learning_rate": 2.3186199898528663e-07, + "logits/chosen": -2.2411704063415527, + "logits/rejected": -1.8752963542938232, + "logps/chosen": -85.16765594482422, + "logps/rejected": -116.25230407714844, + "loss": 0.0074, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9921166896820068, + "rewards/margins": 26.783111572265625, + "rewards/rejected": -24.790996551513672, + "step": 2000 + }, + { + "epoch": 0.91, + "eval_logits/chosen": -2.1969516277313232, + "eval_logits/rejected": -1.849937081336975, + "eval_logps/chosen": -86.06404113769531, + "eval_logps/rejected": -117.32756042480469, + "eval_loss": 0.006207953207194805, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.3832741975784302, + "eval_rewards/margins": 26.631595611572266, + "eval_rewards/rejected": -25.24832534790039, + "eval_runtime": 285.1562, + "eval_samples_per_second": 10.037, + "eval_steps_per_second": 0.628, + "step": 2000 + }, + { + "epoch": 0.92, + "learning_rate": 2.313546423135464e-07, + "logits/chosen": -2.1540346145629883, + "logits/rejected": -1.7665355205535889, + "logps/chosen": 
-90.4669189453125, + "logps/rejected": -113.56608581542969, + "loss": 0.0323, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6254875659942627, + "rewards/margins": 24.596233367919922, + "rewards/rejected": -21.970745086669922, + "step": 2010 + }, + { + "epoch": 0.92, + "learning_rate": 2.3084728564180616e-07, + "logits/chosen": -2.095485210418701, + "logits/rejected": -1.8191229104995728, + "logps/chosen": -82.52064514160156, + "logps/rejected": -114.91255187988281, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7258262634277344, + "rewards/margins": 23.092130661010742, + "rewards/rejected": -20.366304397583008, + "step": 2020 + }, + { + "epoch": 0.93, + "learning_rate": 2.3033992897006593e-07, + "logits/chosen": -2.269178867340088, + "logits/rejected": -1.8472541570663452, + "logps/chosen": -85.8851318359375, + "logps/rejected": -109.74867248535156, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9320855140686035, + "rewards/margins": 24.42289161682129, + "rewards/rejected": -21.49080467224121, + "step": 2030 + }, + { + "epoch": 0.93, + "learning_rate": 2.298325722983257e-07, + "logits/chosen": -2.1464996337890625, + "logits/rejected": -1.7602676153182983, + "logps/chosen": -87.7806625366211, + "logps/rejected": -114.57737731933594, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2257797718048096, + "rewards/margins": 24.386262893676758, + "rewards/rejected": -22.160480499267578, + "step": 2040 + }, + { + "epoch": 0.94, + "learning_rate": 2.2932521562658546e-07, + "logits/chosen": -2.1523966789245605, + "logits/rejected": -1.8064305782318115, + "logps/chosen": -86.0357666015625, + "logps/rejected": -112.4377212524414, + "loss": 0.01, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2664999961853027, + "rewards/margins": 24.120498657226562, + "rewards/rejected": -21.853994369506836, + "step": 2050 + }, + { + "epoch": 0.94, + "learning_rate": 2.2881785895484523e-07, + "logits/chosen": 
-2.209660291671753, + "logits/rejected": -1.8102290630340576, + "logps/chosen": -87.49055480957031, + "logps/rejected": -110.42215728759766, + "loss": 0.0084, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9767696857452393, + "rewards/margins": 23.30925941467285, + "rewards/rejected": -20.33249282836914, + "step": 2060 + }, + { + "epoch": 0.94, + "learning_rate": 2.28310502283105e-07, + "logits/chosen": -2.1463191509246826, + "logits/rejected": -1.8173093795776367, + "logps/chosen": -80.68872833251953, + "logps/rejected": -112.73249816894531, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.864393711090088, + "rewards/margins": 23.488155364990234, + "rewards/rejected": -20.623760223388672, + "step": 2070 + }, + { + "epoch": 0.95, + "learning_rate": 2.2780314561136476e-07, + "logits/chosen": -2.2494287490844727, + "logits/rejected": -1.8683185577392578, + "logps/chosen": -90.07569885253906, + "logps/rejected": -109.84877014160156, + "loss": 0.0027, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.9630918502807617, + "rewards/margins": 24.1021728515625, + "rewards/rejected": -21.139080047607422, + "step": 2080 + }, + { + "epoch": 0.95, + "learning_rate": 2.2729578893962453e-07, + "logits/chosen": -2.2044570446014404, + "logits/rejected": -1.8658645153045654, + "logps/chosen": -87.5743179321289, + "logps/rejected": -112.40694427490234, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.718559741973877, + "rewards/margins": 23.71183967590332, + "rewards/rejected": -20.9932804107666, + "step": 2090 + }, + { + "epoch": 0.96, + "learning_rate": 2.267884322678843e-07, + "logits/chosen": -2.25122332572937, + "logits/rejected": -1.8896070718765259, + "logps/chosen": -85.34817504882812, + "logps/rejected": -111.40423583984375, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9988149404525757, + "rewards/margins": 24.812755584716797, + "rewards/rejected": -22.813940048217773, + "step": 2100 + }, + { 
+ "epoch": 0.96, + "eval_logits/chosen": -2.1680314540863037, + "eval_logits/rejected": -1.8186105489730835, + "eval_logps/chosen": -85.12017822265625, + "eval_logps/rejected": -110.73912048339844, + "eval_loss": 0.005591261200606823, + "eval_rewards/accuracies": 0.994413435459137, + "eval_rewards/chosen": 1.8552027940750122, + "eval_rewards/margins": 23.80930519104004, + "eval_rewards/rejected": -21.954099655151367, + "eval_runtime": 178.0339, + "eval_samples_per_second": 16.076, + "eval_steps_per_second": 1.005, + "step": 2100 + }, + { + "epoch": 0.96, + "learning_rate": 2.2628107559614406e-07, + "logits/chosen": -2.182180643081665, + "logits/rejected": -1.887372612953186, + "logps/chosen": -84.8567123413086, + "logps/rejected": -115.8074722290039, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.302744150161743, + "rewards/margins": 24.386987686157227, + "rewards/rejected": -22.084239959716797, + "step": 2110 + }, + { + "epoch": 0.97, + "learning_rate": 2.2577371892440383e-07, + "logits/chosen": -2.1381964683532715, + "logits/rejected": -1.7454639673233032, + "logps/chosen": -89.93260192871094, + "logps/rejected": -116.71270751953125, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.476393938064575, + "rewards/margins": 26.676769256591797, + "rewards/rejected": -24.200376510620117, + "step": 2120 + }, + { + "epoch": 0.97, + "learning_rate": 2.252663622526636e-07, + "logits/chosen": -2.1717324256896973, + "logits/rejected": -1.7591243982315063, + "logps/chosen": -92.45228576660156, + "logps/rejected": -119.2745361328125, + "loss": 0.0051, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9234495162963867, + "rewards/margins": 26.036029815673828, + "rewards/rejected": -24.112579345703125, + "step": 2130 + }, + { + "epoch": 0.98, + "learning_rate": 2.2475900558092336e-07, + "logits/chosen": -2.2273201942443848, + "logits/rejected": -1.8839191198349, + "logps/chosen": -82.78079223632812, + "logps/rejected": 
-117.9035415649414, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3843512535095215, + "rewards/margins": 26.67987632751465, + "rewards/rejected": -24.2955265045166, + "step": 2140 + }, + { + "epoch": 0.98, + "learning_rate": 2.2425164890918313e-07, + "logits/chosen": -2.1497018337249756, + "logits/rejected": -1.7351865768432617, + "logps/chosen": -91.68416595458984, + "logps/rejected": -117.03997802734375, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7206225395202637, + "rewards/margins": 25.490280151367188, + "rewards/rejected": -22.7696590423584, + "step": 2150 + }, + { + "epoch": 0.99, + "learning_rate": 2.237442922374429e-07, + "logits/chosen": -2.105646848678589, + "logits/rejected": -1.7680670022964478, + "logps/chosen": -83.17054748535156, + "logps/rejected": -110.3686294555664, + "loss": 0.003, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.5340194702148438, + "rewards/margins": 23.920883178710938, + "rewards/rejected": -22.386865615844727, + "step": 2160 + }, + { + "epoch": 0.99, + "learning_rate": 2.2323693556570266e-07, + "logits/chosen": -2.165457248687744, + "logits/rejected": -1.7284084558486938, + "logps/chosen": -87.86439514160156, + "logps/rejected": -118.21602630615234, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8017003536224365, + "rewards/margins": 25.351261138916016, + "rewards/rejected": -22.549560546875, + "step": 2170 + }, + { + "epoch": 1.0, + "learning_rate": 2.2272957889396242e-07, + "logits/chosen": -2.1325504779815674, + "logits/rejected": -1.746701955795288, + "logps/chosen": -86.5936279296875, + "logps/rejected": -111.45501708984375, + "loss": 0.0061, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3284072875976562, + "rewards/margins": 23.549602508544922, + "rewards/rejected": -21.221195220947266, + "step": 2180 + }, + { + "epoch": 1.0, + "learning_rate": 2.222222222222222e-07, + "logits/chosen": -2.1646571159362793, + "logits/rejected": 
-1.8008124828338623, + "logps/chosen": -91.38953399658203, + "logps/rejected": -111.65739440917969, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5468361377716064, + "rewards/margins": 23.903175354003906, + "rewards/rejected": -21.356340408325195, + "step": 2190 + }, + { + "epoch": 1.0, + "learning_rate": 2.2171486555048196e-07, + "logits/chosen": -2.1863300800323486, + "logits/rejected": -1.8556190729141235, + "logps/chosen": -88.28529357910156, + "logps/rejected": -117.9306869506836, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.728421926498413, + "rewards/margins": 25.30516242980957, + "rewards/rejected": -22.57674217224121, + "step": 2200 + }, + { + "epoch": 1.0, + "eval_logits/chosen": -2.1533539295196533, + "eval_logits/rejected": -1.8051024675369263, + "eval_logps/chosen": -85.23373413085938, + "eval_logps/rejected": -109.83948516845703, + "eval_loss": 0.005602886434644461, + "eval_rewards/accuracies": 0.994413435459137, + "eval_rewards/chosen": 1.7984265089035034, + "eval_rewards/margins": 23.30270767211914, + "eval_rewards/rejected": -21.50428009033203, + "eval_runtime": 199.8494, + "eval_samples_per_second": 14.321, + "eval_steps_per_second": 0.896, + "step": 2200 + }, + { + "epoch": 1.01, + "learning_rate": 2.2120750887874172e-07, + "logits/chosen": -2.184231996536255, + "logits/rejected": -1.8242343664169312, + "logps/chosen": -87.5803451538086, + "logps/rejected": -116.75703430175781, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.086021900177002, + "rewards/margins": 24.399898529052734, + "rewards/rejected": -21.31387710571289, + "step": 2210 + }, + { + "epoch": 1.01, + "learning_rate": 2.207001522070015e-07, + "logits/chosen": -2.141301393508911, + "logits/rejected": -1.7759917974472046, + "logps/chosen": -91.86133575439453, + "logps/rejected": -116.03465270996094, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5139381885528564, + 
"rewards/margins": 24.87776756286621, + "rewards/rejected": -22.363828659057617, + "step": 2220 + }, + { + "epoch": 1.02, + "learning_rate": 2.2019279553526126e-07, + "logits/chosen": -2.147770643234253, + "logits/rejected": -1.6709057092666626, + "logps/chosen": -95.53349304199219, + "logps/rejected": -112.99432373046875, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.351959705352783, + "rewards/margins": 23.853574752807617, + "rewards/rejected": -21.501617431640625, + "step": 2230 + }, + { + "epoch": 1.02, + "learning_rate": 2.1968543886352102e-07, + "logits/chosen": -2.23237943649292, + "logits/rejected": -1.9411084651947021, + "logps/chosen": -82.67511749267578, + "logps/rejected": -113.44953918457031, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2467036247253418, + "rewards/margins": 23.66016960144043, + "rewards/rejected": -22.413467407226562, + "step": 2240 + }, + { + "epoch": 1.03, + "learning_rate": 2.191780821917808e-07, + "logits/chosen": -2.1674644947052, + "logits/rejected": -1.8577144145965576, + "logps/chosen": -84.78733825683594, + "logps/rejected": -115.30723571777344, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1725082397460938, + "rewards/margins": 24.452693939208984, + "rewards/rejected": -22.280183792114258, + "step": 2250 + }, + { + "epoch": 1.03, + "learning_rate": 2.1867072552004056e-07, + "logits/chosen": -2.288512706756592, + "logits/rejected": -1.899009108543396, + "logps/chosen": -87.27893829345703, + "logps/rejected": -116.2738265991211, + "loss": 0.0033, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.6636362075805664, + "rewards/margins": 25.80401039123535, + "rewards/rejected": -23.1403751373291, + "step": 2260 + }, + { + "epoch": 1.04, + "learning_rate": 2.1816336884830032e-07, + "logits/chosen": -2.1499955654144287, + "logits/rejected": -1.848623514175415, + "logps/chosen": -81.82324981689453, + "logps/rejected": -115.3056411743164, + "loss": 
0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2145495414733887, + "rewards/margins": 25.514049530029297, + "rewards/rejected": -23.299501419067383, + "step": 2270 + }, + { + "epoch": 1.04, + "learning_rate": 2.176560121765601e-07, + "logits/chosen": -2.2483465671539307, + "logits/rejected": -1.9416240453720093, + "logps/chosen": -87.2221908569336, + "logps/rejected": -114.78794860839844, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0889229774475098, + "rewards/margins": 24.115562438964844, + "rewards/rejected": -22.026639938354492, + "step": 2280 + }, + { + "epoch": 1.05, + "learning_rate": 2.1714865550481986e-07, + "logits/chosen": -2.1781678199768066, + "logits/rejected": -1.788074254989624, + "logps/chosen": -85.21916961669922, + "logps/rejected": -115.5194091796875, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.176715850830078, + "rewards/margins": 25.536880493164062, + "rewards/rejected": -23.360164642333984, + "step": 2290 + }, + { + "epoch": 1.05, + "learning_rate": 2.1664129883307962e-07, + "logits/chosen": -2.1438374519348145, + "logits/rejected": -1.81368887424469, + "logps/chosen": -84.81913757324219, + "logps/rejected": -114.127685546875, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9444429874420166, + "rewards/margins": 24.719940185546875, + "rewards/rejected": -22.775497436523438, + "step": 2300 + }, + { + "epoch": 1.05, + "eval_logits/chosen": -2.1615262031555176, + "eval_logits/rejected": -1.812113881111145, + "eval_logps/chosen": -85.03972625732422, + "eval_logps/rejected": -110.49690246582031, + "eval_loss": 0.005681305192410946, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.8954312801361084, + "eval_rewards/margins": 23.728422164916992, + "eval_rewards/rejected": -21.832990646362305, + "eval_runtime": 213.6661, + "eval_samples_per_second": 13.395, + "eval_steps_per_second": 0.838, + "step": 2300 + }, + { + "epoch": 1.05, + 
"learning_rate": 2.161339421613394e-07, + "logits/chosen": -2.1927437782287598, + "logits/rejected": -1.8735787868499756, + "logps/chosen": -85.47551727294922, + "logps/rejected": -118.7635269165039, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.551397681236267, + "rewards/margins": 23.236629486083984, + "rewards/rejected": -21.685232162475586, + "step": 2310 + }, + { + "epoch": 1.06, + "learning_rate": 2.1562658548959916e-07, + "logits/chosen": -2.259660482406616, + "logits/rejected": -1.8416106700897217, + "logps/chosen": -88.98851013183594, + "logps/rejected": -113.52168273925781, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8511302471160889, + "rewards/margins": 26.2247257232666, + "rewards/rejected": -24.37359619140625, + "step": 2320 + }, + { + "epoch": 1.06, + "learning_rate": 2.1511922881785892e-07, + "logits/chosen": -2.151369571685791, + "logits/rejected": -1.7967971563339233, + "logps/chosen": -86.92037200927734, + "logps/rejected": -110.92555236816406, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9498761892318726, + "rewards/margins": 22.68193244934082, + "rewards/rejected": -20.7320556640625, + "step": 2330 + }, + { + "epoch": 1.07, + "learning_rate": 2.146118721461187e-07, + "logits/chosen": -2.1439459323883057, + "logits/rejected": -1.8120663166046143, + "logps/chosen": -85.99787902832031, + "logps/rejected": -114.73786926269531, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.070859432220459, + "rewards/margins": 24.2840518951416, + "rewards/rejected": -21.213193893432617, + "step": 2340 + }, + { + "epoch": 1.07, + "learning_rate": 2.1410451547437846e-07, + "logits/chosen": -2.2162632942199707, + "logits/rejected": -1.8168309926986694, + "logps/chosen": -86.9716796875, + "logps/rejected": -110.72579193115234, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.628699541091919, + "rewards/margins": 23.844585418701172, + "rewards/rejected": 
-21.215885162353516, + "step": 2350 + }, + { + "epoch": 1.08, + "learning_rate": 2.1359715880263822e-07, + "logits/chosen": -2.174852132797241, + "logits/rejected": -1.8075027465820312, + "logps/chosen": -83.8674087524414, + "logps/rejected": -112.02839660644531, + "loss": 0.0035, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.375577926635742, + "rewards/margins": 24.027584075927734, + "rewards/rejected": -21.65200424194336, + "step": 2360 + }, + { + "epoch": 1.08, + "learning_rate": 2.13089802130898e-07, + "logits/chosen": -2.167109251022339, + "logits/rejected": -1.8096405267715454, + "logps/chosen": -84.7580337524414, + "logps/rejected": -117.7773208618164, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1797847747802734, + "rewards/margins": 25.387409210205078, + "rewards/rejected": -23.207622528076172, + "step": 2370 + }, + { + "epoch": 1.09, + "learning_rate": 2.1258244545915776e-07, + "logits/chosen": -2.141667127609253, + "logits/rejected": -1.7658653259277344, + "logps/chosen": -83.34464263916016, + "logps/rejected": -108.3573989868164, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8713493347167969, + "rewards/margins": 23.396373748779297, + "rewards/rejected": -21.525026321411133, + "step": 2380 + }, + { + "epoch": 1.09, + "learning_rate": 2.1207508878741752e-07, + "logits/chosen": -2.1764111518859863, + "logits/rejected": -1.8196337223052979, + "logps/chosen": -89.48072814941406, + "logps/rejected": -112.5357437133789, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0725154876708984, + "rewards/margins": 23.079078674316406, + "rewards/rejected": -22.006563186645508, + "step": 2390 + }, + { + "epoch": 1.1, + "learning_rate": 2.115677321156773e-07, + "logits/chosen": -2.1829419136047363, + "logits/rejected": -1.830394983291626, + "logps/chosen": -85.89895629882812, + "logps/rejected": -116.04026794433594, + "loss": 0.0056, + "rewards/accuracies": 0.987500011920929, + 
"rewards/chosen": 1.8246675729751587, + "rewards/margins": 24.702465057373047, + "rewards/rejected": -22.877796173095703, + "step": 2400 + }, + { + "epoch": 1.1, + "eval_logits/chosen": -2.16304874420166, + "eval_logits/rejected": -1.8152433633804321, + "eval_logps/chosen": -86.18814086914062, + "eval_logps/rejected": -112.2934341430664, + "eval_loss": 0.005279215984046459, + "eval_rewards/accuracies": 0.994413435459137, + "eval_rewards/chosen": 1.3212203979492188, + "eval_rewards/margins": 24.052478790283203, + "eval_rewards/rejected": -22.731260299682617, + "eval_runtime": 219.653, + "eval_samples_per_second": 13.03, + "eval_steps_per_second": 0.815, + "step": 2400 + }, + { + "epoch": 1.1, + "learning_rate": 2.1106037544393706e-07, + "logits/chosen": -2.186826705932617, + "logits/rejected": -1.8132612705230713, + "logps/chosen": -87.9347152709961, + "logps/rejected": -116.61759185791016, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.319627285003662, + "rewards/margins": 25.145320892333984, + "rewards/rejected": -22.82569122314453, + "step": 2410 + }, + { + "epoch": 1.1, + "learning_rate": 2.1055301877219682e-07, + "logits/chosen": -2.2042155265808105, + "logits/rejected": -1.7962379455566406, + "logps/chosen": -89.61426544189453, + "logps/rejected": -119.98515319824219, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1328256130218506, + "rewards/margins": 25.84686279296875, + "rewards/rejected": -23.714035034179688, + "step": 2420 + }, + { + "epoch": 1.11, + "learning_rate": 2.100456621004566e-07, + "logits/chosen": -2.12410831451416, + "logits/rejected": -1.81709885597229, + "logps/chosen": -83.92530822753906, + "logps/rejected": -115.90692138671875, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.454441785812378, + "rewards/margins": 25.90082359313965, + "rewards/rejected": -23.44638442993164, + "step": 2430 + }, + { + "epoch": 1.11, + "learning_rate": 2.0953830542871636e-07, + "logits/chosen": 
-2.24179744720459, + "logits/rejected": -1.9268741607666016, + "logps/chosen": -83.57841491699219, + "logps/rejected": -115.2148666381836, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5743858814239502, + "rewards/margins": 24.534379959106445, + "rewards/rejected": -22.95999526977539, + "step": 2440 + }, + { + "epoch": 1.12, + "learning_rate": 2.0903094875697612e-07, + "logits/chosen": -2.1769909858703613, + "logits/rejected": -1.7283875942230225, + "logps/chosen": -94.74465942382812, + "logps/rejected": -120.26570129394531, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9194726943969727, + "rewards/margins": 25.821313858032227, + "rewards/rejected": -23.901838302612305, + "step": 2450 + }, + { + "epoch": 1.12, + "learning_rate": 2.085235920852359e-07, + "logits/chosen": -2.1689000129699707, + "logits/rejected": -1.8665683269500732, + "logps/chosen": -85.41389465332031, + "logps/rejected": -120.8945083618164, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.879907250404358, + "rewards/margins": 26.468700408935547, + "rewards/rejected": -24.588794708251953, + "step": 2460 + }, + { + "epoch": 1.13, + "learning_rate": 2.0801623541349566e-07, + "logits/chosen": -2.243333101272583, + "logits/rejected": -1.8705765008926392, + "logps/chosen": -81.45762634277344, + "logps/rejected": -111.65384674072266, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.552882432937622, + "rewards/margins": 25.76004981994629, + "rewards/rejected": -23.207164764404297, + "step": 2470 + }, + { + "epoch": 1.13, + "learning_rate": 2.0750887874175542e-07, + "logits/chosen": -2.171480655670166, + "logits/rejected": -1.8349215984344482, + "logps/chosen": -86.48908996582031, + "logps/rejected": -120.96435546875, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6669495105743408, + "rewards/margins": 25.430204391479492, + "rewards/rejected": -23.763256072998047, + "step": 2480 + }, + { + "epoch": 
1.14, + "learning_rate": 2.070015220700152e-07, + "logits/chosen": -2.1459603309631348, + "logits/rejected": -1.7423560619354248, + "logps/chosen": -87.96595764160156, + "logps/rejected": -120.45890045166016, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2795779705047607, + "rewards/margins": 26.35489273071289, + "rewards/rejected": -24.0753116607666, + "step": 2490 + }, + { + "epoch": 1.14, + "learning_rate": 2.0649416539827496e-07, + "logits/chosen": -2.1519787311553955, + "logits/rejected": -1.7902311086654663, + "logps/chosen": -85.25035095214844, + "logps/rejected": -112.2745590209961, + "loss": 0.009, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.7297900915145874, + "rewards/margins": 22.654048919677734, + "rewards/rejected": -20.92425537109375, + "step": 2500 + }, + { + "epoch": 1.14, + "eval_logits/chosen": -2.1591501235961914, + "eval_logits/rejected": -1.8124133348464966, + "eval_logps/chosen": -84.112548828125, + "eval_logps/rejected": -110.20504760742188, + "eval_loss": 0.005225938744843006, + "eval_rewards/accuracies": 0.994413435459137, + "eval_rewards/chosen": 2.3590192794799805, + "eval_rewards/margins": 24.04608726501465, + "eval_rewards/rejected": -21.68706703186035, + "eval_runtime": 233.6775, + "eval_samples_per_second": 12.248, + "eval_steps_per_second": 0.766, + "step": 2500 + }, + { + "epoch": 1.15, + "learning_rate": 2.0598680872653472e-07, + "logits/chosen": -2.1743123531341553, + "logits/rejected": -1.7647409439086914, + "logps/chosen": -90.07218933105469, + "logps/rejected": -115.3643798828125, + "loss": 0.0037, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.752506732940674, + "rewards/margins": 24.845409393310547, + "rewards/rejected": -22.092905044555664, + "step": 2510 + }, + { + "epoch": 1.15, + "learning_rate": 2.054794520547945e-07, + "logits/chosen": -2.0834057331085205, + "logits/rejected": -1.7100751399993896, + "logps/chosen": -85.79044342041016, + "logps/rejected": 
-113.79996490478516, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7096903324127197, + "rewards/margins": 25.91143798828125, + "rewards/rejected": -23.20174789428711, + "step": 2520 + }, + { + "epoch": 1.15, + "learning_rate": 2.0497209538305426e-07, + "logits/chosen": -2.2217605113983154, + "logits/rejected": -1.871063470840454, + "logps/chosen": -87.78010559082031, + "logps/rejected": -115.4132080078125, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.101217746734619, + "rewards/margins": 25.03754234313965, + "rewards/rejected": -22.93632698059082, + "step": 2530 + }, + { + "epoch": 1.16, + "learning_rate": 2.0446473871131402e-07, + "logits/chosen": -2.188868761062622, + "logits/rejected": -1.7949635982513428, + "logps/chosen": -90.8096923828125, + "logps/rejected": -122.7888412475586, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.599236488342285, + "rewards/margins": 25.132827758789062, + "rewards/rejected": -22.533588409423828, + "step": 2540 + }, + { + "epoch": 1.16, + "learning_rate": 2.039573820395738e-07, + "logits/chosen": -2.1523804664611816, + "logits/rejected": -1.8026365041732788, + "logps/chosen": -79.64842224121094, + "logps/rejected": -115.60630798339844, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1584062576293945, + "rewards/margins": 25.834789276123047, + "rewards/rejected": -23.67638397216797, + "step": 2550 + }, + { + "epoch": 1.17, + "learning_rate": 2.0345002536783356e-07, + "logits/chosen": -2.191920757293701, + "logits/rejected": -1.7900508642196655, + "logps/chosen": -81.1897964477539, + "logps/rejected": -110.9433822631836, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1984918117523193, + "rewards/margins": 25.696517944335938, + "rewards/rejected": -22.498027801513672, + "step": 2560 + }, + { + "epoch": 1.17, + "learning_rate": 2.0294266869609332e-07, + "logits/chosen": -2.240447998046875, + "logits/rejected": 
-1.842795729637146, + "logps/chosen": -89.96078491210938, + "logps/rejected": -115.45732116699219, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5964362621307373, + "rewards/margins": 26.703838348388672, + "rewards/rejected": -24.107402801513672, + "step": 2570 + }, + { + "epoch": 1.18, + "learning_rate": 2.024353120243531e-07, + "logits/chosen": -2.155785083770752, + "logits/rejected": -1.7801926136016846, + "logps/chosen": -79.60318756103516, + "logps/rejected": -111.69859313964844, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1407876014709473, + "rewards/margins": 24.803207397460938, + "rewards/rejected": -21.66242027282715, + "step": 2580 + }, + { + "epoch": 1.18, + "learning_rate": 2.0192795535261286e-07, + "logits/chosen": -2.186657428741455, + "logits/rejected": -1.8247960805892944, + "logps/chosen": -83.01274108886719, + "logps/rejected": -115.82133483886719, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.050610065460205, + "rewards/margins": 26.237497329711914, + "rewards/rejected": -23.1868839263916, + "step": 2590 + }, + { + "epoch": 1.19, + "learning_rate": 2.0142059868087262e-07, + "logits/chosen": -2.184058666229248, + "logits/rejected": -1.8208153247833252, + "logps/chosen": -87.23689270019531, + "logps/rejected": -112.4271240234375, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2483184337615967, + "rewards/margins": 25.097911834716797, + "rewards/rejected": -21.849592208862305, + "step": 2600 + }, + { + "epoch": 1.19, + "eval_logits/chosen": -2.1579010486602783, + "eval_logits/rejected": -1.8120908737182617, + "eval_logps/chosen": -83.75383758544922, + "eval_logps/rejected": -112.24433898925781, + "eval_loss": 0.005189881194382906, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 2.5383784770965576, + "eval_rewards/margins": 25.245080947875977, + "eval_rewards/rejected": -22.706703186035156, + "eval_runtime": 299.5228, + 
"eval_samples_per_second": 9.555, + "eval_steps_per_second": 0.598, + "step": 2600 + }, + { + "epoch": 1.19, + "learning_rate": 2.009132420091324e-07, + "logits/chosen": -2.0997519493103027, + "logits/rejected": -1.7868757247924805, + "logps/chosen": -81.20166015625, + "logps/rejected": -114.8741455078125, + "loss": 0.0065, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8495497703552246, + "rewards/margins": 26.784374237060547, + "rewards/rejected": -23.934823989868164, + "step": 2610 + }, + { + "epoch": 1.2, + "learning_rate": 2.0040588533739216e-07, + "logits/chosen": -2.1891965866088867, + "logits/rejected": -1.8037872314453125, + "logps/chosen": -86.70457458496094, + "logps/rejected": -116.2646255493164, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.284919261932373, + "rewards/margins": 26.434612274169922, + "rewards/rejected": -24.149694442749023, + "step": 2620 + }, + { + "epoch": 1.2, + "learning_rate": 1.9989852866565192e-07, + "logits/chosen": -2.1447997093200684, + "logits/rejected": -1.8104356527328491, + "logps/chosen": -85.48689270019531, + "logps/rejected": -120.234375, + "loss": 0.0048, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.3036551475524902, + "rewards/margins": 25.647720336914062, + "rewards/rejected": -24.344066619873047, + "step": 2630 + }, + { + "epoch": 1.21, + "learning_rate": 1.993911719939117e-07, + "logits/chosen": -2.1070830821990967, + "logits/rejected": -1.7089662551879883, + "logps/chosen": -88.43952178955078, + "logps/rejected": -112.84476470947266, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.554791212081909, + "rewards/margins": 25.463603973388672, + "rewards/rejected": -22.9088134765625, + "step": 2640 + }, + { + "epoch": 1.21, + "learning_rate": 1.9888381532217146e-07, + "logits/chosen": -2.1640028953552246, + "logits/rejected": -1.803934097290039, + "logps/chosen": -87.4181900024414, + "logps/rejected": -119.70159912109375, + "loss": 
0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3634817600250244, + "rewards/margins": 25.766544342041016, + "rewards/rejected": -24.403064727783203, + "step": 2650 + }, + { + "epoch": 1.21, + "learning_rate": 1.9837645865043122e-07, + "logits/chosen": -2.202446460723877, + "logits/rejected": -1.8282486200332642, + "logps/chosen": -87.19379425048828, + "logps/rejected": -118.42472839355469, + "loss": 0.0024, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.8503637313842773, + "rewards/margins": 26.071752548217773, + "rewards/rejected": -24.22138786315918, + "step": 2660 + }, + { + "epoch": 1.22, + "learning_rate": 1.97869101978691e-07, + "logits/chosen": -2.149972915649414, + "logits/rejected": -1.789910912513733, + "logps/chosen": -87.65351867675781, + "logps/rejected": -117.45466613769531, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0328145027160645, + "rewards/margins": 25.302623748779297, + "rewards/rejected": -23.269811630249023, + "step": 2670 + }, + { + "epoch": 1.22, + "learning_rate": 1.9736174530695076e-07, + "logits/chosen": -2.2164740562438965, + "logits/rejected": -1.8319326639175415, + "logps/chosen": -87.3599853515625, + "logps/rejected": -120.3600845336914, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.874768853187561, + "rewards/margins": 27.10894775390625, + "rewards/rejected": -25.234180450439453, + "step": 2680 + }, + { + "epoch": 1.23, + "learning_rate": 1.9685438863521052e-07, + "logits/chosen": -2.1731603145599365, + "logits/rejected": -1.781818151473999, + "logps/chosen": -86.36930847167969, + "logps/rejected": -117.47142028808594, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4858036041259766, + "rewards/margins": 26.315841674804688, + "rewards/rejected": -23.83003807067871, + "step": 2690 + }, + { + "epoch": 1.23, + "learning_rate": 1.963470319634703e-07, + "logits/chosen": -2.2734062671661377, + "logits/rejected": -1.908062219619751, + 
"logps/chosen": -84.78250122070312, + "logps/rejected": -117.51930236816406, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3912415504455566, + "rewards/margins": 27.765466690063477, + "rewards/rejected": -25.37422752380371, + "step": 2700 + }, + { + "epoch": 1.23, + "eval_logits/chosen": -2.178615093231201, + "eval_logits/rejected": -1.8322229385375977, + "eval_logps/chosen": -86.60983276367188, + "eval_logps/rejected": -116.65242767333984, + "eval_loss": 0.005201002117246389, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.1103774309158325, + "eval_rewards/margins": 26.021133422851562, + "eval_rewards/rejected": -24.910757064819336, + "eval_runtime": 204.9916, + "eval_samples_per_second": 13.962, + "eval_steps_per_second": 0.873, + "step": 2700 + }, + { + "epoch": 1.24, + "learning_rate": 1.9583967529173006e-07, + "logits/chosen": -2.1784110069274902, + "logits/rejected": -1.8004591464996338, + "logps/chosen": -90.504638671875, + "logps/rejected": -120.81787109375, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6989917755126953, + "rewards/margins": 27.467670440673828, + "rewards/rejected": -25.7686767578125, + "step": 2710 + }, + { + "epoch": 1.24, + "learning_rate": 1.9533231861998982e-07, + "logits/chosen": -2.2853636741638184, + "logits/rejected": -1.9385350942611694, + "logps/chosen": -88.9593734741211, + "logps/rejected": -126.08199310302734, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.512266993522644, + "rewards/margins": 27.1735782623291, + "rewards/rejected": -25.661312103271484, + "step": 2720 + }, + { + "epoch": 1.25, + "learning_rate": 1.948249619482496e-07, + "logits/chosen": -2.0956151485443115, + "logits/rejected": -1.7385085821151733, + "logps/chosen": -88.5088882446289, + "logps/rejected": -119.0416488647461, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1305296421051025, + "rewards/margins": 26.505001068115234, + 
"rewards/rejected": -25.374475479125977, + "step": 2730 + }, + { + "epoch": 1.25, + "learning_rate": 1.9431760527650936e-07, + "logits/chosen": -2.2182435989379883, + "logits/rejected": -1.8027465343475342, + "logps/chosen": -87.2740707397461, + "logps/rejected": -126.13343811035156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.500243663787842, + "rewards/margins": 29.380783081054688, + "rewards/rejected": -26.880542755126953, + "step": 2740 + }, + { + "epoch": 1.26, + "learning_rate": 1.9381024860476912e-07, + "logits/chosen": -2.2416915893554688, + "logits/rejected": -1.851488471031189, + "logps/chosen": -92.11241149902344, + "logps/rejected": -120.4324722290039, + "loss": 0.0055, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0522854328155518, + "rewards/margins": 27.805065155029297, + "rewards/rejected": -25.75278091430664, + "step": 2750 + }, + { + "epoch": 1.26, + "learning_rate": 1.933028919330289e-07, + "logits/chosen": -2.1997532844543457, + "logits/rejected": -1.8702919483184814, + "logps/chosen": -87.5173568725586, + "logps/rejected": -121.913818359375, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8851993680000305, + "rewards/margins": 27.617206573486328, + "rewards/rejected": -26.732006072998047, + "step": 2760 + }, + { + "epoch": 1.26, + "learning_rate": 1.9279553526128866e-07, + "logits/chosen": -2.212709426879883, + "logits/rejected": -1.8357082605361938, + "logps/chosen": -85.00953674316406, + "logps/rejected": -122.2784652709961, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2315926551818848, + "rewards/margins": 29.97836685180664, + "rewards/rejected": -27.746774673461914, + "step": 2770 + }, + { + "epoch": 1.27, + "learning_rate": 1.9228817858954842e-07, + "logits/chosen": -2.2300631999969482, + "logits/rejected": -1.828784704208374, + "logps/chosen": -84.7209243774414, + "logps/rejected": -122.51011657714844, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": 
3.3681228160858154, + "rewards/margins": 29.574649810791016, + "rewards/rejected": -26.206527709960938, + "step": 2780 + }, + { + "epoch": 1.27, + "learning_rate": 1.917808219178082e-07, + "logits/chosen": -2.255375385284424, + "logits/rejected": -1.8779224157333374, + "logps/chosen": -85.09639739990234, + "logps/rejected": -115.9112548828125, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2712059020996094, + "rewards/margins": 27.021814346313477, + "rewards/rejected": -23.750606536865234, + "step": 2790 + }, + { + "epoch": 1.28, + "learning_rate": 1.9127346524606796e-07, + "logits/chosen": -2.224299430847168, + "logits/rejected": -1.8939344882965088, + "logps/chosen": -82.23179626464844, + "logps/rejected": -113.6113052368164, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4387083053588867, + "rewards/margins": 26.06760025024414, + "rewards/rejected": -23.62889289855957, + "step": 2800 + }, + { + "epoch": 1.28, + "eval_logits/chosen": -2.1937084197998047, + "eval_logits/rejected": -1.8446825742721558, + "eval_logps/chosen": -84.96446990966797, + "eval_logps/rejected": -116.5991439819336, + "eval_loss": 0.005613674875348806, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.9330565929412842, + "eval_rewards/margins": 26.81716537475586, + "eval_rewards/rejected": -24.88410758972168, + "eval_runtime": 259.8625, + "eval_samples_per_second": 11.014, + "eval_steps_per_second": 0.689, + "step": 2800 + }, + { + "epoch": 1.28, + "learning_rate": 1.9076610857432772e-07, + "logits/chosen": -2.2159202098846436, + "logits/rejected": -1.7732995748519897, + "logps/chosen": -86.9103775024414, + "logps/rejected": -119.67277526855469, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8791584968566895, + "rewards/margins": 28.936452865600586, + "rewards/rejected": -26.05729103088379, + "step": 2810 + }, + { + "epoch": 1.29, + "learning_rate": 1.902587519025875e-07, + "logits/chosen": 
-2.169167995452881, + "logits/rejected": -1.750314474105835, + "logps/chosen": -89.01744079589844, + "logps/rejected": -120.8212661743164, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.396334171295166, + "rewards/margins": 29.005077362060547, + "rewards/rejected": -26.60874366760254, + "step": 2820 + }, + { + "epoch": 1.29, + "learning_rate": 1.8975139523084726e-07, + "logits/chosen": -2.1312053203582764, + "logits/rejected": -1.7972911596298218, + "logps/chosen": -86.7480697631836, + "logps/rejected": -125.67042541503906, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.189509391784668, + "rewards/margins": 28.36881446838379, + "rewards/rejected": -26.179306030273438, + "step": 2830 + }, + { + "epoch": 1.3, + "learning_rate": 1.8924403855910702e-07, + "logits/chosen": -2.2344472408294678, + "logits/rejected": -1.9485044479370117, + "logps/chosen": -79.72859954833984, + "logps/rejected": -119.08320617675781, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4703823328018188, + "rewards/margins": 27.174612045288086, + "rewards/rejected": -25.7042293548584, + "step": 2840 + }, + { + "epoch": 1.3, + "learning_rate": 1.887366818873668e-07, + "logits/chosen": -2.237384080886841, + "logits/rejected": -1.8804075717926025, + "logps/chosen": -87.28046417236328, + "logps/rejected": -117.6243896484375, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6116135120391846, + "rewards/margins": 26.46062660217285, + "rewards/rejected": -24.849010467529297, + "step": 2850 + }, + { + "epoch": 1.31, + "learning_rate": 1.8822932521562656e-07, + "logits/chosen": -2.1050355434417725, + "logits/rejected": -1.7900664806365967, + "logps/chosen": -87.86946105957031, + "logps/rejected": -118.79146575927734, + "loss": 0.0076, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.9105596542358398, + "rewards/margins": 25.8955020904541, + "rewards/rejected": -23.984943389892578, + "step": 2860 + }, + { + 
"epoch": 1.31, + "learning_rate": 1.8772196854388632e-07, + "logits/chosen": -2.2055516242980957, + "logits/rejected": -1.794002890586853, + "logps/chosen": -90.33226013183594, + "logps/rejected": -122.12618255615234, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3315025568008423, + "rewards/margins": 27.155811309814453, + "rewards/rejected": -25.82430648803711, + "step": 2870 + }, + { + "epoch": 1.31, + "learning_rate": 1.872146118721461e-07, + "logits/chosen": -2.1890950202941895, + "logits/rejected": -1.7347943782806396, + "logps/chosen": -93.88822937011719, + "logps/rejected": -118.97929382324219, + "loss": 0.0011, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.2069919109344482, + "rewards/margins": 27.36787986755371, + "rewards/rejected": -25.160892486572266, + "step": 2880 + }, + { + "epoch": 1.32, + "learning_rate": 1.8670725520040586e-07, + "logits/chosen": -2.2413697242736816, + "logits/rejected": -1.8580372333526611, + "logps/chosen": -88.85777282714844, + "logps/rejected": -122.02364349365234, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6634600162506104, + "rewards/margins": 29.12453842163086, + "rewards/rejected": -26.46108055114746, + "step": 2890 + }, + { + "epoch": 1.32, + "learning_rate": 1.8619989852866562e-07, + "logits/chosen": -2.1835293769836426, + "logits/rejected": -1.8364540338516235, + "logps/chosen": -87.46830749511719, + "logps/rejected": -118.46342468261719, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9554582834243774, + "rewards/margins": 27.929412841796875, + "rewards/rejected": -25.973957061767578, + "step": 2900 + }, + { + "epoch": 1.32, + "eval_logits/chosen": -2.1951773166656494, + "eval_logits/rejected": -1.8438202142715454, + "eval_logps/chosen": -85.55497741699219, + "eval_logps/rejected": -119.35179901123047, + "eval_loss": 0.005554942414164543, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.6378037929534912, 
+ "eval_rewards/margins": 27.898239135742188, + "eval_rewards/rejected": -26.26043701171875, + "eval_runtime": 327.6474, + "eval_samples_per_second": 8.735, + "eval_steps_per_second": 0.546, + "step": 2900 + }, + { + "epoch": 1.33, + "learning_rate": 1.856925418569254e-07, + "logits/chosen": -2.1702980995178223, + "logits/rejected": -1.7541742324829102, + "logps/chosen": -87.9078598022461, + "logps/rejected": -117.9532699584961, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.8803863525390625, + "rewards/margins": 28.9564151763916, + "rewards/rejected": -25.07602882385254, + "step": 2910 + }, + { + "epoch": 1.33, + "learning_rate": 1.8518518518518516e-07, + "logits/chosen": -2.164057493209839, + "logits/rejected": -1.8279097080230713, + "logps/chosen": -82.57586669921875, + "logps/rejected": -120.59794616699219, + "loss": 0.0053, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.3020541667938232, + "rewards/margins": 29.443384170532227, + "rewards/rejected": -27.141326904296875, + "step": 2920 + }, + { + "epoch": 1.34, + "learning_rate": 1.8467782851344492e-07, + "logits/chosen": -2.170772075653076, + "logits/rejected": -1.8161497116088867, + "logps/chosen": -85.89886474609375, + "logps/rejected": -123.47340393066406, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1593396663665771, + "rewards/margins": 29.278268814086914, + "rewards/rejected": -28.118927001953125, + "step": 2930 + }, + { + "epoch": 1.34, + "learning_rate": 1.841704718417047e-07, + "logits/chosen": -2.198502779006958, + "logits/rejected": -1.8595365285873413, + "logps/chosen": -82.33607482910156, + "logps/rejected": -117.26557922363281, + "loss": 0.0058, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3132636547088623, + "rewards/margins": 27.13201904296875, + "rewards/rejected": -25.818756103515625, + "step": 2940 + }, + { + "epoch": 1.35, + "learning_rate": 1.8366311516996446e-07, + "logits/chosen": -2.2103936672210693, + "logits/rejected": 
-1.8283309936523438, + "logps/chosen": -87.3060302734375, + "logps/rejected": -116.38105773925781, + "loss": 0.0002, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.252316951751709, + "rewards/margins": 27.21462059020996, + "rewards/rejected": -24.962305068969727, + "step": 2950 + }, + { + "epoch": 1.35, + "learning_rate": 1.8315575849822422e-07, + "logits/chosen": -2.1284189224243164, + "logits/rejected": -1.7991136312484741, + "logps/chosen": -85.66838073730469, + "logps/rejected": -115.23576354980469, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.003413677215576, + "rewards/margins": 25.363988876342773, + "rewards/rejected": -23.360576629638672, + "step": 2960 + }, + { + "epoch": 1.36, + "learning_rate": 1.82648401826484e-07, + "logits/chosen": -2.1712751388549805, + "logits/rejected": -1.8838971853256226, + "logps/chosen": -81.59526824951172, + "logps/rejected": -119.744384765625, + "loss": 0.0049, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.3410313129425049, + "rewards/margins": 26.60662269592285, + "rewards/rejected": -25.265588760375977, + "step": 2970 + }, + { + "epoch": 1.36, + "learning_rate": 1.8214104515474375e-07, + "logits/chosen": -2.229074239730835, + "logits/rejected": -1.8246219158172607, + "logps/chosen": -85.791259765625, + "logps/rejected": -122.32413482666016, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3884541988372803, + "rewards/margins": 29.200729370117188, + "rewards/rejected": -27.812274932861328, + "step": 2980 + }, + { + "epoch": 1.36, + "learning_rate": 1.8163368848300352e-07, + "logits/chosen": -2.2312495708465576, + "logits/rejected": -1.912697196006775, + "logps/chosen": -80.87403869628906, + "logps/rejected": -123.8989028930664, + "loss": 0.0083, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.5714377164840698, + "rewards/margins": 28.29909324645996, + "rewards/rejected": -26.7276554107666, + "step": 2990 + }, + { + "epoch": 1.37, + 
"learning_rate": 1.811263318112633e-07, + "logits/chosen": -2.2286040782928467, + "logits/rejected": -1.8826186656951904, + "logps/chosen": -86.8649673461914, + "logps/rejected": -121.7653579711914, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1471002101898193, + "rewards/margins": 27.70977783203125, + "rewards/rejected": -25.562679290771484, + "step": 3000 + }, + { + "epoch": 1.37, + "eval_logits/chosen": -2.1800849437713623, + "eval_logits/rejected": -1.830121636390686, + "eval_logps/chosen": -86.1629867553711, + "eval_logps/rejected": -119.01561737060547, + "eval_loss": 0.006075535900890827, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.3337992429733276, + "eval_rewards/margins": 27.4261474609375, + "eval_rewards/rejected": -26.092342376708984, + "eval_runtime": 190.0895, + "eval_samples_per_second": 15.056, + "eval_steps_per_second": 0.942, + "step": 3000 + }, + { + "epoch": 1.37, + "learning_rate": 1.8061897513952305e-07, + "logits/chosen": -2.1978001594543457, + "logits/rejected": -1.7904773950576782, + "logps/chosen": -89.93728637695312, + "logps/rejected": -120.550048828125, + "loss": 0.0003, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8481388092041016, + "rewards/margins": 28.82729721069336, + "rewards/rejected": -25.979156494140625, + "step": 3010 + }, + { + "epoch": 1.38, + "learning_rate": 1.8011161846778282e-07, + "logits/chosen": -2.20564603805542, + "logits/rejected": -1.8694099187850952, + "logps/chosen": -85.4388427734375, + "logps/rejected": -119.31624603271484, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.9251155853271484, + "rewards/margins": 26.277385711669922, + "rewards/rejected": -24.352270126342773, + "step": 3020 + }, + { + "epoch": 1.38, + "learning_rate": 1.796042617960426e-07, + "logits/chosen": -2.176328659057617, + "logits/rejected": -1.7874501943588257, + "logps/chosen": -97.2492446899414, + "logps/rejected": -123.11531066894531, + 
"loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5614807605743408, + "rewards/margins": 26.78195571899414, + "rewards/rejected": -25.22047233581543, + "step": 3030 + }, + { + "epoch": 1.39, + "learning_rate": 1.7909690512430235e-07, + "logits/chosen": -2.1733341217041016, + "logits/rejected": -1.796979546546936, + "logps/chosen": -84.2280502319336, + "logps/rejected": -116.29603576660156, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3264310359954834, + "rewards/margins": 27.896953582763672, + "rewards/rejected": -25.57052230834961, + "step": 3040 + }, + { + "epoch": 1.39, + "learning_rate": 1.7858954845256212e-07, + "logits/chosen": -2.2226386070251465, + "logits/rejected": -1.8575359582901, + "logps/chosen": -85.23347473144531, + "logps/rejected": -116.77949523925781, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.571824550628662, + "rewards/margins": 27.728759765625, + "rewards/rejected": -25.156932830810547, + "step": 3050 + }, + { + "epoch": 1.4, + "learning_rate": 1.780821917808219e-07, + "logits/chosen": -2.196302652359009, + "logits/rejected": -1.8011735677719116, + "logps/chosen": -90.76152038574219, + "logps/rejected": -119.6912841796875, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5391037464141846, + "rewards/margins": 26.7655086517334, + "rewards/rejected": -24.226404190063477, + "step": 3060 + }, + { + "epoch": 1.4, + "learning_rate": 1.7757483510908165e-07, + "logits/chosen": -2.184532642364502, + "logits/rejected": -1.8580677509307861, + "logps/chosen": -84.7155990600586, + "logps/rejected": -126.76289367675781, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2373816967010498, + "rewards/margins": 28.667322158813477, + "rewards/rejected": -27.4299373626709, + "step": 3070 + }, + { + "epoch": 1.41, + "learning_rate": 1.7706747843734142e-07, + "logits/chosen": -2.236210823059082, + "logits/rejected": -1.8297055959701538, + "logps/chosen": 
-84.67916107177734, + "logps/rejected": -119.5815200805664, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.601902723312378, + "rewards/margins": 28.8743896484375, + "rewards/rejected": -27.27248764038086, + "step": 3080 + }, + { + "epoch": 1.41, + "learning_rate": 1.765601217656012e-07, + "logits/chosen": -2.2311558723449707, + "logits/rejected": -1.878273367881775, + "logps/chosen": -88.32709503173828, + "logps/rejected": -121.57609558105469, + "loss": 0.0054, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.2206145524978638, + "rewards/margins": 28.100128173828125, + "rewards/rejected": -26.879512786865234, + "step": 3090 + }, + { + "epoch": 1.42, + "learning_rate": 1.7605276509386095e-07, + "logits/chosen": -2.1835224628448486, + "logits/rejected": -1.8489364385604858, + "logps/chosen": -84.09500122070312, + "logps/rejected": -121.5748519897461, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4824763238430023, + "rewards/margins": 27.168865203857422, + "rewards/rejected": -26.686386108398438, + "step": 3100 + }, + { + "epoch": 1.42, + "eval_logits/chosen": -2.176138162612915, + "eval_logits/rejected": -1.8300259113311768, + "eval_logps/chosen": -86.61859130859375, + "eval_logps/rejected": -119.87804412841797, + "eval_loss": 0.0059745111502707005, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.1059939861297607, + "eval_rewards/margins": 27.629554748535156, + "eval_rewards/rejected": -26.523563385009766, + "eval_runtime": 207.1978, + "eval_samples_per_second": 13.813, + "eval_steps_per_second": 0.864, + "step": 3100 + }, + { + "epoch": 1.42, + "learning_rate": 1.7554540842212072e-07, + "logits/chosen": -2.169111728668213, + "logits/rejected": -1.8405656814575195, + "logps/chosen": -83.15672302246094, + "logps/rejected": -120.4351577758789, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0360536575317383, + "rewards/margins": 27.2529354095459, + 
"rewards/rejected": -26.216882705688477, + "step": 3110 + }, + { + "epoch": 1.42, + "learning_rate": 1.750380517503805e-07, + "logits/chosen": -2.2091641426086426, + "logits/rejected": -1.8550224304199219, + "logps/chosen": -84.96271514892578, + "logps/rejected": -119.28694152832031, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8836094737052917, + "rewards/margins": 26.456172943115234, + "rewards/rejected": -25.572561264038086, + "step": 3120 + }, + { + "epoch": 1.43, + "learning_rate": 1.7453069507864025e-07, + "logits/chosen": -2.2117037773132324, + "logits/rejected": -1.8669350147247314, + "logps/chosen": -84.34877014160156, + "logps/rejected": -119.05101013183594, + "loss": 0.0076, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.0027484893798828, + "rewards/margins": 28.111114501953125, + "rewards/rejected": -27.10836410522461, + "step": 3130 + }, + { + "epoch": 1.43, + "learning_rate": 1.7402333840690002e-07, + "logits/chosen": -2.1435580253601074, + "logits/rejected": -1.753458023071289, + "logps/chosen": -93.03610229492188, + "logps/rejected": -126.67008972167969, + "loss": 0.0008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7019534707069397, + "rewards/margins": 28.131439208984375, + "rewards/rejected": -27.42948341369629, + "step": 3140 + }, + { + "epoch": 1.44, + "learning_rate": 1.735159817351598e-07, + "logits/chosen": -2.1424784660339355, + "logits/rejected": -1.8056213855743408, + "logps/chosen": -82.12688446044922, + "logps/rejected": -125.8278579711914, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.45741605758667, + "rewards/margins": 30.075420379638672, + "rewards/rejected": -27.61800765991211, + "step": 3150 + }, + { + "epoch": 1.44, + "learning_rate": 1.7300862506341955e-07, + "logits/chosen": -2.2541353702545166, + "logits/rejected": -1.8698198795318604, + "logps/chosen": -87.37802124023438, + "logps/rejected": -123.66682434082031, + "loss": 0.0064, + "rewards/accuracies": 
1.0, + "rewards/chosen": 1.516167402267456, + "rewards/margins": 29.741958618164062, + "rewards/rejected": -28.22579002380371, + "step": 3160 + }, + { + "epoch": 1.45, + "learning_rate": 1.7250126839167932e-07, + "logits/chosen": -2.297926902770996, + "logits/rejected": -1.9295371770858765, + "logps/chosen": -88.74690246582031, + "logps/rejected": -119.91023254394531, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9074515104293823, + "rewards/margins": 27.017669677734375, + "rewards/rejected": -26.110218048095703, + "step": 3170 + }, + { + "epoch": 1.45, + "learning_rate": 1.719939117199391e-07, + "logits/chosen": -2.1698784828186035, + "logits/rejected": -1.8197612762451172, + "logps/chosen": -87.33271789550781, + "logps/rejected": -120.39599609375, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6318087577819824, + "rewards/margins": 29.638574600219727, + "rewards/rejected": -27.006765365600586, + "step": 3180 + }, + { + "epoch": 1.46, + "learning_rate": 1.7148655504819885e-07, + "logits/chosen": -2.126783847808838, + "logits/rejected": -1.7983070611953735, + "logps/chosen": -83.7030029296875, + "logps/rejected": -122.6335678100586, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.919542670249939, + "rewards/margins": 28.071773529052734, + "rewards/rejected": -26.152231216430664, + "step": 3190 + }, + { + "epoch": 1.46, + "learning_rate": 1.7097919837645862e-07, + "logits/chosen": -2.227081298828125, + "logits/rejected": -1.9020026922225952, + "logps/chosen": -85.4066162109375, + "logps/rejected": -123.98991394042969, + "loss": 0.0031, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.6433346271514893, + "rewards/margins": 29.48199462890625, + "rewards/rejected": -27.838659286499023, + "step": 3200 + }, + { + "epoch": 1.46, + "eval_logits/chosen": -2.1810193061828613, + "eval_logits/rejected": -1.8324401378631592, + "eval_logps/chosen": -85.56733703613281, + "eval_logps/rejected": 
-119.99109649658203, + "eval_loss": 0.00612166291102767, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.631625771522522, + "eval_rewards/margins": 28.211711883544922, + "eval_rewards/rejected": -26.58008575439453, + "eval_runtime": 214.0667, + "eval_samples_per_second": 13.37, + "eval_steps_per_second": 0.836, + "step": 3200 + }, + { + "epoch": 1.47, + "learning_rate": 1.704718417047184e-07, + "logits/chosen": -2.2383410930633545, + "logits/rejected": -1.9348970651626587, + "logps/chosen": -86.20955657958984, + "logps/rejected": -122.5006103515625, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4148099422454834, + "rewards/margins": 27.235687255859375, + "rewards/rejected": -24.82087516784668, + "step": 3210 + }, + { + "epoch": 1.47, + "learning_rate": 1.6996448503297815e-07, + "logits/chosen": -2.160952091217041, + "logits/rejected": -1.7654712200164795, + "logps/chosen": -89.77068328857422, + "logps/rejected": -118.8796615600586, + "loss": 0.004, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.054098606109619, + "rewards/margins": 26.069538116455078, + "rewards/rejected": -24.01543617248535, + "step": 3220 + }, + { + "epoch": 1.47, + "learning_rate": 1.6945712836123792e-07, + "logits/chosen": -2.2201457023620605, + "logits/rejected": -1.820336937904358, + "logps/chosen": -84.93563842773438, + "logps/rejected": -115.88444519042969, + "loss": 0.0066, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6848886013031006, + "rewards/margins": 26.666845321655273, + "rewards/rejected": -23.98195457458496, + "step": 3230 + }, + { + "epoch": 1.48, + "learning_rate": 1.689497716894977e-07, + "logits/chosen": -2.1646978855133057, + "logits/rejected": -1.8357467651367188, + "logps/chosen": -80.21171569824219, + "logps/rejected": -111.47000885009766, + "loss": 0.0043, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.577789306640625, + "rewards/margins": 25.826122283935547, + "rewards/rejected": 
-23.248332977294922, + "step": 3240 + }, + { + "epoch": 1.48, + "learning_rate": 1.6844241501775745e-07, + "logits/chosen": -2.1942062377929688, + "logits/rejected": -1.831578254699707, + "logps/chosen": -87.41214752197266, + "logps/rejected": -118.68165588378906, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.257878065109253, + "rewards/margins": 27.593063354492188, + "rewards/rejected": -24.335186004638672, + "step": 3250 + }, + { + "epoch": 1.49, + "learning_rate": 1.6793505834601722e-07, + "logits/chosen": -2.270230293273926, + "logits/rejected": -1.930605173110962, + "logps/chosen": -77.88417053222656, + "logps/rejected": -119.29927062988281, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.550905704498291, + "rewards/margins": 28.38033103942871, + "rewards/rejected": -25.82942771911621, + "step": 3260 + }, + { + "epoch": 1.49, + "learning_rate": 1.67427701674277e-07, + "logits/chosen": -2.1990127563476562, + "logits/rejected": -1.842660665512085, + "logps/chosen": -82.07890319824219, + "logps/rejected": -117.83439636230469, + "loss": 0.0028, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.725590467453003, + "rewards/margins": 28.352636337280273, + "rewards/rejected": -25.627044677734375, + "step": 3270 + }, + { + "epoch": 1.5, + "learning_rate": 1.6692034500253675e-07, + "logits/chosen": -2.181185245513916, + "logits/rejected": -1.8166754245758057, + "logps/chosen": -82.80810546875, + "logps/rejected": -124.4463882446289, + "loss": 0.0037, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.80354642868042, + "rewards/margins": 29.090805053710938, + "rewards/rejected": -26.287261962890625, + "step": 3280 + }, + { + "epoch": 1.5, + "learning_rate": 1.6641298833079652e-07, + "logits/chosen": -2.245620012283325, + "logits/rejected": -1.8439216613769531, + "logps/chosen": -87.42176055908203, + "logps/rejected": -118.27108001708984, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.304450273513794, + 
"rewards/margins": 28.731517791748047, + "rewards/rejected": -25.42706871032715, + "step": 3290 + }, + { + "epoch": 1.51, + "learning_rate": 1.659056316590563e-07, + "logits/chosen": -2.1820719242095947, + "logits/rejected": -1.8486725091934204, + "logps/chosen": -83.21141815185547, + "logps/rejected": -117.5422592163086, + "loss": 0.0018, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.1327810287475586, + "rewards/margins": 28.282459259033203, + "rewards/rejected": -26.149677276611328, + "step": 3300 + }, + { + "epoch": 1.51, + "eval_logits/chosen": -2.188385486602783, + "eval_logits/rejected": -1.8376048803329468, + "eval_logps/chosen": -84.18167114257812, + "eval_logps/rejected": -117.2090072631836, + "eval_loss": 0.005902700126171112, + "eval_rewards/accuracies": 0.994413435459137, + "eval_rewards/chosen": 2.3244550228118896, + "eval_rewards/margins": 27.51349639892578, + "eval_rewards/rejected": -25.189043045043945, + "eval_runtime": 191.0473, + "eval_samples_per_second": 14.981, + "eval_steps_per_second": 0.937, + "step": 3300 + }, + { + "epoch": 1.51, + "learning_rate": 1.6539827498731605e-07, + "logits/chosen": -2.2313265800476074, + "logits/rejected": -1.8483736515045166, + "logps/chosen": -87.50598907470703, + "logps/rejected": -118.2992935180664, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3828155994415283, + "rewards/margins": 27.8851261138916, + "rewards/rejected": -25.502309799194336, + "step": 3310 + }, + { + "epoch": 1.52, + "learning_rate": 1.6489091831557582e-07, + "logits/chosen": -2.143418312072754, + "logits/rejected": -1.8131214380264282, + "logps/chosen": -82.66302490234375, + "logps/rejected": -117.9818344116211, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9075496196746826, + "rewards/margins": 27.7532958984375, + "rewards/rejected": -24.845745086669922, + "step": 3320 + }, + { + "epoch": 1.52, + "learning_rate": 1.643835616438356e-07, + "logits/chosen": -2.185781955718994, + 
"logits/rejected": -1.8397849798202515, + "logps/chosen": -86.03587341308594, + "logps/rejected": -121.69071960449219, + "loss": 0.0037, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.5947184562683105, + "rewards/margins": 28.71170997619629, + "rewards/rejected": -26.116989135742188, + "step": 3330 + }, + { + "epoch": 1.52, + "learning_rate": 1.6387620497209535e-07, + "logits/chosen": -2.093822956085205, + "logits/rejected": -1.744879126548767, + "logps/chosen": -82.53218078613281, + "logps/rejected": -120.4306869506836, + "loss": 0.0069, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.362114429473877, + "rewards/margins": 28.930118560791016, + "rewards/rejected": -26.568002700805664, + "step": 3340 + }, + { + "epoch": 1.53, + "learning_rate": 1.6336884830035512e-07, + "logits/chosen": -2.222090244293213, + "logits/rejected": -1.889953851699829, + "logps/chosen": -84.85404968261719, + "logps/rejected": -122.89866638183594, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0494067668914795, + "rewards/margins": 27.54694175720215, + "rewards/rejected": -25.497535705566406, + "step": 3350 + }, + { + "epoch": 1.53, + "learning_rate": 1.6286149162861489e-07, + "logits/chosen": -2.2259693145751953, + "logits/rejected": -1.8098411560058594, + "logps/chosen": -87.61415100097656, + "logps/rejected": -129.11331176757812, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2813143730163574, + "rewards/margins": 30.5456485748291, + "rewards/rejected": -28.264331817626953, + "step": 3360 + }, + { + "epoch": 1.54, + "learning_rate": 1.6235413495687465e-07, + "logits/chosen": -2.115265369415283, + "logits/rejected": -1.7785238027572632, + "logps/chosen": -83.63951110839844, + "logps/rejected": -116.60148620605469, + "loss": 0.0029, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2996184825897217, + "rewards/margins": 27.973474502563477, + "rewards/rejected": -25.67385482788086, + "step": 3370 + }, + { + "epoch": 1.54, + 
"learning_rate": 1.6184677828513442e-07, + "logits/chosen": -2.213620185852051, + "logits/rejected": -1.853643774986267, + "logps/chosen": -84.76154327392578, + "logps/rejected": -120.73722839355469, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5600218772888184, + "rewards/margins": 28.535400390625, + "rewards/rejected": -25.97538185119629, + "step": 3380 + }, + { + "epoch": 1.55, + "learning_rate": 1.613394216133942e-07, + "logits/chosen": -2.2256524562835693, + "logits/rejected": -1.8734019994735718, + "logps/chosen": -82.6207046508789, + "logps/rejected": -123.33949279785156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0278208255767822, + "rewards/margins": 29.47714614868164, + "rewards/rejected": -27.449321746826172, + "step": 3390 + }, + { + "epoch": 1.55, + "learning_rate": 1.6083206494165398e-07, + "logits/chosen": -2.247122049331665, + "logits/rejected": -1.9250261783599854, + "logps/chosen": -81.63235473632812, + "logps/rejected": -120.65080261230469, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.553596258163452, + "rewards/margins": 29.37432289123535, + "rewards/rejected": -26.820724487304688, + "step": 3400 + }, + { + "epoch": 1.55, + "eval_logits/chosen": -2.1875288486480713, + "eval_logits/rejected": -1.8437479734420776, + "eval_logps/chosen": -84.4741439819336, + "eval_logps/rejected": -119.88742065429688, + "eval_loss": 0.005948640406131744, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 2.1782193183898926, + "eval_rewards/margins": 28.70646858215332, + "eval_rewards/rejected": -26.528249740600586, + "eval_runtime": 170.1725, + "eval_samples_per_second": 16.818, + "eval_steps_per_second": 1.052, + "step": 3400 + }, + { + "epoch": 1.56, + "learning_rate": 1.6032470826991375e-07, + "logits/chosen": -2.18739914894104, + "logits/rejected": -1.8111976385116577, + "logps/chosen": -83.60111999511719, + "logps/rejected": -120.876220703125, + "loss": 0.0033, + 
"rewards/accuracies": 1.0, + "rewards/chosen": 3.492570400238037, + "rewards/margins": 29.177154541015625, + "rewards/rejected": -25.684585571289062, + "step": 3410 + }, + { + "epoch": 1.56, + "learning_rate": 1.598173515981735e-07, + "logits/chosen": -2.1494498252868652, + "logits/rejected": -1.715921401977539, + "logps/chosen": -87.95207214355469, + "logps/rejected": -117.0306396484375, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5173392295837402, + "rewards/margins": 28.325458526611328, + "rewards/rejected": -24.80811882019043, + "step": 3420 + }, + { + "epoch": 1.57, + "learning_rate": 1.5930999492643328e-07, + "logits/chosen": -2.2305150032043457, + "logits/rejected": -1.8406226634979248, + "logps/chosen": -85.95257568359375, + "logps/rejected": -121.40510559082031, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6646008491516113, + "rewards/margins": 29.42586898803711, + "rewards/rejected": -26.76127052307129, + "step": 3430 + }, + { + "epoch": 1.57, + "learning_rate": 1.5880263825469305e-07, + "logits/chosen": -2.2470791339874268, + "logits/rejected": -1.903607726097107, + "logps/chosen": -83.02677917480469, + "logps/rejected": -118.3487777709961, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.975865125656128, + "rewards/margins": 28.316776275634766, + "rewards/rejected": -25.340911865234375, + "step": 3440 + }, + { + "epoch": 1.57, + "learning_rate": 1.582952815829528e-07, + "logits/chosen": -2.106412410736084, + "logits/rejected": -1.7845014333724976, + "logps/chosen": -85.77284240722656, + "logps/rejected": -122.04698181152344, + "loss": 0.0033, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 3.0580241680145264, + "rewards/margins": 28.00246238708496, + "rewards/rejected": -24.94443702697754, + "step": 3450 + }, + { + "epoch": 1.58, + "learning_rate": 1.5778792491121258e-07, + "logits/chosen": -2.230118989944458, + "logits/rejected": -1.8575313091278076, + "logps/chosen": 
-84.6324462890625, + "logps/rejected": -119.6858901977539, + "loss": 0.0033, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 3.278738498687744, + "rewards/margins": 30.146167755126953, + "rewards/rejected": -26.867427825927734, + "step": 3460 + }, + { + "epoch": 1.58, + "learning_rate": 1.5728056823947235e-07, + "logits/chosen": -2.245896577835083, + "logits/rejected": -1.8796007633209229, + "logps/chosen": -83.982666015625, + "logps/rejected": -119.01679992675781, + "loss": 0.0045, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 3.464191436767578, + "rewards/margins": 29.175838470458984, + "rewards/rejected": -25.711650848388672, + "step": 3470 + }, + { + "epoch": 1.59, + "learning_rate": 1.567732115677321e-07, + "logits/chosen": -2.186904191970825, + "logits/rejected": -1.7904678583145142, + "logps/chosen": -88.25177001953125, + "logps/rejected": -118.5602798461914, + "loss": 0.0067, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 2.3008944988250732, + "rewards/margins": 27.154077529907227, + "rewards/rejected": -24.853181838989258, + "step": 3480 + }, + { + "epoch": 1.59, + "learning_rate": 1.5626585489599188e-07, + "logits/chosen": -2.129375457763672, + "logits/rejected": -1.7635730504989624, + "logps/chosen": -91.15408325195312, + "logps/rejected": -130.5845489501953, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3261685371398926, + "rewards/margins": 30.6134033203125, + "rewards/rejected": -27.287235260009766, + "step": 3490 + }, + { + "epoch": 1.6, + "learning_rate": 1.5575849822425165e-07, + "logits/chosen": -2.22548246383667, + "logits/rejected": -1.8935177326202393, + "logps/chosen": -83.39473724365234, + "logps/rejected": -121.51374816894531, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.055039882659912, + "rewards/margins": 27.6326904296875, + "rewards/rejected": -25.577648162841797, + "step": 3500 + }, + { + "epoch": 1.6, + "eval_logits/chosen": 
-2.186929941177368, + "eval_logits/rejected": -1.8434008359909058, + "eval_logps/chosen": -84.8189697265625, + "eval_logps/rejected": -120.2051010131836, + "eval_loss": 0.00656374916434288, + "eval_rewards/accuracies": 0.994413435459137, + "eval_rewards/chosen": 2.0058064460754395, + "eval_rewards/margins": 28.69289779663086, + "eval_rewards/rejected": -26.687089920043945, + "eval_runtime": 188.198, + "eval_samples_per_second": 15.207, + "eval_steps_per_second": 0.951, + "step": 3500 + }, + { + "epoch": 1.6, + "learning_rate": 1.552511415525114e-07, + "logits/chosen": -2.2018935680389404, + "logits/rejected": -1.8334615230560303, + "logps/chosen": -85.780029296875, + "logps/rejected": -124.47122955322266, + "loss": 0.0048, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9497463703155518, + "rewards/margins": 29.585214614868164, + "rewards/rejected": -27.635467529296875, + "step": 3510 + }, + { + "epoch": 1.61, + "learning_rate": 1.5474378488077118e-07, + "logits/chosen": -2.2138073444366455, + "logits/rejected": -1.819265365600586, + "logps/chosen": -91.44041442871094, + "logps/rejected": -128.78485107421875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9119396209716797, + "rewards/margins": 30.014429092407227, + "rewards/rejected": -28.10248374938965, + "step": 3520 + }, + { + "epoch": 1.61, + "learning_rate": 1.5423642820903095e-07, + "logits/chosen": -2.1547751426696777, + "logits/rejected": -1.8329432010650635, + "logps/chosen": -84.93801879882812, + "logps/rejected": -124.8791275024414, + "loss": 0.0042, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.6016337871551514, + "rewards/margins": 29.17998695373535, + "rewards/rejected": -26.578350067138672, + "step": 3530 + }, + { + "epoch": 1.62, + "learning_rate": 1.537290715372907e-07, + "logits/chosen": -2.2604050636291504, + "logits/rejected": -1.8284223079681396, + "logps/chosen": -88.08625030517578, + "logps/rejected": -120.90476989746094, + "loss": 0.0, + 
"rewards/accuracies": 1.0, + "rewards/chosen": 2.400468349456787, + "rewards/margins": 29.343048095703125, + "rewards/rejected": -26.942581176757812, + "step": 3540 + }, + { + "epoch": 1.62, + "learning_rate": 1.5322171486555048e-07, + "logits/chosen": -2.2389979362487793, + "logits/rejected": -1.8285210132598877, + "logps/chosen": -92.38504791259766, + "logps/rejected": -129.23291015625, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8334972858428955, + "rewards/margins": 31.94364356994629, + "rewards/rejected": -29.11014747619629, + "step": 3550 + }, + { + "epoch": 1.63, + "learning_rate": 1.5271435819381025e-07, + "logits/chosen": -2.2842297554016113, + "logits/rejected": -1.9232155084609985, + "logps/chosen": -88.47393035888672, + "logps/rejected": -126.19525146484375, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.37927508354187, + "rewards/margins": 30.97516441345215, + "rewards/rejected": -28.595890045166016, + "step": 3560 + }, + { + "epoch": 1.63, + "learning_rate": 1.5220700152207e-07, + "logits/chosen": -2.1726303100585938, + "logits/rejected": -1.7958831787109375, + "logps/chosen": -92.33584594726562, + "logps/rejected": -127.83787536621094, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.8039255142211914, + "rewards/margins": 29.595382690429688, + "rewards/rejected": -27.791458129882812, + "step": 3570 + }, + { + "epoch": 1.63, + "learning_rate": 1.5169964485032978e-07, + "logits/chosen": -2.295196771621704, + "logits/rejected": -1.9108684062957764, + "logps/chosen": -86.93212890625, + "logps/rejected": -136.36459350585938, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.010756015777588, + "rewards/margins": 32.493587493896484, + "rewards/rejected": -30.482830047607422, + "step": 3580 + }, + { + "epoch": 1.64, + "learning_rate": 1.5119228817858955e-07, + "logits/chosen": -2.2773728370666504, + "logits/rejected": -1.9154014587402344, + "logps/chosen": 
-85.64041900634766, + "logps/rejected": -127.74327087402344, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8653056621551514, + "rewards/margins": 31.216760635375977, + "rewards/rejected": -28.351455688476562, + "step": 3590 + }, + { + "epoch": 1.64, + "learning_rate": 1.506849315068493e-07, + "logits/chosen": -2.1455962657928467, + "logits/rejected": -1.849805474281311, + "logps/chosen": -87.38817596435547, + "logps/rejected": -123.53019714355469, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1171107292175293, + "rewards/margins": 29.911731719970703, + "rewards/rejected": -27.79462242126465, + "step": 3600 + }, + { + "epoch": 1.64, + "eval_logits/chosen": -2.1960811614990234, + "eval_logits/rejected": -1.8514564037322998, + "eval_logps/chosen": -85.992919921875, + "eval_logps/rejected": -124.32015228271484, + "eval_loss": 0.005746352486312389, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.4188352823257446, + "eval_rewards/margins": 30.1634521484375, + "eval_rewards/rejected": -28.744617462158203, + "eval_runtime": 179.5705, + "eval_samples_per_second": 15.938, + "eval_steps_per_second": 0.997, + "step": 3600 + }, + { + "epoch": 1.65, + "learning_rate": 1.5017757483510908e-07, + "logits/chosen": -2.1365771293640137, + "logits/rejected": -1.8313045501708984, + "logps/chosen": -85.9605712890625, + "logps/rejected": -131.0879364013672, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9845980405807495, + "rewards/margins": 30.611257553100586, + "rewards/rejected": -29.626659393310547, + "step": 3610 + }, + { + "epoch": 1.65, + "learning_rate": 1.4967021816336885e-07, + "logits/chosen": -2.197392225265503, + "logits/rejected": -1.8468068838119507, + "logps/chosen": -82.49998474121094, + "logps/rejected": -126.23731994628906, + "loss": 0.0033, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.3484909534454346, + "rewards/margins": 31.131275177001953, + 
"rewards/rejected": -29.78278160095215, + "step": 3620 + }, + { + "epoch": 1.66, + "learning_rate": 1.491628614916286e-07, + "logits/chosen": -2.269951343536377, + "logits/rejected": -1.8826881647109985, + "logps/chosen": -88.26679992675781, + "logps/rejected": -132.8121795654297, + "loss": 0.0056, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.3638824224472046, + "rewards/margins": 31.313467025756836, + "rewards/rejected": -29.949581146240234, + "step": 3630 + }, + { + "epoch": 1.66, + "learning_rate": 1.4865550481988838e-07, + "logits/chosen": -2.1884894371032715, + "logits/rejected": -1.8790266513824463, + "logps/chosen": -79.186279296875, + "logps/rejected": -124.1537094116211, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4217311143875122, + "rewards/margins": 30.239089965820312, + "rewards/rejected": -28.81736183166504, + "step": 3640 + }, + { + "epoch": 1.67, + "learning_rate": 1.4814814814814815e-07, + "logits/chosen": -2.2884726524353027, + "logits/rejected": -1.9434821605682373, + "logps/chosen": -82.31842041015625, + "logps/rejected": -121.79225158691406, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6052954196929932, + "rewards/margins": 30.475265502929688, + "rewards/rejected": -28.869970321655273, + "step": 3650 + }, + { + "epoch": 1.67, + "learning_rate": 1.476407914764079e-07, + "logits/chosen": -2.2113142013549805, + "logits/rejected": -1.858170747756958, + "logps/chosen": -88.19541931152344, + "logps/rejected": -128.73983764648438, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.839066505432129, + "rewards/margins": 30.854511260986328, + "rewards/rejected": -29.015445709228516, + "step": 3660 + }, + { + "epoch": 1.68, + "learning_rate": 1.4713343480466768e-07, + "logits/chosen": -2.1949267387390137, + "logits/rejected": -1.8198333978652954, + "logps/chosen": -86.29945373535156, + "logps/rejected": -126.752197265625, + "loss": 0.0023, + "rewards/accuracies": 1.0, 
+ "rewards/chosen": 3.4010677337646484, + "rewards/margins": 32.765567779541016, + "rewards/rejected": -29.3644962310791, + "step": 3670 + }, + { + "epoch": 1.68, + "learning_rate": 1.4662607813292745e-07, + "logits/chosen": -2.2420246601104736, + "logits/rejected": -1.8449939489364624, + "logps/chosen": -88.93192291259766, + "logps/rejected": -125.01644134521484, + "loss": 0.0099, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.367809295654297, + "rewards/margins": 31.084781646728516, + "rewards/rejected": -28.716970443725586, + "step": 3680 + }, + { + "epoch": 1.68, + "learning_rate": 1.461187214611872e-07, + "logits/chosen": -2.1443724632263184, + "logits/rejected": -1.7969995737075806, + "logps/chosen": -88.4251937866211, + "logps/rejected": -124.62715911865234, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7502880096435547, + "rewards/margins": 30.875295639038086, + "rewards/rejected": -28.125009536743164, + "step": 3690 + }, + { + "epoch": 1.69, + "learning_rate": 1.4561136478944698e-07, + "logits/chosen": -2.1906471252441406, + "logits/rejected": -1.8091493844985962, + "logps/chosen": -85.97318267822266, + "logps/rejected": -130.11227416992188, + "loss": 0.003, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.8357086181640625, + "rewards/margins": 32.3084602355957, + "rewards/rejected": -28.47275161743164, + "step": 3700 + }, + { + "epoch": 1.69, + "eval_logits/chosen": -2.1971521377563477, + "eval_logits/rejected": -1.848021149635315, + "eval_logps/chosen": -84.61711120605469, + "eval_logps/rejected": -123.59754943847656, + "eval_loss": 0.005367867648601532, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 2.106738567352295, + "eval_rewards/margins": 30.49005699157715, + "eval_rewards/rejected": -28.38331413269043, + "eval_runtime": 176.7324, + "eval_samples_per_second": 16.194, + "eval_steps_per_second": 1.013, + "step": 3700 + }, + { + "epoch": 1.69, + "learning_rate": 1.4510400811770675e-07, + 
"logits/chosen": -2.226139545440674, + "logits/rejected": -1.836024522781372, + "logps/chosen": -91.7187271118164, + "logps/rejected": -129.92689514160156, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7738643884658813, + "rewards/margins": 30.75638198852539, + "rewards/rejected": -28.982519149780273, + "step": 3710 + }, + { + "epoch": 1.7, + "learning_rate": 1.445966514459665e-07, + "logits/chosen": -2.2146477699279785, + "logits/rejected": -1.8724933862686157, + "logps/chosen": -82.95762634277344, + "logps/rejected": -124.72331237792969, + "loss": 0.005, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.609710216522217, + "rewards/margins": 30.03472328186035, + "rewards/rejected": -27.425012588500977, + "step": 3720 + }, + { + "epoch": 1.7, + "learning_rate": 1.4408929477422628e-07, + "logits/chosen": -2.1765360832214355, + "logits/rejected": -1.8613145351409912, + "logps/chosen": -88.75824737548828, + "logps/rejected": -126.1810531616211, + "loss": 0.0056, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.3347582817077637, + "rewards/margins": 30.444360733032227, + "rewards/rejected": -28.109600067138672, + "step": 3730 + }, + { + "epoch": 1.71, + "learning_rate": 1.4358193810248604e-07, + "logits/chosen": -2.203244686126709, + "logits/rejected": -1.8518617153167725, + "logps/chosen": -83.48390197753906, + "logps/rejected": -124.0763168334961, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4211070537567139, + "rewards/margins": 29.72439956665039, + "rewards/rejected": -28.303295135498047, + "step": 3740 + }, + { + "epoch": 1.71, + "learning_rate": 1.430745814307458e-07, + "logits/chosen": -2.231482982635498, + "logits/rejected": -1.8217432498931885, + "logps/chosen": -89.63983917236328, + "logps/rejected": -125.65821838378906, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.655423641204834, + "rewards/margins": 29.783100128173828, + "rewards/rejected": -27.127676010131836, + "step": 3750 + }, + 
{ + "epoch": 1.72, + "learning_rate": 1.4256722475900558e-07, + "logits/chosen": -2.152547597885132, + "logits/rejected": -1.8171924352645874, + "logps/chosen": -81.50759887695312, + "logps/rejected": -123.8464126586914, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3097615242004395, + "rewards/margins": 29.635555267333984, + "rewards/rejected": -28.325796127319336, + "step": 3760 + }, + { + "epoch": 1.72, + "learning_rate": 1.4205986808726534e-07, + "logits/chosen": -2.249340534210205, + "logits/rejected": -1.9595706462860107, + "logps/chosen": -85.8914794921875, + "logps/rejected": -138.0444793701172, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.460918664932251, + "rewards/margins": 32.2510871887207, + "rewards/rejected": -29.790172576904297, + "step": 3770 + }, + { + "epoch": 1.73, + "learning_rate": 1.415525114155251e-07, + "logits/chosen": -2.1925182342529297, + "logits/rejected": -1.7810020446777344, + "logps/chosen": -93.9482421875, + "logps/rejected": -130.27859497070312, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.997776746749878, + "rewards/margins": 31.849191665649414, + "rewards/rejected": -29.851415634155273, + "step": 3780 + }, + { + "epoch": 1.73, + "learning_rate": 1.4104515474378488e-07, + "logits/chosen": -2.134547472000122, + "logits/rejected": -1.8135312795639038, + "logps/chosen": -84.8584976196289, + "logps/rejected": -123.8740005493164, + "loss": 0.009, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.4246654510498047, + "rewards/margins": 29.43796730041504, + "rewards/rejected": -28.013301849365234, + "step": 3790 + }, + { + "epoch": 1.73, + "learning_rate": 1.4053779807204464e-07, + "logits/chosen": -2.267373561859131, + "logits/rejected": -1.8483003377914429, + "logps/chosen": -87.11589050292969, + "logps/rejected": -115.36067199707031, + "loss": 0.006, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 3.7466697692871094, + "rewards/margins": 
28.394153594970703, + "rewards/rejected": -24.647480010986328, + "step": 3800 + }, + { + "epoch": 1.73, + "eval_logits/chosen": -2.191563367843628, + "eval_logits/rejected": -1.8433810472488403, + "eval_logps/chosen": -83.24746704101562, + "eval_logps/rejected": -116.2911148071289, + "eval_loss": 0.005436885170638561, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 2.791560173034668, + "eval_rewards/margins": 27.521656036376953, + "eval_rewards/rejected": -24.73009490966797, + "eval_runtime": 221.0913, + "eval_samples_per_second": 12.945, + "eval_steps_per_second": 0.81, + "step": 3800 + }, + { + "epoch": 1.74, + "learning_rate": 1.400304414003044e-07, + "logits/chosen": -2.2577452659606934, + "logits/rejected": -1.8542404174804688, + "logps/chosen": -81.37186431884766, + "logps/rejected": -114.3775405883789, + "loss": 0.0011, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.739640951156616, + "rewards/margins": 26.94769859313965, + "rewards/rejected": -24.208059310913086, + "step": 3810 + }, + { + "epoch": 1.74, + "learning_rate": 1.3952308472856418e-07, + "logits/chosen": -2.1801652908325195, + "logits/rejected": -1.8121554851531982, + "logps/chosen": -87.84750366210938, + "logps/rejected": -119.56182861328125, + "loss": 0.0007, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7155660390853882, + "rewards/margins": 28.609859466552734, + "rewards/rejected": -26.894290924072266, + "step": 3820 + }, + { + "epoch": 1.75, + "learning_rate": 1.3901572805682394e-07, + "logits/chosen": -2.166355609893799, + "logits/rejected": -1.7864339351654053, + "logps/chosen": -87.84122467041016, + "logps/rejected": -126.62345886230469, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1120693683624268, + "rewards/margins": 30.040283203125, + "rewards/rejected": -26.928213119506836, + "step": 3830 + }, + { + "epoch": 1.75, + "learning_rate": 1.385083713850837e-07, + "logits/chosen": -2.1811468601226807, + 
"logits/rejected": -1.7890942096710205, + "logps/chosen": -86.22557830810547, + "logps/rejected": -121.93861389160156, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1839969158172607, + "rewards/margins": 29.871551513671875, + "rewards/rejected": -26.687557220458984, + "step": 3840 + }, + { + "epoch": 1.76, + "learning_rate": 1.3800101471334348e-07, + "logits/chosen": -2.186249256134033, + "logits/rejected": -1.8273032903671265, + "logps/chosen": -86.01737213134766, + "logps/rejected": -119.9991226196289, + "loss": 0.0055, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.2794878482818604, + "rewards/margins": 28.3900203704834, + "rewards/rejected": -27.11053466796875, + "step": 3850 + }, + { + "epoch": 1.76, + "learning_rate": 1.3749365804160324e-07, + "logits/chosen": -2.122135639190674, + "logits/rejected": -1.7545080184936523, + "logps/chosen": -86.4559555053711, + "logps/rejected": -118.87664794921875, + "loss": 0.0033, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.654794454574585, + "rewards/margins": 27.156728744506836, + "rewards/rejected": -25.501934051513672, + "step": 3860 + }, + { + "epoch": 1.77, + "learning_rate": 1.36986301369863e-07, + "logits/chosen": -2.1122047901153564, + "logits/rejected": -1.7238849401474, + "logps/chosen": -84.66944122314453, + "logps/rejected": -121.77825927734375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.5499777793884277, + "rewards/margins": 31.206939697265625, + "rewards/rejected": -27.656963348388672, + "step": 3870 + }, + { + "epoch": 1.77, + "learning_rate": 1.3647894469812278e-07, + "logits/chosen": -2.1466057300567627, + "logits/rejected": -1.7561490535736084, + "logps/chosen": -84.68426513671875, + "logps/rejected": -123.66081237792969, + "loss": 0.0014, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 3.3084232807159424, + "rewards/margins": 31.223583221435547, + "rewards/rejected": -27.915157318115234, + "step": 3880 
+ }, + { + "epoch": 1.78, + "learning_rate": 1.3597158802638254e-07, + "logits/chosen": -2.1600029468536377, + "logits/rejected": -1.7710784673690796, + "logps/chosen": -87.58964538574219, + "logps/rejected": -127.94322204589844, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7342724800109863, + "rewards/margins": 30.390094757080078, + "rewards/rejected": -27.65582275390625, + "step": 3890 + }, + { + "epoch": 1.78, + "learning_rate": 1.354642313546423e-07, + "logits/chosen": -2.1588668823242188, + "logits/rejected": -1.8171707391738892, + "logps/chosen": -82.0718002319336, + "logps/rejected": -117.87044525146484, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.580735445022583, + "rewards/margins": 28.978771209716797, + "rewards/rejected": -26.398035049438477, + "step": 3900 + }, + { + "epoch": 1.78, + "eval_logits/chosen": -2.1919665336608887, + "eval_logits/rejected": -1.8445065021514893, + "eval_logps/chosen": -84.54802703857422, + "eval_logps/rejected": -120.56312561035156, + "eval_loss": 0.005169562995433807, + "eval_rewards/accuracies": 0.994413435459137, + "eval_rewards/chosen": 2.1412765979766846, + "eval_rewards/margins": 29.007375717163086, + "eval_rewards/rejected": -26.866098403930664, + "eval_runtime": 203.0484, + "eval_samples_per_second": 14.095, + "eval_steps_per_second": 0.882, + "step": 3900 + }, + { + "epoch": 1.78, + "learning_rate": 1.3495687468290208e-07, + "logits/chosen": -2.2244656085968018, + "logits/rejected": -1.810752272605896, + "logps/chosen": -89.08476257324219, + "logps/rejected": -123.11407470703125, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.44339919090271, + "rewards/margins": 30.2976016998291, + "rewards/rejected": -27.854202270507812, + "step": 3910 + }, + { + "epoch": 1.79, + "learning_rate": 1.3444951801116184e-07, + "logits/chosen": -2.219104290008545, + "logits/rejected": -1.8220125436782837, + "logps/chosen": -89.28169250488281, + 
"logps/rejected": -126.15754699707031, + "loss": 0.0039, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9085346460342407, + "rewards/margins": 30.235088348388672, + "rewards/rejected": -28.326553344726562, + "step": 3920 + }, + { + "epoch": 1.79, + "learning_rate": 1.339421613394216e-07, + "logits/chosen": -2.276082754135132, + "logits/rejected": -1.8841686248779297, + "logps/chosen": -89.07032775878906, + "logps/rejected": -126.38359069824219, + "loss": 0.0043, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.427870988845825, + "rewards/margins": 30.165613174438477, + "rewards/rejected": -27.737743377685547, + "step": 3930 + }, + { + "epoch": 1.8, + "learning_rate": 1.3343480466768138e-07, + "logits/chosen": -2.227997064590454, + "logits/rejected": -1.8480758666992188, + "logps/chosen": -84.52095794677734, + "logps/rejected": -119.73951721191406, + "loss": 0.0066, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.0445349216461182, + "rewards/margins": 28.596317291259766, + "rewards/rejected": -27.551782608032227, + "step": 3940 + }, + { + "epoch": 1.8, + "learning_rate": 1.3292744799594114e-07, + "logits/chosen": -2.182831287384033, + "logits/rejected": -1.7697114944458008, + "logps/chosen": -88.89148712158203, + "logps/rejected": -125.78935241699219, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.283107042312622, + "rewards/margins": 31.58938217163086, + "rewards/rejected": -28.306278228759766, + "step": 3950 + }, + { + "epoch": 1.81, + "learning_rate": 1.324200913242009e-07, + "logits/chosen": -2.156838893890381, + "logits/rejected": -1.7892711162567139, + "logps/chosen": -82.74298095703125, + "logps/rejected": -127.2794189453125, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.570988416671753, + "rewards/margins": 32.732200622558594, + "rewards/rejected": -29.161212921142578, + "step": 3960 + }, + { + "epoch": 1.81, + "learning_rate": 1.3191273465246068e-07, + "logits/chosen": 
-2.2453393936157227, + "logits/rejected": -1.8167974948883057, + "logps/chosen": -89.5159912109375, + "logps/rejected": -128.8140869140625, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.0265374183654785, + "rewards/margins": 31.421884536743164, + "rewards/rejected": -28.395349502563477, + "step": 3970 + }, + { + "epoch": 1.82, + "learning_rate": 1.3140537798072044e-07, + "logits/chosen": -2.1521337032318115, + "logits/rejected": -1.7532793283462524, + "logps/chosen": -85.2146987915039, + "logps/rejected": -111.48863220214844, + "loss": 0.0044, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 3.89916729927063, + "rewards/margins": 27.08025550842285, + "rewards/rejected": -23.181087493896484, + "step": 3980 + }, + { + "epoch": 1.82, + "learning_rate": 1.308980213089802e-07, + "logits/chosen": -2.2195143699645996, + "logits/rejected": -1.8655322790145874, + "logps/chosen": -83.930419921875, + "logps/rejected": -112.97065734863281, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.9724647998809814, + "rewards/margins": 25.749919891357422, + "rewards/rejected": -21.777454376220703, + "step": 3990 + }, + { + "epoch": 1.83, + "learning_rate": 1.3039066463723998e-07, + "logits/chosen": -2.243203639984131, + "logits/rejected": -1.7824580669403076, + "logps/chosen": -85.41126251220703, + "logps/rejected": -116.12227630615234, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.609849691390991, + "rewards/margins": 27.11124038696289, + "rewards/rejected": -24.50139045715332, + "step": 4000 + }, + { + "epoch": 1.83, + "eval_logits/chosen": -2.207897663116455, + "eval_logits/rejected": -1.8570655584335327, + "eval_logps/chosen": -83.74642181396484, + "eval_logps/rejected": -115.08486938476562, + "eval_loss": 0.0051609063521027565, + "eval_rewards/accuracies": 0.994413435459137, + "eval_rewards/chosen": 2.5420799255371094, + "eval_rewards/margins": 26.66905403137207, + "eval_rewards/rejected": 
-24.126972198486328, + "eval_runtime": 213.5807, + "eval_samples_per_second": 13.4, + "eval_steps_per_second": 0.838, + "step": 4000 + }, + { + "epoch": 1.83, + "learning_rate": 1.2988330796549974e-07, + "logits/chosen": -2.2335009574890137, + "logits/rejected": -1.8103595972061157, + "logps/chosen": -87.22106170654297, + "logps/rejected": -117.1398696899414, + "loss": 0.0065, + "rewards/accuracies": 0.9624999761581421, + "rewards/chosen": 2.971987247467041, + "rewards/margins": 27.090845108032227, + "rewards/rejected": -24.118860244750977, + "step": 4010 + }, + { + "epoch": 1.83, + "learning_rate": 1.293759512937595e-07, + "logits/chosen": -2.2131710052490234, + "logits/rejected": -1.8810796737670898, + "logps/chosen": -89.93437957763672, + "logps/rejected": -119.86589050292969, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9654256701469421, + "rewards/margins": 24.501127243041992, + "rewards/rejected": -23.535701751708984, + "step": 4020 + }, + { + "epoch": 1.84, + "learning_rate": 1.2886859462201928e-07, + "logits/chosen": -2.2927210330963135, + "logits/rejected": -1.9104375839233398, + "logps/chosen": -88.72476196289062, + "logps/rejected": -117.77976989746094, + "loss": 0.0041, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.68806791305542, + "rewards/margins": 27.564483642578125, + "rewards/rejected": -24.876415252685547, + "step": 4030 + }, + { + "epoch": 1.84, + "learning_rate": 1.2836123795027904e-07, + "logits/chosen": -2.168994903564453, + "logits/rejected": -1.7794716358184814, + "logps/chosen": -81.93544006347656, + "logps/rejected": -116.9870376586914, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4068737030029297, + "rewards/margins": 27.525577545166016, + "rewards/rejected": -25.118701934814453, + "step": 4040 + }, + { + "epoch": 1.85, + "learning_rate": 1.278538812785388e-07, + "logits/chosen": -2.18416690826416, + "logits/rejected": -1.7201156616210938, + "logps/chosen": -91.39389038085938, + 
"logps/rejected": -118.92759704589844, + "loss": 0.0063, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 3.148336887359619, + "rewards/margins": 27.875076293945312, + "rewards/rejected": -24.72673988342285, + "step": 4050 + }, + { + "epoch": 1.85, + "learning_rate": 1.2734652460679858e-07, + "logits/chosen": -2.2627367973327637, + "logits/rejected": -1.9123704433441162, + "logps/chosen": -88.3521728515625, + "logps/rejected": -117.85545349121094, + "loss": 0.0027, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9983296394348145, + "rewards/margins": 25.974822998046875, + "rewards/rejected": -22.976491928100586, + "step": 4060 + }, + { + "epoch": 1.86, + "learning_rate": 1.2683916793505834e-07, + "logits/chosen": -2.1818766593933105, + "logits/rejected": -1.8303353786468506, + "logps/chosen": -82.59492492675781, + "logps/rejected": -120.16998291015625, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4224205017089844, + "rewards/margins": 27.243423461914062, + "rewards/rejected": -23.821001052856445, + "step": 4070 + }, + { + "epoch": 1.86, + "learning_rate": 1.263318112633181e-07, + "logits/chosen": -2.2209744453430176, + "logits/rejected": -1.8506110906600952, + "logps/chosen": -87.74772644042969, + "logps/rejected": -112.5324478149414, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.3162944316864014, + "rewards/margins": 25.54704475402832, + "rewards/rejected": -22.230749130249023, + "step": 4080 + }, + { + "epoch": 1.87, + "learning_rate": 1.2582445459157788e-07, + "logits/chosen": -2.203535318374634, + "logits/rejected": -1.8149940967559814, + "logps/chosen": -81.7652587890625, + "logps/rejected": -113.93977355957031, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.2748992443084717, + "rewards/margins": 26.76283836364746, + "rewards/rejected": -23.48794174194336, + "step": 4090 + }, + { + "epoch": 1.87, + "learning_rate": 1.2531709791983764e-07, + "logits/chosen": 
-2.271902561187744, + "logits/rejected": -1.8522933721542358, + "logps/chosen": -89.37789916992188, + "logps/rejected": -117.07502746582031, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4491279125213623, + "rewards/margins": 26.383209228515625, + "rewards/rejected": -22.934078216552734, + "step": 4100 + }, + { + "epoch": 1.87, + "eval_logits/chosen": -2.217360019683838, + "eval_logits/rejected": -1.8673908710479736, + "eval_logps/chosen": -83.58930969238281, + "eval_logps/rejected": -114.42141723632812, + "eval_loss": 0.005190215539187193, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 2.6206393241882324, + "eval_rewards/margins": 26.415889739990234, + "eval_rewards/rejected": -23.795251846313477, + "eval_runtime": 206.2078, + "eval_samples_per_second": 13.879, + "eval_steps_per_second": 0.868, + "step": 4100 + }, + { + "epoch": 1.88, + "learning_rate": 1.248097412480974e-07, + "logits/chosen": -2.3479442596435547, + "logits/rejected": -1.9204511642456055, + "logps/chosen": -88.93357849121094, + "logps/rejected": -119.96925354003906, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.7320475578308105, + "rewards/margins": 28.458393096923828, + "rewards/rejected": -24.72634506225586, + "step": 4110 + }, + { + "epoch": 1.88, + "learning_rate": 1.2430238457635718e-07, + "logits/chosen": -2.1347765922546387, + "logits/rejected": -1.743198037147522, + "logps/chosen": -87.92202758789062, + "logps/rejected": -118.41119384765625, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6807589530944824, + "rewards/margins": 27.0130558013916, + "rewards/rejected": -24.33229637145996, + "step": 4120 + }, + { + "epoch": 1.89, + "learning_rate": 1.2379502790461694e-07, + "logits/chosen": -2.176631450653076, + "logits/rejected": -1.8327052593231201, + "logps/chosen": -84.86781311035156, + "logps/rejected": -117.3744888305664, + "loss": 0.0054, + "rewards/accuracies": 0.987500011920929, + 
"rewards/chosen": 1.2650296688079834, + "rewards/margins": 25.868539810180664, + "rewards/rejected": -24.603511810302734, + "step": 4130 + }, + { + "epoch": 1.89, + "learning_rate": 1.232876712328767e-07, + "logits/chosen": -2.158825635910034, + "logits/rejected": -1.7359037399291992, + "logps/chosen": -88.40345001220703, + "logps/rejected": -119.45426177978516, + "loss": 0.0016, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4086227416992188, + "rewards/margins": 27.16936683654785, + "rewards/rejected": -24.760744094848633, + "step": 4140 + }, + { + "epoch": 1.89, + "learning_rate": 1.2278031456113648e-07, + "logits/chosen": -2.2729620933532715, + "logits/rejected": -1.8309637308120728, + "logps/chosen": -91.2960205078125, + "logps/rejected": -117.98994445800781, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.7180824279785156, + "rewards/margins": 27.0567569732666, + "rewards/rejected": -23.33867645263672, + "step": 4150 + }, + { + "epoch": 1.9, + "learning_rate": 1.2227295788939624e-07, + "logits/chosen": -2.2085700035095215, + "logits/rejected": -1.8833599090576172, + "logps/chosen": -86.73484802246094, + "logps/rejected": -124.00797271728516, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6060290336608887, + "rewards/margins": 27.345844268798828, + "rewards/rejected": -24.739816665649414, + "step": 4160 + }, + { + "epoch": 1.9, + "learning_rate": 1.21765601217656e-07, + "logits/chosen": -2.231919050216675, + "logits/rejected": -1.8927338123321533, + "logps/chosen": -82.05496978759766, + "logps/rejected": -125.29072570800781, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5094146728515625, + "rewards/margins": 27.967309951782227, + "rewards/rejected": -25.457895278930664, + "step": 4170 + }, + { + "epoch": 1.91, + "learning_rate": 1.2125824454591578e-07, + "logits/chosen": -2.252375364303589, + "logits/rejected": -1.7970874309539795, + "logps/chosen": -92.34709167480469, + "logps/rejected": 
-115.58805084228516, + "loss": 0.0047, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.9096739292144775, + "rewards/margins": 27.263355255126953, + "rewards/rejected": -24.353681564331055, + "step": 4180 + }, + { + "epoch": 1.91, + "learning_rate": 1.2075088787417554e-07, + "logits/chosen": -2.1913251876831055, + "logits/rejected": -1.8256721496582031, + "logps/chosen": -83.70695495605469, + "logps/rejected": -117.19889068603516, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.558323621749878, + "rewards/margins": 27.682703018188477, + "rewards/rejected": -24.124378204345703, + "step": 4190 + }, + { + "epoch": 1.92, + "learning_rate": 1.202435312024353e-07, + "logits/chosen": -2.2380259037017822, + "logits/rejected": -1.9088646173477173, + "logps/chosen": -82.8735580444336, + "logps/rejected": -115.74534606933594, + "loss": 0.0026, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.5881214141845703, + "rewards/margins": 26.207622528076172, + "rewards/rejected": -23.6195011138916, + "step": 4200 + }, + { + "epoch": 1.92, + "eval_logits/chosen": -2.2144737243652344, + "eval_logits/rejected": -1.8624593019485474, + "eval_logps/chosen": -83.490234375, + "eval_logps/rejected": -111.31690979003906, + "eval_loss": 0.00542183592915535, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 2.670175075531006, + "eval_rewards/margins": 24.913169860839844, + "eval_rewards/rejected": -22.242996215820312, + "eval_runtime": 204.9075, + "eval_samples_per_second": 13.967, + "eval_steps_per_second": 0.874, + "step": 4200 + }, + { + "epoch": 1.92, + "learning_rate": 1.1973617453069508e-07, + "logits/chosen": -2.214597463607788, + "logits/rejected": -1.8877366781234741, + "logps/chosen": -81.5813980102539, + "logps/rejected": -110.80888366699219, + "loss": 0.0036, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.2836194038391113, + "rewards/margins": 24.602767944335938, + "rewards/rejected": -22.31914710998535, 
+ "step": 4210 + }, + { + "epoch": 1.93, + "learning_rate": 1.1922881785895484e-07, + "logits/chosen": -2.2340848445892334, + "logits/rejected": -1.8911195993423462, + "logps/chosen": -88.96124267578125, + "logps/rejected": -121.70379638671875, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.17574405670166, + "rewards/margins": 26.648061752319336, + "rewards/rejected": -23.47231674194336, + "step": 4220 + }, + { + "epoch": 1.93, + "learning_rate": 1.187214611872146e-07, + "logits/chosen": -2.306396007537842, + "logits/rejected": -1.935389757156372, + "logps/chosen": -90.6281509399414, + "logps/rejected": -122.7242660522461, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.553041934967041, + "rewards/margins": 26.54819107055664, + "rewards/rejected": -23.995147705078125, + "step": 4230 + }, + { + "epoch": 1.94, + "learning_rate": 1.1821410451547436e-07, + "logits/chosen": -2.159374713897705, + "logits/rejected": -1.8598381280899048, + "logps/chosen": -81.08094024658203, + "logps/rejected": -112.6502914428711, + "loss": 0.0026, + "rewards/accuracies": 1.0, + "rewards/chosen": 4.01515531539917, + "rewards/margins": 25.243701934814453, + "rewards/rejected": -21.22854232788086, + "step": 4240 + }, + { + "epoch": 1.94, + "learning_rate": 1.1770674784373413e-07, + "logits/chosen": -2.15578031539917, + "logits/rejected": -1.851265549659729, + "logps/chosen": -78.29918670654297, + "logps/rejected": -111.49513244628906, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1447842121124268, + "rewards/margins": 23.960596084594727, + "rewards/rejected": -21.815811157226562, + "step": 4250 + }, + { + "epoch": 1.94, + "learning_rate": 1.171993911719939e-07, + "logits/chosen": -2.2845335006713867, + "logits/rejected": -1.9803342819213867, + "logps/chosen": -84.60355377197266, + "logps/rejected": -118.61265563964844, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0940170288085938, + "rewards/margins": 
26.21561050415039, + "rewards/rejected": -24.121593475341797, + "step": 4260 + }, + { + "epoch": 1.95, + "learning_rate": 1.1669203450025366e-07, + "logits/chosen": -2.1469063758850098, + "logits/rejected": -1.885765790939331, + "logps/chosen": -78.9441909790039, + "logps/rejected": -114.021240234375, + "loss": 0.0073, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.2761025428771973, + "rewards/margins": 24.522062301635742, + "rewards/rejected": -22.24595832824707, + "step": 4270 + }, + { + "epoch": 1.95, + "learning_rate": 1.1618467782851343e-07, + "logits/chosen": -2.2316300868988037, + "logits/rejected": -1.8505769968032837, + "logps/chosen": -82.11341857910156, + "logps/rejected": -119.51060485839844, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4899394512176514, + "rewards/margins": 26.23581314086914, + "rewards/rejected": -23.74587059020996, + "step": 4280 + }, + { + "epoch": 1.96, + "learning_rate": 1.156773211567732e-07, + "logits/chosen": -2.1731178760528564, + "logits/rejected": -1.7443568706512451, + "logps/chosen": -84.8414306640625, + "logps/rejected": -119.10728454589844, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4432387351989746, + "rewards/margins": 28.680456161499023, + "rewards/rejected": -25.237218856811523, + "step": 4290 + }, + { + "epoch": 1.96, + "learning_rate": 1.1516996448503296e-07, + "logits/chosen": -2.245872974395752, + "logits/rejected": -1.8761298656463623, + "logps/chosen": -83.03449249267578, + "logps/rejected": -122.46466064453125, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.476714611053467, + "rewards/margins": 27.374313354492188, + "rewards/rejected": -24.897600173950195, + "step": 4300 + }, + { + "epoch": 1.96, + "eval_logits/chosen": -2.2145586013793945, + "eval_logits/rejected": -1.862236499786377, + "eval_logps/chosen": -83.82196807861328, + "eval_logps/rejected": -114.24178314208984, + "eval_loss": 0.005356738809496164, + 
"eval_rewards/accuracies": 0.994413435459137, + "eval_rewards/chosen": 2.504307270050049, + "eval_rewards/margins": 26.20973777770996, + "eval_rewards/rejected": -23.705427169799805, + "eval_runtime": 231.7023, + "eval_samples_per_second": 12.352, + "eval_steps_per_second": 0.773, + "step": 4300 + }, + { + "epoch": 1.97, + "learning_rate": 1.1466260781329273e-07, + "logits/chosen": -2.2044477462768555, + "logits/rejected": -1.769928216934204, + "logps/chosen": -87.90011596679688, + "logps/rejected": -117.90144348144531, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1481876373291016, + "rewards/margins": 27.33310317993164, + "rewards/rejected": -24.184917449951172, + "step": 4310 + }, + { + "epoch": 1.97, + "learning_rate": 1.141552511415525e-07, + "logits/chosen": -2.2101001739501953, + "logits/rejected": -1.8445708751678467, + "logps/chosen": -90.51927185058594, + "logps/rejected": -117.70124816894531, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.421943426132202, + "rewards/margins": 27.004901885986328, + "rewards/rejected": -24.582958221435547, + "step": 4320 + }, + { + "epoch": 1.98, + "learning_rate": 1.1364789446981226e-07, + "logits/chosen": -2.20845365524292, + "logits/rejected": -1.8483657836914062, + "logps/chosen": -90.68830871582031, + "logps/rejected": -119.9162368774414, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.908290386199951, + "rewards/margins": 27.960651397705078, + "rewards/rejected": -25.052364349365234, + "step": 4330 + }, + { + "epoch": 1.98, + "learning_rate": 1.1314053779807203e-07, + "logits/chosen": -2.2378525733947754, + "logits/rejected": -1.9095252752304077, + "logps/chosen": -84.37041473388672, + "logps/rejected": -117.41896057128906, + "loss": 0.0036, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9735949039459229, + "rewards/margins": 26.64251136779785, + "rewards/rejected": -24.668912887573242, + "step": 4340 + }, + { + "epoch": 1.99, + "learning_rate": 
1.126331811263318e-07, + "logits/chosen": -2.2925407886505127, + "logits/rejected": -1.940437912940979, + "logps/chosen": -83.49569702148438, + "logps/rejected": -115.74214172363281, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6832937002182007, + "rewards/margins": 25.954483032226562, + "rewards/rejected": -24.271190643310547, + "step": 4350 + }, + { + "epoch": 1.99, + "learning_rate": 1.1212582445459156e-07, + "logits/chosen": -2.1880042552948, + "logits/rejected": -1.7285563945770264, + "logps/chosen": -90.88077545166016, + "logps/rejected": -118.75814056396484, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5746841430664062, + "rewards/margins": 28.058208465576172, + "rewards/rejected": -25.483524322509766, + "step": 4360 + }, + { + "epoch": 1.99, + "learning_rate": 1.1161846778285133e-07, + "logits/chosen": -2.1667580604553223, + "logits/rejected": -1.878365159034729, + "logps/chosen": -78.63961029052734, + "logps/rejected": -118.59150695800781, + "loss": 0.0068, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0068283081054688, + "rewards/margins": 25.848861694335938, + "rewards/rejected": -23.8420352935791, + "step": 4370 + }, + { + "epoch": 2.0, + "learning_rate": 1.111111111111111e-07, + "logits/chosen": -2.160161018371582, + "logits/rejected": -1.7247244119644165, + "logps/chosen": -90.16160583496094, + "logps/rejected": -113.54561614990234, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1644601821899414, + "rewards/margins": 26.249164581298828, + "rewards/rejected": -23.084701538085938, + "step": 4380 + }, + { + "epoch": 2.0, + "learning_rate": 1.1060375443937086e-07, + "logits/chosen": -2.183781862258911, + "logits/rejected": -1.8434038162231445, + "logps/chosen": -84.59786224365234, + "logps/rejected": -118.7528076171875, + "loss": 0.0044, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.4381895065307617, + "rewards/margins": 26.610469818115234, + "rewards/rejected": 
-24.17228126525879, + "step": 4390 + }, + { + "epoch": 2.01, + "learning_rate": 1.1009639776763063e-07, + "logits/chosen": -2.2394397258758545, + "logits/rejected": -1.8388830423355103, + "logps/chosen": -87.1556396484375, + "logps/rejected": -115.66267395019531, + "loss": 0.0024, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.9926955699920654, + "rewards/margins": 26.94419288635254, + "rewards/rejected": -23.951494216918945, + "step": 4400 + }, + { + "epoch": 2.01, + "eval_logits/chosen": -2.2128782272338867, + "eval_logits/rejected": -1.8610923290252686, + "eval_logps/chosen": -84.40758514404297, + "eval_logps/rejected": -115.89351654052734, + "eval_loss": 0.005497102625668049, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 2.211501359939575, + "eval_rewards/margins": 26.7427921295166, + "eval_rewards/rejected": -24.531293869018555, + "eval_runtime": 181.6374, + "eval_samples_per_second": 15.757, + "eval_steps_per_second": 0.985, + "step": 4400 + }, + { + "epoch": 2.01, + "learning_rate": 1.095890410958904e-07, + "logits/chosen": -2.1844065189361572, + "logits/rejected": -1.7922824621200562, + "logps/chosen": -87.32149505615234, + "logps/rejected": -124.73358154296875, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.739438533782959, + "rewards/margins": 29.961498260498047, + "rewards/rejected": -26.222061157226562, + "step": 4410 + }, + { + "epoch": 2.02, + "learning_rate": 1.0908168442415016e-07, + "logits/chosen": -2.1687610149383545, + "logits/rejected": -1.8179349899291992, + "logps/chosen": -82.60697937011719, + "logps/rejected": -117.3493881225586, + "loss": 0.0076, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0033984184265137, + "rewards/margins": 26.623882293701172, + "rewards/rejected": -24.620487213134766, + "step": 4420 + }, + { + "epoch": 2.02, + "learning_rate": 1.0857432775240993e-07, + "logits/chosen": -2.157742977142334, + "logits/rejected": -1.8218927383422852, + 
"logps/chosen": -81.67804718017578, + "logps/rejected": -123.06440734863281, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.68721342086792, + "rewards/margins": 28.739349365234375, + "rewards/rejected": -26.052135467529297, + "step": 4430 + }, + { + "epoch": 2.03, + "learning_rate": 1.080669710806697e-07, + "logits/chosen": -2.275503396987915, + "logits/rejected": -1.8494739532470703, + "logps/chosen": -83.60503387451172, + "logps/rejected": -121.38664245605469, + "loss": 0.0025, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1225333213806152, + "rewards/margins": 28.552631378173828, + "rewards/rejected": -25.430099487304688, + "step": 4440 + }, + { + "epoch": 2.03, + "learning_rate": 1.0755961440892946e-07, + "logits/chosen": -2.269984722137451, + "logits/rejected": -1.902269721031189, + "logps/chosen": -83.16432189941406, + "logps/rejected": -120.77657318115234, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8239471912384033, + "rewards/margins": 28.915185928344727, + "rewards/rejected": -26.091238021850586, + "step": 4450 + }, + { + "epoch": 2.04, + "learning_rate": 1.0705225773718923e-07, + "logits/chosen": -2.2269845008850098, + "logits/rejected": -1.8621666431427002, + "logps/chosen": -85.25446319580078, + "logps/rejected": -115.29423522949219, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4882960319519043, + "rewards/margins": 26.619338989257812, + "rewards/rejected": -24.13104248046875, + "step": 4460 + }, + { + "epoch": 2.04, + "learning_rate": 1.06544901065449e-07, + "logits/chosen": -2.236506700515747, + "logits/rejected": -1.9493324756622314, + "logps/chosen": -87.72699737548828, + "logps/rejected": -120.0291748046875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.765307903289795, + "rewards/margins": 26.442195892333984, + "rewards/rejected": -23.676889419555664, + "step": 4470 + }, + { + "epoch": 2.04, + "learning_rate": 1.0603754439370876e-07, + "logits/chosen": 
-2.22512149810791, + "logits/rejected": -1.8360923528671265, + "logps/chosen": -85.01690673828125, + "logps/rejected": -123.11708068847656, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9220136404037476, + "rewards/margins": 28.249126434326172, + "rewards/rejected": -26.327117919921875, + "step": 4480 + }, + { + "epoch": 2.05, + "learning_rate": 1.0553018772196853e-07, + "logits/chosen": -2.271622896194458, + "logits/rejected": -1.8992735147476196, + "logps/chosen": -88.58778381347656, + "logps/rejected": -118.49006652832031, + "loss": 0.0011, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 3.1313204765319824, + "rewards/margins": 26.39713478088379, + "rewards/rejected": -23.265811920166016, + "step": 4490 + }, + { + "epoch": 2.05, + "learning_rate": 1.050228310502283e-07, + "logits/chosen": -2.2248878479003906, + "logits/rejected": -1.8648513555526733, + "logps/chosen": -84.03074645996094, + "logps/rejected": -119.88997650146484, + "loss": 0.0033, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.3918333053588867, + "rewards/margins": 26.75858497619629, + "rewards/rejected": -25.366750717163086, + "step": 4500 + }, + { + "epoch": 2.05, + "eval_logits/chosen": -2.2098562717437744, + "eval_logits/rejected": -1.8567416667938232, + "eval_logps/chosen": -84.76105499267578, + "eval_logps/rejected": -117.29058074951172, + "eval_loss": 0.005427930504083633, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 2.0347681045532227, + "eval_rewards/margins": 27.26459312438965, + "eval_rewards/rejected": -25.22982406616211, + "eval_runtime": 226.6245, + "eval_samples_per_second": 12.629, + "eval_steps_per_second": 0.79, + "step": 4500 + }, + { + "epoch": 2.06, + "learning_rate": 1.0451547437848806e-07, + "logits/chosen": -2.244274139404297, + "logits/rejected": -1.835097074508667, + "logps/chosen": -89.41958618164062, + "logps/rejected": -119.83707427978516, + "loss": 0.0031, + "rewards/accuracies": 1.0, 
+ "rewards/chosen": 2.5296854972839355, + "rewards/margins": 27.9240779876709, + "rewards/rejected": -25.394390106201172, + "step": 4510 + }, + { + "epoch": 2.06, + "learning_rate": 1.0400811770674783e-07, + "logits/chosen": -2.20641827583313, + "logits/rejected": -1.8095362186431885, + "logps/chosen": -79.14988708496094, + "logps/rejected": -120.5500717163086, + "loss": 0.0065, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.5764966011047363, + "rewards/margins": 29.600894927978516, + "rewards/rejected": -27.024398803710938, + "step": 4520 + }, + { + "epoch": 2.07, + "learning_rate": 1.035007610350076e-07, + "logits/chosen": -2.1728744506835938, + "logits/rejected": -1.750156044960022, + "logps/chosen": -90.58625793457031, + "logps/rejected": -115.90065002441406, + "loss": 0.0044, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.2151265144348145, + "rewards/margins": 26.991992950439453, + "rewards/rejected": -24.776866912841797, + "step": 4530 + }, + { + "epoch": 2.07, + "learning_rate": 1.0299340436326736e-07, + "logits/chosen": -2.1972928047180176, + "logits/rejected": -1.8641561269760132, + "logps/chosen": -83.82379150390625, + "logps/rejected": -120.79838562011719, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.7972400188446045, + "rewards/margins": 28.825088500976562, + "rewards/rejected": -26.027847290039062, + "step": 4540 + }, + { + "epoch": 2.08, + "learning_rate": 1.0248604769152713e-07, + "logits/chosen": -2.2066047191619873, + "logits/rejected": -1.8661673069000244, + "logps/chosen": -82.5276870727539, + "logps/rejected": -117.04951477050781, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.4789516925811768, + "rewards/margins": 26.52178955078125, + "rewards/rejected": -24.042835235595703, + "step": 4550 + }, + { + "epoch": 2.08, + "learning_rate": 1.019786910197869e-07, + "logits/chosen": -2.2894678115844727, + "logits/rejected": -1.9717973470687866, + "logps/chosen": 
-85.42427825927734, + "logps/rejected": -122.86556243896484, + "loss": 0.0066, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.7783312797546387, + "rewards/margins": 26.929943084716797, + "rewards/rejected": -26.151615142822266, + "step": 4560 + }, + { + "epoch": 2.09, + "learning_rate": 1.0147133434804666e-07, + "logits/chosen": -2.2856619358062744, + "logits/rejected": -1.8935363292694092, + "logps/chosen": -83.85643005371094, + "logps/rejected": -116.0418930053711, + "loss": 0.0053, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.631080389022827, + "rewards/margins": 28.714336395263672, + "rewards/rejected": -26.083255767822266, + "step": 4570 + }, + { + "epoch": 2.09, + "learning_rate": 1.0096397767630643e-07, + "logits/chosen": -2.1582155227661133, + "logits/rejected": -1.7837406396865845, + "logps/chosen": -90.37853240966797, + "logps/rejected": -126.54129791259766, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.798124313354492, + "rewards/margins": 29.615198135375977, + "rewards/rejected": -25.817073822021484, + "step": 4580 + }, + { + "epoch": 2.1, + "learning_rate": 1.004566210045662e-07, + "logits/chosen": -2.217268705368042, + "logits/rejected": -1.9051685333251953, + "logps/chosen": -79.26915740966797, + "logps/rejected": -121.41270446777344, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.768852949142456, + "rewards/margins": 27.323421478271484, + "rewards/rejected": -24.5545654296875, + "step": 4590 + }, + { + "epoch": 2.1, + "learning_rate": 9.994926433282596e-08, + "logits/chosen": -2.2435638904571533, + "logits/rejected": -1.8570373058319092, + "logps/chosen": -90.26069641113281, + "logps/rejected": -124.71513366699219, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2154905796051025, + "rewards/margins": 28.169509887695312, + "rewards/rejected": -25.954015731811523, + "step": 4600 + }, + { + "epoch": 2.1, + "eval_logits/chosen": -2.2148590087890625, + 
"eval_logits/rejected": -1.865024447441101, + "eval_logps/chosen": -84.85028839111328, + "eval_logps/rejected": -118.58063507080078, + "eval_loss": 0.005515058524906635, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.9901474714279175, + "eval_rewards/margins": 27.865001678466797, + "eval_rewards/rejected": -25.874853134155273, + "eval_runtime": 189.7173, + "eval_samples_per_second": 15.086, + "eval_steps_per_second": 0.944, + "step": 4600 + }, + { + "epoch": 2.1, + "learning_rate": 9.944190766108573e-08, + "logits/chosen": -2.2464184761047363, + "logits/rejected": -1.902503252029419, + "logps/chosen": -86.0322036743164, + "logps/rejected": -123.70219421386719, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7963413000106812, + "rewards/margins": 29.205463409423828, + "rewards/rejected": -27.409122467041016, + "step": 4610 + }, + { + "epoch": 2.11, + "learning_rate": 9.89345509893455e-08, + "logits/chosen": -2.134826183319092, + "logits/rejected": -1.8044805526733398, + "logps/chosen": -87.43550109863281, + "logps/rejected": -123.2356185913086, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.1314964294433594, + "rewards/margins": 29.698129653930664, + "rewards/rejected": -26.566635131835938, + "step": 4620 + }, + { + "epoch": 2.11, + "learning_rate": 9.842719431760526e-08, + "logits/chosen": -2.146523952484131, + "logits/rejected": -1.7418187856674194, + "logps/chosen": -83.50428771972656, + "logps/rejected": -122.54347229003906, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.376321792602539, + "rewards/margins": 30.02010726928711, + "rewards/rejected": -27.643783569335938, + "step": 4630 + }, + { + "epoch": 2.12, + "learning_rate": 9.791983764586503e-08, + "logits/chosen": -2.262716770172119, + "logits/rejected": -1.8752014636993408, + "logps/chosen": -84.97921752929688, + "logps/rejected": -122.33912658691406, + "loss": 0.0045, + "rewards/accuracies": 1.0, + 
"rewards/chosen": 1.812735915184021, + "rewards/margins": 29.280685424804688, + "rewards/rejected": -27.46795082092285, + "step": 4640 + }, + { + "epoch": 2.12, + "learning_rate": 9.74124809741248e-08, + "logits/chosen": -2.2482247352600098, + "logits/rejected": -1.9075597524642944, + "logps/chosen": -81.56404113769531, + "logps/rejected": -120.65312194824219, + "loss": 0.0056, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.012248992919922, + "rewards/margins": 28.712228775024414, + "rewards/rejected": -26.699981689453125, + "step": 4650 + }, + { + "epoch": 2.13, + "learning_rate": 9.690512430238456e-08, + "logits/chosen": -2.1412789821624756, + "logits/rejected": -1.8419468402862549, + "logps/chosen": -83.85114288330078, + "logps/rejected": -121.31649017333984, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0328783988952637, + "rewards/margins": 27.409412384033203, + "rewards/rejected": -26.376529693603516, + "step": 4660 + }, + { + "epoch": 2.13, + "learning_rate": 9.639776763064433e-08, + "logits/chosen": -2.2971882820129395, + "logits/rejected": -1.9334943294525146, + "logps/chosen": -82.55015563964844, + "logps/rejected": -119.16120910644531, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0977225303649902, + "rewards/margins": 28.73293113708496, + "rewards/rejected": -26.635211944580078, + "step": 4670 + }, + { + "epoch": 2.14, + "learning_rate": 9.58904109589041e-08, + "logits/chosen": -2.325941562652588, + "logits/rejected": -1.964868187904358, + "logps/chosen": -87.65157318115234, + "logps/rejected": -125.1788330078125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.739508032798767, + "rewards/margins": 30.17959213256836, + "rewards/rejected": -28.44008445739746, + "step": 4680 + }, + { + "epoch": 2.14, + "learning_rate": 9.538305428716386e-08, + "logits/chosen": -2.1548006534576416, + "logits/rejected": -1.8352489471435547, + "logps/chosen": -83.15804290771484, + 
"logps/rejected": -114.95072174072266, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4212478399276733, + "rewards/margins": 27.054697036743164, + "rewards/rejected": -25.63344955444336, + "step": 4690 + }, + { + "epoch": 2.15, + "learning_rate": 9.487569761542363e-08, + "logits/chosen": -2.263066530227661, + "logits/rejected": -1.8140977621078491, + "logps/chosen": -93.13688659667969, + "logps/rejected": -124.15168762207031, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.275789737701416, + "rewards/margins": 29.475337982177734, + "rewards/rejected": -26.199548721313477, + "step": 4700 + }, + { + "epoch": 2.15, + "eval_logits/chosen": -2.2139129638671875, + "eval_logits/rejected": -1.8623522520065308, + "eval_logps/chosen": -85.51490783691406, + "eval_logps/rejected": -120.75127410888672, + "eval_loss": 0.005448976997286081, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.657840371131897, + "eval_rewards/margins": 28.618017196655273, + "eval_rewards/rejected": -26.960174560546875, + "eval_runtime": 230.7918, + "eval_samples_per_second": 12.401, + "eval_steps_per_second": 0.776, + "step": 4700 + }, + { + "epoch": 2.15, + "learning_rate": 9.43683409436834e-08, + "logits/chosen": -2.2211930751800537, + "logits/rejected": -1.881513237953186, + "logps/chosen": -82.05587005615234, + "logps/rejected": -126.84517669677734, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6722524166107178, + "rewards/margins": 30.808147430419922, + "rewards/rejected": -28.13589859008789, + "step": 4710 + }, + { + "epoch": 2.15, + "learning_rate": 9.386098427194316e-08, + "logits/chosen": -2.1461105346679688, + "logits/rejected": -1.7770576477050781, + "logps/chosen": -85.20562744140625, + "logps/rejected": -125.3041763305664, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.119962692260742, + "rewards/margins": 29.933517456054688, + "rewards/rejected": -27.813552856445312, + 
"step": 4720 + }, + { + "epoch": 2.16, + "learning_rate": 9.335362760020293e-08, + "logits/chosen": -2.215076208114624, + "logits/rejected": -1.8217270374298096, + "logps/chosen": -84.71824645996094, + "logps/rejected": -120.50927734375, + "loss": 0.0058, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.6982955932617188, + "rewards/margins": 29.129375457763672, + "rewards/rejected": -26.431079864501953, + "step": 4730 + }, + { + "epoch": 2.16, + "learning_rate": 9.28462709284627e-08, + "logits/chosen": -2.2664477825164795, + "logits/rejected": -1.8872871398925781, + "logps/chosen": -86.96624755859375, + "logps/rejected": -124.43575286865234, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4599993228912354, + "rewards/margins": 30.88054847717285, + "rewards/rejected": -29.420547485351562, + "step": 4740 + }, + { + "epoch": 2.17, + "learning_rate": 9.233891425672246e-08, + "logits/chosen": -2.2344231605529785, + "logits/rejected": -1.7941217422485352, + "logps/chosen": -88.73806762695312, + "logps/rejected": -124.6733627319336, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": 3.4499599933624268, + "rewards/margins": 30.964941024780273, + "rewards/rejected": -27.51498031616211, + "step": 4750 + }, + { + "epoch": 2.17, + "learning_rate": 9.183155758498223e-08, + "logits/chosen": -2.26192045211792, + "logits/rejected": -1.9525654315948486, + "logps/chosen": -86.94519805908203, + "logps/rejected": -128.98985290527344, + "loss": 0.0012, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.0427141189575195, + "rewards/margins": 29.724853515625, + "rewards/rejected": -28.682140350341797, + "step": 4760 + }, + { + "epoch": 2.18, + "learning_rate": 9.1324200913242e-08, + "logits/chosen": -2.2315638065338135, + "logits/rejected": -1.844601035118103, + "logps/chosen": -90.42842864990234, + "logps/rejected": -123.21382904052734, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": 
1.7231295108795166, + "rewards/margins": 28.74625587463379, + "rewards/rejected": -27.023128509521484, + "step": 4770 + }, + { + "epoch": 2.18, + "learning_rate": 9.081684424150176e-08, + "logits/chosen": -2.2464542388916016, + "logits/rejected": -1.9291092157363892, + "logps/chosen": -90.54940032958984, + "logps/rejected": -130.29818725585938, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4829115867614746, + "rewards/margins": 30.492401123046875, + "rewards/rejected": -29.00948715209961, + "step": 4780 + }, + { + "epoch": 2.19, + "learning_rate": 9.030948756976153e-08, + "logits/chosen": -2.249579906463623, + "logits/rejected": -1.7951542139053345, + "logps/chosen": -84.71315002441406, + "logps/rejected": -123.6868896484375, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.647388458251953, + "rewards/margins": 31.460529327392578, + "rewards/rejected": -28.813140869140625, + "step": 4790 + }, + { + "epoch": 2.19, + "learning_rate": 8.98021308980213e-08, + "logits/chosen": -2.2210094928741455, + "logits/rejected": -1.8732092380523682, + "logps/chosen": -83.642578125, + "logps/rejected": -124.18983459472656, + "loss": 0.0064, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.0244059562683105, + "rewards/margins": 29.753732681274414, + "rewards/rejected": -27.729320526123047, + "step": 4800 + }, + { + "epoch": 2.19, + "eval_logits/chosen": -2.208674192428589, + "eval_logits/rejected": -1.8558579683303833, + "eval_logps/chosen": -86.5013198852539, + "eval_logps/rejected": -123.4262466430664, + "eval_loss": 0.005769502837210894, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.1646300554275513, + "eval_rewards/margins": 29.462299346923828, + "eval_rewards/rejected": -28.29766845703125, + "eval_runtime": 258.0094, + "eval_samples_per_second": 11.093, + "eval_steps_per_second": 0.694, + "step": 4800 + }, + { + "epoch": 2.2, + "learning_rate": 8.929477422628106e-08, + "logits/chosen": 
-2.2377803325653076, + "logits/rejected": -1.8689501285552979, + "logps/chosen": -88.69053649902344, + "logps/rejected": -133.93704223632812, + "loss": 0.0043, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.5955907106399536, + "rewards/margins": 29.29754066467285, + "rewards/rejected": -27.701946258544922, + "step": 4810 + }, + { + "epoch": 2.2, + "learning_rate": 8.878741755454083e-08, + "logits/chosen": -2.179999589920044, + "logits/rejected": -1.8029935359954834, + "logps/chosen": -90.18666076660156, + "logps/rejected": -128.03399658203125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3174841403961182, + "rewards/margins": 30.12929344177246, + "rewards/rejected": -28.811809539794922, + "step": 4820 + }, + { + "epoch": 2.2, + "learning_rate": 8.82800608828006e-08, + "logits/chosen": -2.2542881965637207, + "logits/rejected": -1.860487699508667, + "logps/chosen": -84.78041076660156, + "logps/rejected": -127.14323425292969, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.2697548866271973, + "rewards/margins": 31.854806900024414, + "rewards/rejected": -29.585052490234375, + "step": 4830 + }, + { + "epoch": 2.21, + "learning_rate": 8.777270421106036e-08, + "logits/chosen": -2.2306056022644043, + "logits/rejected": -1.9451026916503906, + "logps/chosen": -84.75010681152344, + "logps/rejected": -127.5153579711914, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.20773085951805115, + "rewards/margins": 29.199344635009766, + "rewards/rejected": -28.99161720275879, + "step": 4840 + }, + { + "epoch": 2.21, + "learning_rate": 8.726534753932013e-08, + "logits/chosen": -2.195384979248047, + "logits/rejected": -1.8609682321548462, + "logps/chosen": -84.73804473876953, + "logps/rejected": -123.91796875, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5444185733795166, + "rewards/margins": 30.432912826538086, + "rewards/rejected": -28.88849449157715, + "step": 4850 + }, + { + 
"epoch": 2.22, + "learning_rate": 8.67579908675799e-08, + "logits/chosen": -2.1483521461486816, + "logits/rejected": -1.7701698541641235, + "logps/chosen": -81.2613754272461, + "logps/rejected": -121.82652282714844, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8260562419891357, + "rewards/margins": 30.310237884521484, + "rewards/rejected": -27.484180450439453, + "step": 4860 + }, + { + "epoch": 2.22, + "learning_rate": 8.625063419583966e-08, + "logits/chosen": -2.243568181991577, + "logits/rejected": -1.941277265548706, + "logps/chosen": -84.82609558105469, + "logps/rejected": -127.32414245605469, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3905398845672607, + "rewards/margins": 30.30340003967285, + "rewards/rejected": -28.912860870361328, + "step": 4870 + }, + { + "epoch": 2.23, + "learning_rate": 8.574327752409943e-08, + "logits/chosen": -2.1722254753112793, + "logits/rejected": -1.8854080438613892, + "logps/chosen": -83.84065246582031, + "logps/rejected": -128.77059936523438, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0625784397125244, + "rewards/margins": 31.2793025970459, + "rewards/rejected": -30.216724395751953, + "step": 4880 + }, + { + "epoch": 2.23, + "learning_rate": 8.52359208523592e-08, + "logits/chosen": -2.1614387035369873, + "logits/rejected": -1.8331111669540405, + "logps/chosen": -85.25975036621094, + "logps/rejected": -130.95721435546875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.8772072792053223, + "rewards/margins": 31.198410034179688, + "rewards/rejected": -28.32120132446289, + "step": 4890 + }, + { + "epoch": 2.24, + "learning_rate": 8.472856418061896e-08, + "logits/chosen": -2.2037620544433594, + "logits/rejected": -1.8145354986190796, + "logps/chosen": -86.72196197509766, + "logps/rejected": -121.60040283203125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.486250877380371, + "rewards/margins": 30.308456420898438, + 
"rewards/rejected": -27.822208404541016, + "step": 4900 + }, + { + "epoch": 2.24, + "eval_logits/chosen": -2.2160322666168213, + "eval_logits/rejected": -1.868016242980957, + "eval_logps/chosen": -86.11427307128906, + "eval_logps/rejected": -123.58334350585938, + "eval_loss": 0.0056231957860291, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.3581570386886597, + "eval_rewards/margins": 29.7343692779541, + "eval_rewards/rejected": -28.376211166381836, + "eval_runtime": 178.9426, + "eval_samples_per_second": 15.994, + "eval_steps_per_second": 1.0, + "step": 4900 + }, + { + "epoch": 2.24, + "learning_rate": 8.422120750887873e-08, + "logits/chosen": -2.2726593017578125, + "logits/rejected": -1.8736642599105835, + "logps/chosen": -87.76054382324219, + "logps/rejected": -124.5219955444336, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.735135316848755, + "rewards/margins": 30.699026107788086, + "rewards/rejected": -27.963891983032227, + "step": 4910 + }, + { + "epoch": 2.25, + "learning_rate": 8.37138508371385e-08, + "logits/chosen": -2.2303450107574463, + "logits/rejected": -1.8574903011322021, + "logps/chosen": -87.68033599853516, + "logps/rejected": -125.37784576416016, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6258609294891357, + "rewards/margins": 30.00998878479004, + "rewards/rejected": -28.384124755859375, + "step": 4920 + }, + { + "epoch": 2.25, + "learning_rate": 8.320649416539826e-08, + "logits/chosen": -2.1431241035461426, + "logits/rejected": -1.8002105951309204, + "logps/chosen": -86.46646881103516, + "logps/rejected": -127.82108306884766, + "loss": 0.0034, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.7209049463272095, + "rewards/margins": 30.69203758239746, + "rewards/rejected": -28.971134185791016, + "step": 4930 + }, + { + "epoch": 2.25, + "learning_rate": 8.269913749365803e-08, + "logits/chosen": -2.21543550491333, + "logits/rejected": -1.858415961265564, + 
"logps/chosen": -85.4444808959961, + "logps/rejected": -125.85990142822266, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.123652219772339, + "rewards/margins": 29.99489974975586, + "rewards/rejected": -27.87125015258789, + "step": 4940 + }, + { + "epoch": 2.26, + "learning_rate": 8.21917808219178e-08, + "logits/chosen": -2.226576328277588, + "logits/rejected": -1.8361542224884033, + "logps/chosen": -88.27388000488281, + "logps/rejected": -124.52458190917969, + "loss": 0.0054, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.222522020339966, + "rewards/margins": 29.900218963623047, + "rewards/rejected": -27.677698135375977, + "step": 4950 + }, + { + "epoch": 2.26, + "learning_rate": 8.168442415017756e-08, + "logits/chosen": -2.211759567260742, + "logits/rejected": -1.8759946823120117, + "logps/chosen": -91.97505187988281, + "logps/rejected": -127.7925796508789, + "loss": 0.0044, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.8236006498336792, + "rewards/margins": 30.634103775024414, + "rewards/rejected": -29.810501098632812, + "step": 4960 + }, + { + "epoch": 2.27, + "learning_rate": 8.117706747843733e-08, + "logits/chosen": -2.220738172531128, + "logits/rejected": -1.8724660873413086, + "logps/chosen": -87.83768463134766, + "logps/rejected": -127.0542984008789, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.5908282995224, + "rewards/margins": 30.813274383544922, + "rewards/rejected": -29.22244644165039, + "step": 4970 + }, + { + "epoch": 2.27, + "learning_rate": 8.06697108066971e-08, + "logits/chosen": -2.2144250869750977, + "logits/rejected": -1.8192403316497803, + "logps/chosen": -88.7645034790039, + "logps/rejected": -123.9654769897461, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7449915409088135, + "rewards/margins": 30.582874298095703, + "rewards/rejected": -28.8378849029541, + "step": 4980 + }, + { + "epoch": 2.28, + "learning_rate": 
8.016235413495687e-08, + "logits/chosen": -2.1595869064331055, + "logits/rejected": -1.8579118251800537, + "logps/chosen": -81.31648254394531, + "logps/rejected": -125.78511810302734, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.1897120475769043, + "rewards/margins": 31.16072654724121, + "rewards/rejected": -28.97101402282715, + "step": 4990 + }, + { + "epoch": 2.28, + "learning_rate": 7.965499746321664e-08, + "logits/chosen": -2.163888692855835, + "logits/rejected": -1.7756332159042358, + "logps/chosen": -91.29290771484375, + "logps/rejected": -126.38360595703125, + "loss": 0.0025, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.3966686725616455, + "rewards/margins": 29.905893325805664, + "rewards/rejected": -27.509225845336914, + "step": 5000 + }, + { + "epoch": 2.28, + "eval_logits/chosen": -2.2226171493530273, + "eval_logits/rejected": -1.8767516613006592, + "eval_logps/chosen": -86.72997283935547, + "eval_logps/rejected": -125.4724349975586, + "eval_loss": 0.005568630062043667, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.0503071546554565, + "eval_rewards/margins": 30.37106704711914, + "eval_rewards/rejected": -29.320756912231445, + "eval_runtime": 265.4131, + "eval_samples_per_second": 10.783, + "eval_steps_per_second": 0.674, + "step": 5000 + }, + { + "epoch": 2.29, + "learning_rate": 7.91476407914764e-08, + "logits/chosen": -2.237316608428955, + "logits/rejected": -1.8469164371490479, + "logps/chosen": -86.86993408203125, + "logps/rejected": -128.10240173339844, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.2446770668029785, + "rewards/margins": 31.02500343322754, + "rewards/rejected": -29.78032875061035, + "step": 5010 + }, + { + "epoch": 2.29, + "learning_rate": 7.864028411973617e-08, + "logits/chosen": -2.212674379348755, + "logits/rejected": -1.7991310358047485, + "logps/chosen": -90.48421478271484, + "logps/rejected": -128.47349548339844, + 
"loss": 0.0067, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.104187488555908, + "rewards/margins": 31.993030548095703, + "rewards/rejected": -29.888843536376953, + "step": 5020 + }, + { + "epoch": 2.3, + "learning_rate": 7.813292744799594e-08, + "logits/chosen": -2.192707061767578, + "logits/rejected": -1.812227487564087, + "logps/chosen": -85.58504486083984, + "logps/rejected": -125.02815246582031, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.6018452644348145, + "rewards/margins": 31.598779678344727, + "rewards/rejected": -28.996929168701172, + "step": 5030 + }, + { + "epoch": 2.3, + "learning_rate": 7.76255707762557e-08, + "logits/chosen": -2.2332510948181152, + "logits/rejected": -1.9000991582870483, + "logps/chosen": -87.93112182617188, + "logps/rejected": -124.27226257324219, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8778412938117981, + "rewards/margins": 29.009052276611328, + "rewards/rejected": -28.131210327148438, + "step": 5040 + }, + { + "epoch": 2.31, + "learning_rate": 7.711821410451547e-08, + "logits/chosen": -2.2971322536468506, + "logits/rejected": -1.955288290977478, + "logps/chosen": -84.05998229980469, + "logps/rejected": -121.64460754394531, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.0523746013641357, + "rewards/margins": 30.503372192382812, + "rewards/rejected": -28.45099449157715, + "step": 5050 + }, + { + "epoch": 2.31, + "learning_rate": 7.661085743277524e-08, + "logits/chosen": -2.2180066108703613, + "logits/rejected": -1.8513801097869873, + "logps/chosen": -91.88666534423828, + "logps/rejected": -131.45933532714844, + "loss": 0.0019, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.476855516433716, + "rewards/margins": 32.50453186035156, + "rewards/rejected": -30.02767562866211, + "step": 5060 + }, + { + "epoch": 2.31, + "learning_rate": 7.6103500761035e-08, + "logits/chosen": -2.202540636062622, + "logits/rejected": -1.883195161819458, + 
"logps/chosen": -86.0487060546875, + "logps/rejected": -127.7849349975586, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5693166255950928, + "rewards/margins": 30.323623657226562, + "rewards/rejected": -29.75430679321289, + "step": 5070 + }, + { + "epoch": 2.32, + "learning_rate": 7.559614408929477e-08, + "logits/chosen": -2.167088031768799, + "logits/rejected": -1.7546894550323486, + "logps/chosen": -86.92243194580078, + "logps/rejected": -121.72308349609375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5353543758392334, + "rewards/margins": 30.667156219482422, + "rewards/rejected": -28.13180160522461, + "step": 5080 + }, + { + "epoch": 2.32, + "learning_rate": 7.508878741755454e-08, + "logits/chosen": -2.1240665912628174, + "logits/rejected": -1.8034683465957642, + "logps/chosen": -86.10752868652344, + "logps/rejected": -130.32803344726562, + "loss": 0.0055, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.6282140016555786, + "rewards/margins": 30.408313751220703, + "rewards/rejected": -29.780099868774414, + "step": 5090 + }, + { + "epoch": 2.33, + "learning_rate": 7.45814307458143e-08, + "logits/chosen": -2.234485387802124, + "logits/rejected": -1.8363311290740967, + "logps/chosen": -89.19425964355469, + "logps/rejected": -129.45681762695312, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.9281721115112305, + "rewards/margins": 33.132511138916016, + "rewards/rejected": -31.204341888427734, + "step": 5100 + }, + { + "epoch": 2.33, + "eval_logits/chosen": -2.2230384349823, + "eval_logits/rejected": -1.8779499530792236, + "eval_logps/chosen": -86.5483627319336, + "eval_logps/rejected": -125.72158813476562, + "eval_loss": 0.005534125491976738, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 1.1411113739013672, + "eval_rewards/margins": 30.586444854736328, + "eval_rewards/rejected": -29.44533348083496, + "eval_runtime": 203.014, + "eval_samples_per_second": 
14.098, + "eval_steps_per_second": 0.882, + "step": 5100 + }, + { + "epoch": 2.33, + "learning_rate": 7.407407407407407e-08, + "logits/chosen": -2.23368239402771, + "logits/rejected": -1.8570277690887451, + "logps/chosen": -87.19905853271484, + "logps/rejected": -127.96573638916016, + "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1923465728759766, + "rewards/margins": 31.500961303710938, + "rewards/rejected": -30.30861473083496, + "step": 5110 + }, + { + "epoch": 2.34, + "learning_rate": 7.356671740233384e-08, + "logits/chosen": -2.14494252204895, + "logits/rejected": -1.8105385303497314, + "logps/chosen": -83.87150573730469, + "logps/rejected": -126.78532409667969, + "loss": 0.0033, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.35639578104019165, + "rewards/margins": 30.178089141845703, + "rewards/rejected": -29.82169532775879, + "step": 5120 + }, + { + "epoch": 2.34, + "learning_rate": 7.30593607305936e-08, + "logits/chosen": -2.253058671951294, + "logits/rejected": -1.9436228275299072, + "logps/chosen": -90.07007598876953, + "logps/rejected": -127.9438705444336, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.44818297028541565, + "rewards/margins": 30.035837173461914, + "rewards/rejected": -30.4840145111084, + "step": 5130 + }, + { + "epoch": 2.35, + "learning_rate": 7.255200405885337e-08, + "logits/chosen": -2.2380034923553467, + "logits/rejected": -1.849329948425293, + "logps/chosen": -91.55721282958984, + "logps/rejected": -132.04197692871094, + "loss": 0.001, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7558342218399048, + "rewards/margins": 31.776962280273438, + "rewards/rejected": -30.021127700805664, + "step": 5140 + }, + { + "epoch": 2.35, + "learning_rate": 7.204464738711314e-08, + "logits/chosen": -2.249427318572998, + "logits/rejected": -1.916865348815918, + "logps/chosen": -84.08346557617188, + "logps/rejected": -127.4309310913086, + "loss": 0.0012, + 
"rewards/accuracies": 1.0, + "rewards/chosen": 1.0999155044555664, + "rewards/margins": 31.15311622619629, + "rewards/rejected": -30.053197860717773, + "step": 5150 + }, + { + "epoch": 2.36, + "learning_rate": 7.15372907153729e-08, + "logits/chosen": -2.164412498474121, + "logits/rejected": -1.8164761066436768, + "logps/chosen": -84.87574768066406, + "logps/rejected": -127.91072845458984, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4683095216751099, + "rewards/margins": 30.6422176361084, + "rewards/rejected": -29.173908233642578, + "step": 5160 + }, + { + "epoch": 2.36, + "learning_rate": 7.102993404363267e-08, + "logits/chosen": -2.2733216285705566, + "logits/rejected": -1.8871898651123047, + "logps/chosen": -84.68191528320312, + "logps/rejected": -128.5628204345703, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4870762825012207, + "rewards/margins": 31.898874282836914, + "rewards/rejected": -30.41179847717285, + "step": 5170 + }, + { + "epoch": 2.36, + "learning_rate": 7.052257737189244e-08, + "logits/chosen": -2.1040546894073486, + "logits/rejected": -1.794029951095581, + "logps/chosen": -87.33931732177734, + "logps/rejected": -122.92645263671875, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.523239016532898, + "rewards/margins": 28.69746971130371, + "rewards/rejected": -28.174230575561523, + "step": 5180 + }, + { + "epoch": 2.37, + "learning_rate": 7.00152207001522e-08, + "logits/chosen": -2.2180774211883545, + "logits/rejected": -1.8783124685287476, + "logps/chosen": -88.9879150390625, + "logps/rejected": -129.85577392578125, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.285766363143921, + "rewards/margins": 31.67111587524414, + "rewards/rejected": -30.38534927368164, + "step": 5190 + }, + { + "epoch": 2.37, + "learning_rate": 6.950786402841197e-08, + "logits/chosen": -2.2432150840759277, + "logits/rejected": -1.8757518529891968, + "logps/chosen": -96.52727508544922, 
+ "logps/rejected": -133.89486694335938, + "loss": 0.0044, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.2539239525794983, + "rewards/margins": 30.236125946044922, + "rewards/rejected": -30.49005126953125, + "step": 5200 + }, + { + "epoch": 2.37, + "eval_logits/chosen": -2.2150111198425293, + "eval_logits/rejected": -1.8662109375, + "eval_logps/chosen": -86.7686767578125, + "eval_logps/rejected": -126.29188537597656, + "eval_loss": 0.005586822517216206, + "eval_rewards/accuracies": 0.994413435459137, + "eval_rewards/chosen": 1.030951976776123, + "eval_rewards/margins": 30.761432647705078, + "eval_rewards/rejected": -29.730480194091797, + "eval_runtime": 372.2708, + "eval_samples_per_second": 7.688, + "eval_steps_per_second": 0.481, + "step": 5200 + }, + { + "epoch": 2.38, + "learning_rate": 6.900050735667174e-08, + "logits/chosen": -2.2496633529663086, + "logits/rejected": -1.9013668298721313, + "logps/chosen": -85.10989379882812, + "logps/rejected": -126.21388244628906, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0123857259750366, + "rewards/margins": 28.938159942626953, + "rewards/rejected": -27.925771713256836, + "step": 5210 + }, + { + "epoch": 2.38, + "learning_rate": 6.84931506849315e-08, + "logits/chosen": -2.169320583343506, + "logits/rejected": -1.8793160915374756, + "logps/chosen": -83.18421173095703, + "logps/rejected": -132.3877410888672, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.122344732284546, + "rewards/margins": 30.82855796813965, + "rewards/rejected": -29.70621109008789, + "step": 5220 + }, + { + "epoch": 2.39, + "learning_rate": 6.798579401319127e-08, + "logits/chosen": -2.3072543144226074, + "logits/rejected": -1.8543882369995117, + "logps/chosen": -92.82881164550781, + "logps/rejected": -127.92374420166016, + "loss": 0.0044, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.4654512405395508, + "rewards/margins": 31.590072631835938, + "rewards/rejected": 
-30.124622344970703, + "step": 5230 + }, + { + "epoch": 2.39, + "learning_rate": 6.747843734145104e-08, + "logits/chosen": -2.2160370349884033, + "logits/rejected": -1.8520358800888062, + "logps/chosen": -87.04851531982422, + "logps/rejected": -126.8850326538086, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.426818370819092, + "rewards/margins": 32.05809020996094, + "rewards/rejected": -29.631271362304688, + "step": 5240 + }, + { + "epoch": 2.4, + "learning_rate": 6.69710806697108e-08, + "logits/chosen": -2.2358498573303223, + "logits/rejected": -1.92721426486969, + "logps/chosen": -83.96819305419922, + "logps/rejected": -127.56068420410156, + "loss": 0.0044, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.6845203638076782, + "rewards/margins": 31.140390396118164, + "rewards/rejected": -29.455867767333984, + "step": 5250 + }, + { + "epoch": 2.4, + "learning_rate": 6.646372399797057e-08, + "logits/chosen": -2.224752426147461, + "logits/rejected": -1.8739010095596313, + "logps/chosen": -83.2956771850586, + "logps/rejected": -126.33967590332031, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8728569746017456, + "rewards/margins": 29.507293701171875, + "rewards/rejected": -28.63443374633789, + "step": 5260 + }, + { + "epoch": 2.41, + "learning_rate": 6.595636732623034e-08, + "logits/chosen": -2.2283332347869873, + "logits/rejected": -1.898648977279663, + "logps/chosen": -89.44114685058594, + "logps/rejected": -134.07838439941406, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2760619521141052, + "rewards/margins": 31.4327335357666, + "rewards/rejected": -31.156673431396484, + "step": 5270 + }, + { + "epoch": 2.41, + "learning_rate": 6.54490106544901e-08, + "logits/chosen": -2.217665195465088, + "logits/rejected": -1.7950804233551025, + "logps/chosen": -91.71220397949219, + "logps/rejected": -131.68295288085938, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 
1.5391443967819214, + "rewards/margins": 33.635643005371094, + "rewards/rejected": -32.09649658203125, + "step": 5280 + }, + { + "epoch": 2.41, + "learning_rate": 6.494165398274987e-08, + "logits/chosen": -2.1419761180877686, + "logits/rejected": -1.7783762216567993, + "logps/chosen": -90.9324722290039, + "logps/rejected": -132.05467224121094, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3793599605560303, + "rewards/margins": 31.754810333251953, + "rewards/rejected": -30.375452041625977, + "step": 5290 + }, + { + "epoch": 2.42, + "learning_rate": 6.443429731100964e-08, + "logits/chosen": -2.23848032951355, + "logits/rejected": -1.9507777690887451, + "logps/chosen": -83.6585464477539, + "logps/rejected": -128.34262084960938, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07423808425664902, + "rewards/margins": 31.000595092773438, + "rewards/rejected": -30.926355361938477, + "step": 5300 + }, + { + "epoch": 2.42, + "eval_logits/chosen": -2.220287322998047, + "eval_logits/rejected": -1.8729933500289917, + "eval_logps/chosen": -87.28707885742188, + "eval_logps/rejected": -127.75875091552734, + "eval_loss": 0.005639108829200268, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 0.7717540264129639, + "eval_rewards/margins": 31.235660552978516, + "eval_rewards/rejected": -30.463911056518555, + "eval_runtime": 204.4438, + "eval_samples_per_second": 13.999, + "eval_steps_per_second": 0.876, + "step": 5300 + }, + { + "epoch": 2.42, + "learning_rate": 6.39269406392694e-08, + "logits/chosen": -2.217376232147217, + "logits/rejected": -1.8636280298233032, + "logps/chosen": -85.64130401611328, + "logps/rejected": -130.76148986816406, + "loss": 0.0045, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.24725079536438, + "rewards/margins": 32.80742263793945, + "rewards/rejected": -30.5601749420166, + "step": 5310 + }, + { + "epoch": 2.43, + "learning_rate": 6.341958396752917e-08, + "logits/chosen": 
-2.2781529426574707, + "logits/rejected": -1.9017364978790283, + "logps/chosen": -85.34228515625, + "logps/rejected": -126.925537109375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1655864715576172, + "rewards/margins": 31.301239013671875, + "rewards/rejected": -30.135656356811523, + "step": 5320 + }, + { + "epoch": 2.43, + "learning_rate": 6.291222729578894e-08, + "logits/chosen": -2.2987561225891113, + "logits/rejected": -1.894020438194275, + "logps/chosen": -90.0018539428711, + "logps/rejected": -120.316650390625, + "loss": 0.0058, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.8631511926651, + "rewards/margins": 30.350738525390625, + "rewards/rejected": -28.487585067749023, + "step": 5330 + }, + { + "epoch": 2.44, + "learning_rate": 6.24048706240487e-08, + "logits/chosen": -2.3006176948547363, + "logits/rejected": -1.861555814743042, + "logps/chosen": -88.1524658203125, + "logps/rejected": -130.68968200683594, + "loss": 0.0057, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4345285892486572, + "rewards/margins": 31.54620933532715, + "rewards/rejected": -30.111682891845703, + "step": 5340 + }, + { + "epoch": 2.44, + "learning_rate": 6.189751395230847e-08, + "logits/chosen": -2.287721872329712, + "logits/rejected": -1.911665678024292, + "logps/chosen": -94.31678771972656, + "logps/rejected": -128.20156860351562, + "loss": 0.0021, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.2087610960006714, + "rewards/margins": 30.08795738220215, + "rewards/rejected": -28.879199981689453, + "step": 5350 + }, + { + "epoch": 2.45, + "learning_rate": 6.139015728056824e-08, + "logits/chosen": -2.2673823833465576, + "logits/rejected": -1.8630508184432983, + "logps/chosen": -85.5520248413086, + "logps/rejected": -127.2315444946289, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.068432331085205, + "rewards/margins": 31.822891235351562, + "rewards/rejected": -29.75446128845215, + 
"step": 5360 + }, + { + "epoch": 2.45, + "learning_rate": 6.0882800608828e-08, + "logits/chosen": -2.211057662963867, + "logits/rejected": -1.789331078529358, + "logps/chosen": -91.29240417480469, + "logps/rejected": -127.65742492675781, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.5129761695861816, + "rewards/margins": 32.06275939941406, + "rewards/rejected": -29.54978370666504, + "step": 5370 + }, + { + "epoch": 2.46, + "learning_rate": 6.037544393708777e-08, + "logits/chosen": -2.1423110961914062, + "logits/rejected": -1.8088276386260986, + "logps/chosen": -86.91880798339844, + "logps/rejected": -131.5565948486328, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9576078653335571, + "rewards/margins": 31.21225929260254, + "rewards/rejected": -30.254650115966797, + "step": 5380 + }, + { + "epoch": 2.46, + "learning_rate": 5.986808726534754e-08, + "logits/chosen": -2.1703662872314453, + "logits/rejected": -1.8025754690170288, + "logps/chosen": -90.57283020019531, + "logps/rejected": -127.29478454589844, + "loss": 0.0057, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.42478710412979126, + "rewards/margins": 30.417232513427734, + "rewards/rejected": -29.992446899414062, + "step": 5390 + }, + { + "epoch": 2.46, + "learning_rate": 5.93607305936073e-08, + "logits/chosen": -2.2170863151550293, + "logits/rejected": -1.8885765075683594, + "logps/chosen": -83.57946014404297, + "logps/rejected": -127.2809829711914, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1054432392120361, + "rewards/margins": 31.797679901123047, + "rewards/rejected": -30.69223976135254, + "step": 5400 + }, + { + "epoch": 2.46, + "eval_logits/chosen": -2.218388795852661, + "eval_logits/rejected": -1.8641334772109985, + "eval_logps/chosen": -87.43651580810547, + "eval_logps/rejected": -127.63639068603516, + "eval_loss": 0.005585874430835247, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 
0.6970371007919312, + "eval_rewards/margins": 31.099769592285156, + "eval_rewards/rejected": -30.402734756469727, + "eval_runtime": 215.095, + "eval_samples_per_second": 13.306, + "eval_steps_per_second": 0.832, + "step": 5400 + }, + { + "epoch": 2.47, + "learning_rate": 5.8853373921867065e-08, + "logits/chosen": -2.204249858856201, + "logits/rejected": -1.8770767450332642, + "logps/chosen": -89.52336120605469, + "logps/rejected": -134.68844604492188, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.223055362701416, + "rewards/margins": 32.22364044189453, + "rewards/rejected": -30.000585556030273, + "step": 5410 + }, + { + "epoch": 2.47, + "learning_rate": 5.834601725012683e-08, + "logits/chosen": -2.255474805831909, + "logits/rejected": -1.9023358821868896, + "logps/chosen": -89.14444732666016, + "logps/rejected": -128.97544860839844, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.772989273071289, + "rewards/margins": 31.232650756835938, + "rewards/rejected": -29.45966148376465, + "step": 5420 + }, + { + "epoch": 2.48, + "learning_rate": 5.78386605783866e-08, + "logits/chosen": -2.12762451171875, + "logits/rejected": -1.7764146327972412, + "logps/chosen": -89.85285186767578, + "logps/rejected": -135.08656311035156, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.07732389122247696, + "rewards/margins": 32.275455474853516, + "rewards/rejected": -32.19812774658203, + "step": 5430 + }, + { + "epoch": 2.48, + "learning_rate": 5.7331303906646365e-08, + "logits/chosen": -2.1940665245056152, + "logits/rejected": -1.8633617162704468, + "logps/chosen": -86.71977233886719, + "logps/rejected": -132.53306579589844, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.124468207359314, + "rewards/margins": 31.2531681060791, + "rewards/rejected": -30.128698348999023, + "step": 5440 + }, + { + "epoch": 2.49, + "learning_rate": 5.682394723490613e-08, + "logits/chosen": -2.319063663482666, + 
"logits/rejected": -1.9095830917358398, + "logps/chosen": -92.62286376953125, + "logps/rejected": -133.32777404785156, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9819850921630859, + "rewards/margins": 32.897369384765625, + "rewards/rejected": -31.915386199951172, + "step": 5450 + }, + { + "epoch": 2.49, + "learning_rate": 5.63165905631659e-08, + "logits/chosen": -2.249760627746582, + "logits/rejected": -1.880089521408081, + "logps/chosen": -90.4861831665039, + "logps/rejected": -135.6083221435547, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5621588826179504, + "rewards/margins": 33.28693771362305, + "rewards/rejected": -32.72478103637695, + "step": 5460 + }, + { + "epoch": 2.5, + "learning_rate": 5.5809233891425665e-08, + "logits/chosen": -2.229804277420044, + "logits/rejected": -1.8705679178237915, + "logps/chosen": -89.12641143798828, + "logps/rejected": -131.51339721679688, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6339949369430542, + "rewards/margins": 31.29616928100586, + "rewards/rejected": -30.66217613220215, + "step": 5470 + }, + { + "epoch": 2.5, + "learning_rate": 5.530187721968543e-08, + "logits/chosen": -2.2353408336639404, + "logits/rejected": -1.8864301443099976, + "logps/chosen": -90.36353302001953, + "logps/rejected": -135.7219696044922, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8752725720405579, + "rewards/margins": 33.36455535888672, + "rewards/rejected": -32.489280700683594, + "step": 5480 + }, + { + "epoch": 2.51, + "learning_rate": 5.47945205479452e-08, + "logits/chosen": -2.1790707111358643, + "logits/rejected": -1.8824752569198608, + "logps/chosen": -86.35264587402344, + "logps/rejected": -138.5924835205078, + "loss": 0.003, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.8381770849227905, + "rewards/margins": 33.54732131958008, + "rewards/rejected": -32.70914077758789, + "step": 5490 + }, + { + "epoch": 2.51, + 
"learning_rate": 5.4287163876204964e-08, + "logits/chosen": -2.253610134124756, + "logits/rejected": -1.8832261562347412, + "logps/chosen": -91.31092834472656, + "logps/rejected": -132.42288208007812, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8345121145248413, + "rewards/margins": 31.579730987548828, + "rewards/rejected": -30.745220184326172, + "step": 5500 + }, + { + "epoch": 2.51, + "eval_logits/chosen": -2.2216947078704834, + "eval_logits/rejected": -1.8705134391784668, + "eval_logps/chosen": -87.70054626464844, + "eval_logps/rejected": -128.5836181640625, + "eval_loss": 0.005452133249491453, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 0.56501704454422, + "eval_rewards/margins": 31.44136619567871, + "eval_rewards/rejected": -30.8763484954834, + "eval_runtime": 239.2992, + "eval_samples_per_second": 11.96, + "eval_steps_per_second": 0.748, + "step": 5500 + }, + { + "epoch": 2.52, + "learning_rate": 5.377980720446473e-08, + "logits/chosen": -2.198091745376587, + "logits/rejected": -1.8183279037475586, + "logps/chosen": -88.14014434814453, + "logps/rejected": -130.6202392578125, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0735613107681274, + "rewards/margins": 32.70407485961914, + "rewards/rejected": -31.63051414489746, + "step": 5510 + }, + { + "epoch": 2.52, + "learning_rate": 5.32724505327245e-08, + "logits/chosen": -2.260110378265381, + "logits/rejected": -1.8869386911392212, + "logps/chosen": -85.59989929199219, + "logps/rejected": -126.64656829833984, + "loss": 0.0019, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1739155054092407, + "rewards/margins": 29.694509506225586, + "rewards/rejected": -28.520593643188477, + "step": 5520 + }, + { + "epoch": 2.52, + "learning_rate": 5.2765093860984264e-08, + "logits/chosen": -2.2093660831451416, + "logits/rejected": -1.9314342737197876, + "logps/chosen": -86.80977630615234, + "logps/rejected": -128.8234405517578, + "loss": 0.0033, + 
"rewards/accuracies": 0.9624999761581421, + "rewards/chosen": -1.0760526657104492, + "rewards/margins": 28.858245849609375, + "rewards/rejected": -29.93429946899414, + "step": 5530 + }, + { + "epoch": 2.53, + "learning_rate": 5.225773718924403e-08, + "logits/chosen": -2.182633876800537, + "logits/rejected": -1.857081651687622, + "logps/chosen": -84.45915222167969, + "logps/rejected": -127.63262939453125, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2383257150650024, + "rewards/margins": 31.82940101623535, + "rewards/rejected": -30.591073989868164, + "step": 5540 + }, + { + "epoch": 2.53, + "learning_rate": 5.17503805175038e-08, + "logits/chosen": -2.2090537548065186, + "logits/rejected": -1.8241138458251953, + "logps/chosen": -89.18778991699219, + "logps/rejected": -133.34609985351562, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9115778207778931, + "rewards/margins": 32.943077087402344, + "rewards/rejected": -32.03150177001953, + "step": 5550 + }, + { + "epoch": 2.54, + "learning_rate": 5.1243023845763564e-08, + "logits/chosen": -2.2495675086975098, + "logits/rejected": -1.9659570455551147, + "logps/chosen": -82.7123031616211, + "logps/rejected": -135.22415161132812, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8649141192436218, + "rewards/margins": 33.649383544921875, + "rewards/rejected": -32.78447341918945, + "step": 5560 + }, + { + "epoch": 2.54, + "learning_rate": 5.073566717402333e-08, + "logits/chosen": -2.2645201683044434, + "logits/rejected": -1.9173786640167236, + "logps/chosen": -85.26544952392578, + "logps/rejected": -131.0226593017578, + "loss": 0.0079, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9299182891845703, + "rewards/margins": 32.061363220214844, + "rewards/rejected": -31.131444931030273, + "step": 5570 + }, + { + "epoch": 2.55, + "learning_rate": 5.02283105022831e-08, + "logits/chosen": -2.21622633934021, + "logits/rejected": -1.8026697635650635, + "logps/chosen": 
-93.25666046142578, + "logps/rejected": -133.81333923339844, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4977938234806061, + "rewards/margins": 31.675308227539062, + "rewards/rejected": -31.17751693725586, + "step": 5580 + }, + { + "epoch": 2.55, + "learning_rate": 4.9720953830542864e-08, + "logits/chosen": -2.175873279571533, + "logits/rejected": -1.7768224477767944, + "logps/chosen": -91.02295684814453, + "logps/rejected": -131.60630798339844, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.008679199032485485, + "rewards/margins": 32.17238235473633, + "rewards/rejected": -32.181060791015625, + "step": 5590 + }, + { + "epoch": 2.56, + "learning_rate": 4.921359715880263e-08, + "logits/chosen": -2.2250924110412598, + "logits/rejected": -1.7964969873428345, + "logps/chosen": -83.45528411865234, + "logps/rejected": -126.2359848022461, + "loss": 0.0021, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5450199842453003, + "rewards/margins": 32.206085205078125, + "rewards/rejected": -30.66106605529785, + "step": 5600 + }, + { + "epoch": 2.56, + "eval_logits/chosen": -2.225454807281494, + "eval_logits/rejected": -1.8789043426513672, + "eval_logps/chosen": -87.96759796142578, + "eval_logps/rejected": -129.4971466064453, + "eval_loss": 0.005582863464951515, + "eval_rewards/accuracies": 0.994413435459137, + "eval_rewards/chosen": 0.4314943850040436, + "eval_rewards/margins": 31.764596939086914, + "eval_rewards/rejected": -31.33310317993164, + "eval_runtime": 190.7581, + "eval_samples_per_second": 15.003, + "eval_steps_per_second": 0.938, + "step": 5600 + }, + { + "epoch": 2.56, + "learning_rate": 4.87062404870624e-08, + "logits/chosen": -2.2279891967773438, + "logits/rejected": -1.8970493078231812, + "logps/chosen": -85.4381103515625, + "logps/rejected": -128.7406005859375, + "loss": 0.002, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.32868337631225586, + "rewards/margins": 31.653844833374023, + "rewards/rejected": 
-31.325159072875977, + "step": 5610 + }, + { + "epoch": 2.57, + "learning_rate": 4.8198883815322164e-08, + "logits/chosen": -2.2391271591186523, + "logits/rejected": -1.919921636581421, + "logps/chosen": -86.07292175292969, + "logps/rejected": -133.414306640625, + "loss": 0.0064, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.4069444537162781, + "rewards/margins": 31.80039405822754, + "rewards/rejected": -31.393451690673828, + "step": 5620 + }, + { + "epoch": 2.57, + "learning_rate": 4.769152714358193e-08, + "logits/chosen": -2.1675751209259033, + "logits/rejected": -1.8634262084960938, + "logps/chosen": -84.79447937011719, + "logps/rejected": -135.7115020751953, + "loss": 0.0009, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.24717219173908234, + "rewards/margins": 33.35146713256836, + "rewards/rejected": -33.10429382324219, + "step": 5630 + }, + { + "epoch": 2.57, + "learning_rate": 4.71841704718417e-08, + "logits/chosen": -2.1868836879730225, + "logits/rejected": -1.8967559337615967, + "logps/chosen": -88.42156982421875, + "logps/rejected": -132.83580017089844, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.0003570079861674458, + "rewards/margins": 31.02736473083496, + "rewards/rejected": -31.027725219726562, + "step": 5640 + }, + { + "epoch": 2.58, + "learning_rate": 4.6676813800101464e-08, + "logits/chosen": -2.281507968902588, + "logits/rejected": -1.8936166763305664, + "logps/chosen": -91.97183990478516, + "logps/rejected": -128.8743438720703, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.5036630630493164, + "rewards/margins": 31.219980239868164, + "rewards/rejected": -29.716318130493164, + "step": 5650 + }, + { + "epoch": 2.58, + "learning_rate": 4.616945712836123e-08, + "logits/chosen": -2.1977477073669434, + "logits/rejected": -1.8735698461532593, + "logps/chosen": -89.23414611816406, + "logps/rejected": -131.9024658203125, + "loss": 0.0033, + 
"rewards/accuracies": 1.0, + "rewards/chosen": 0.5118563771247864, + "rewards/margins": 32.36008834838867, + "rewards/rejected": -31.848236083984375, + "step": 5660 + }, + { + "epoch": 2.59, + "learning_rate": 4.5662100456621e-08, + "logits/chosen": -2.2489736080169678, + "logits/rejected": -1.8674437999725342, + "logps/chosen": -87.26219940185547, + "logps/rejected": -134.82095336914062, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8911517858505249, + "rewards/margins": 32.76057434082031, + "rewards/rejected": -31.86942481994629, + "step": 5670 + }, + { + "epoch": 2.59, + "learning_rate": 4.5154743784880764e-08, + "logits/chosen": -2.1873042583465576, + "logits/rejected": -1.924572229385376, + "logps/chosen": -79.94978332519531, + "logps/rejected": -128.44432067871094, + "loss": 0.0015, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.669002890586853, + "rewards/margins": 30.952983856201172, + "rewards/rejected": -30.283981323242188, + "step": 5680 + }, + { + "epoch": 2.6, + "learning_rate": 4.464738711314053e-08, + "logits/chosen": -2.160274028778076, + "logits/rejected": -1.8004605770111084, + "logps/chosen": -96.46055603027344, + "logps/rejected": -133.01870727539062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.372810959815979, + "rewards/margins": 31.60434913635254, + "rewards/rejected": -30.231542587280273, + "step": 5690 + }, + { + "epoch": 2.6, + "learning_rate": 4.41400304414003e-08, + "logits/chosen": -2.2056326866149902, + "logits/rejected": -1.73297917842865, + "logps/chosen": -96.11383056640625, + "logps/rejected": -131.00546264648438, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5699256658554077, + "rewards/margins": 32.902523040771484, + "rewards/rejected": -31.332595825195312, + "step": 5700 + }, + { + "epoch": 2.6, + "eval_logits/chosen": -2.226090669631958, + "eval_logits/rejected": -1.876379370689392, + "eval_logps/chosen": -87.41751861572266, + "eval_logps/rejected": 
-128.55892944335938, + "eval_loss": 0.005494570359587669, + "eval_rewards/accuracies": 0.994413435459137, + "eval_rewards/chosen": 0.7065298557281494, + "eval_rewards/margins": 31.570541381835938, + "eval_rewards/rejected": -30.864015579223633, + "eval_runtime": 195.492, + "eval_samples_per_second": 14.64, + "eval_steps_per_second": 0.916, + "step": 5700 + }, + { + "epoch": 2.61, + "learning_rate": 4.3632673769660064e-08, + "logits/chosen": -2.2404072284698486, + "logits/rejected": -1.8427753448486328, + "logps/chosen": -88.0647964477539, + "logps/rejected": -132.00401306152344, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0430487394332886, + "rewards/margins": 31.766735076904297, + "rewards/rejected": -30.723682403564453, + "step": 5710 + }, + { + "epoch": 2.61, + "learning_rate": 4.312531709791983e-08, + "logits/chosen": -2.238086223602295, + "logits/rejected": -1.9353258609771729, + "logps/chosen": -86.8983383178711, + "logps/rejected": -134.00552368164062, + "loss": 0.0, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7805923223495483, + "rewards/margins": 32.820377349853516, + "rewards/rejected": -32.03978729248047, + "step": 5720 + }, + { + "epoch": 2.62, + "learning_rate": 4.26179604261796e-08, + "logits/chosen": -2.210732936859131, + "logits/rejected": -1.8510334491729736, + "logps/chosen": -86.54997253417969, + "logps/rejected": -128.822998046875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1234691143035889, + "rewards/margins": 31.587039947509766, + "rewards/rejected": -30.46356773376465, + "step": 5730 + }, + { + "epoch": 2.62, + "learning_rate": 4.2110603754439363e-08, + "logits/chosen": -2.271686553955078, + "logits/rejected": -1.9221289157867432, + "logps/chosen": -85.12126922607422, + "logps/rejected": -132.54244995117188, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.38059353828430176, + "rewards/margins": 32.59527587890625, + "rewards/rejected": -32.214683532714844, + 
"step": 5740 + }, + { + "epoch": 2.62, + "learning_rate": 4.160324708269913e-08, + "logits/chosen": -2.2383780479431152, + "logits/rejected": -1.9224342107772827, + "logps/chosen": -87.90855407714844, + "logps/rejected": -133.9766387939453, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.365673303604126, + "rewards/margins": 32.20100784301758, + "rewards/rejected": -30.8353328704834, + "step": 5750 + }, + { + "epoch": 2.63, + "learning_rate": 4.10958904109589e-08, + "logits/chosen": -2.2130231857299805, + "logits/rejected": -1.9353950023651123, + "logps/chosen": -79.39093017578125, + "logps/rejected": -129.04461669921875, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7016046047210693, + "rewards/margins": 31.313640594482422, + "rewards/rejected": -29.612030029296875, + "step": 5760 + }, + { + "epoch": 2.63, + "learning_rate": 4.0588533739218663e-08, + "logits/chosen": -2.196366786956787, + "logits/rejected": -1.783656358718872, + "logps/chosen": -81.48753356933594, + "logps/rejected": -127.0324478149414, + "loss": 0.0035, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7704079151153564, + "rewards/margins": 33.856910705566406, + "rewards/rejected": -32.08650207519531, + "step": 5770 + }, + { + "epoch": 2.64, + "learning_rate": 4.0081177067478437e-08, + "logits/chosen": -2.1401500701904297, + "logits/rejected": -1.800840139389038, + "logps/chosen": -90.09111785888672, + "logps/rejected": -134.8428955078125, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.179386854171753, + "rewards/margins": 33.37128448486328, + "rewards/rejected": -32.191898345947266, + "step": 5780 + }, + { + "epoch": 2.64, + "learning_rate": 3.95738203957382e-08, + "logits/chosen": -2.2394440174102783, + "logits/rejected": -1.904920220375061, + "logps/chosen": -87.17567443847656, + "logps/rejected": -130.36404418945312, + "loss": 0.0054, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.23616953194141388, + 
"rewards/margins": 31.9814453125, + "rewards/rejected": -31.74527931213379, + "step": 5790 + }, + { + "epoch": 2.65, + "learning_rate": 3.906646372399797e-08, + "logits/chosen": -2.1932971477508545, + "logits/rejected": -1.808300256729126, + "logps/chosen": -90.1788558959961, + "logps/rejected": -135.00550842285156, + "loss": 0.0055, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.4655330777168274, + "rewards/margins": 31.84645652770996, + "rewards/rejected": -31.380924224853516, + "step": 5800 + }, + { + "epoch": 2.65, + "eval_logits/chosen": -2.2272167205810547, + "eval_logits/rejected": -1.878515601158142, + "eval_logps/chosen": -87.6694564819336, + "eval_logps/rejected": -129.5568084716797, + "eval_loss": 0.005615293048322201, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 0.5805642604827881, + "eval_rewards/margins": 31.943510055541992, + "eval_rewards/rejected": -31.36294937133789, + "eval_runtime": 188.904, + "eval_samples_per_second": 15.151, + "eval_steps_per_second": 0.948, + "step": 5800 + }, + { + "epoch": 2.65, + "learning_rate": 3.8559107052257736e-08, + "logits/chosen": -2.2895407676696777, + "logits/rejected": -1.9646167755126953, + "logps/chosen": -84.39744567871094, + "logps/rejected": -128.11642456054688, + "loss": 0.0033, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.45202645659446716, + "rewards/margins": 31.760883331298828, + "rewards/rejected": -32.21290969848633, + "step": 5810 + }, + { + "epoch": 2.66, + "learning_rate": 3.80517503805175e-08, + "logits/chosen": -2.2432637214660645, + "logits/rejected": -1.90109384059906, + "logps/chosen": -87.83378601074219, + "logps/rejected": -134.02479553222656, + "loss": 0.0033, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.3190554678440094, + "rewards/margins": 32.41364669799805, + "rewards/rejected": -32.73270034790039, + "step": 5820 + }, + { + "epoch": 2.66, + "learning_rate": 3.754439370877727e-08, + 
"logits/chosen": -2.249756336212158, + "logits/rejected": -1.8411035537719727, + "logps/chosen": -92.8016128540039, + "logps/rejected": -141.8028106689453, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7895019054412842, + "rewards/margins": 35.841468811035156, + "rewards/rejected": -34.051971435546875, + "step": 5830 + }, + { + "epoch": 2.67, + "learning_rate": 3.7037037037037036e-08, + "logits/chosen": -2.2434568405151367, + "logits/rejected": -1.8818607330322266, + "logps/chosen": -85.84037780761719, + "logps/rejected": -130.4074249267578, + "loss": 0.0013, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.18663930892944336, + "rewards/margins": 32.276615142822266, + "rewards/rejected": -32.08997344970703, + "step": 5840 + }, + { + "epoch": 2.67, + "learning_rate": 3.65296803652968e-08, + "logits/chosen": -2.266979932785034, + "logits/rejected": -1.9421230554580688, + "logps/chosen": -88.89350128173828, + "logps/rejected": -130.6448516845703, + "loss": 0.0059, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.22369590401649475, + "rewards/margins": 30.364971160888672, + "rewards/rejected": -30.14127540588379, + "step": 5850 + }, + { + "epoch": 2.67, + "learning_rate": 3.602232369355657e-08, + "logits/chosen": -2.245170831680298, + "logits/rejected": -1.8216416835784912, + "logps/chosen": -89.65043640136719, + "logps/rejected": -136.75848388671875, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.8733927607536316, + "rewards/margins": 34.61988830566406, + "rewards/rejected": -33.746498107910156, + "step": 5860 + }, + { + "epoch": 2.68, + "learning_rate": 3.5514967021816336e-08, + "logits/chosen": -2.264880895614624, + "logits/rejected": -1.87582528591156, + "logps/chosen": -89.56029510498047, + "logps/rejected": -132.30987548828125, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9222901463508606, + "rewards/margins": 32.37554931640625, + "rewards/rejected": -31.453258514404297, + "step": 5870 + }, 
+ { + "epoch": 2.68, + "learning_rate": 3.50076103500761e-08, + "logits/chosen": -2.2106261253356934, + "logits/rejected": -1.8250715732574463, + "logps/chosen": -86.20150756835938, + "logps/rejected": -131.51016235351562, + "loss": 0.0043, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.9935638904571533, + "rewards/margins": 32.885887145996094, + "rewards/rejected": -30.892318725585938, + "step": 5880 + }, + { + "epoch": 2.69, + "learning_rate": 3.450025367833587e-08, + "logits/chosen": -2.1880767345428467, + "logits/rejected": -1.850630521774292, + "logps/chosen": -92.76158142089844, + "logps/rejected": -135.90554809570312, + "loss": 0.008, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7444310188293457, + "rewards/margins": 32.634525299072266, + "rewards/rejected": -31.890094757080078, + "step": 5890 + }, + { + "epoch": 2.69, + "learning_rate": 3.3992897006595636e-08, + "logits/chosen": -2.2573089599609375, + "logits/rejected": -1.9743645191192627, + "logps/chosen": -89.33732604980469, + "logps/rejected": -133.85531616210938, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.07906317710876465, + "rewards/margins": 31.270915985107422, + "rewards/rejected": -31.191858291625977, + "step": 5900 + }, + { + "epoch": 2.69, + "eval_logits/chosen": -2.2270514965057373, + "eval_logits/rejected": -1.8773261308670044, + "eval_logps/chosen": -88.0054931640625, + "eval_logps/rejected": -130.35690307617188, + "eval_loss": 0.0056848106905817986, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 0.4125469923019409, + "eval_rewards/margins": 32.175540924072266, + "eval_rewards/rejected": -31.76299285888672, + "eval_runtime": 187.4914, + "eval_samples_per_second": 15.265, + "eval_steps_per_second": 0.955, + "step": 5900 + }, + { + "epoch": 2.7, + "learning_rate": 3.34855403348554e-08, + "logits/chosen": -2.249181032180786, + "logits/rejected": -1.8485383987426758, + "logps/chosen": -94.90681457519531, + 
"logps/rejected": -131.73171997070312, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9341878890991211, + "rewards/margins": 31.412506103515625, + "rewards/rejected": -30.478321075439453, + "step": 5910 + }, + { + "epoch": 2.7, + "learning_rate": 3.297818366311517e-08, + "logits/chosen": -2.2404422760009766, + "logits/rejected": -1.9138734340667725, + "logps/chosen": -84.72028350830078, + "logps/rejected": -135.90516662597656, + "loss": 0.0042, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.11444835364818573, + "rewards/margins": 33.07588577270508, + "rewards/rejected": -32.96143341064453, + "step": 5920 + }, + { + "epoch": 2.71, + "learning_rate": 3.2470826991374936e-08, + "logits/chosen": -2.2721214294433594, + "logits/rejected": -1.8678615093231201, + "logps/chosen": -89.56704711914062, + "logps/rejected": -135.8103790283203, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.21784238517284393, + "rewards/margins": 33.14167022705078, + "rewards/rejected": -32.923828125, + "step": 5930 + }, + { + "epoch": 2.71, + "learning_rate": 3.19634703196347e-08, + "logits/chosen": -2.2767839431762695, + "logits/rejected": -1.95871102809906, + "logps/chosen": -92.42479705810547, + "logps/rejected": -139.7154998779297, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.1621972620487213, + "rewards/margins": 33.40778350830078, + "rewards/rejected": -33.56998062133789, + "step": 5940 + }, + { + "epoch": 2.72, + "learning_rate": 3.145611364789447e-08, + "logits/chosen": -2.1546614170074463, + "logits/rejected": -1.7696462869644165, + "logps/chosen": -92.0521469116211, + "logps/rejected": -134.0331268310547, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3609464168548584, + "rewards/margins": 32.90177917480469, + "rewards/rejected": -31.540836334228516, + "step": 5950 + }, + { + "epoch": 2.72, + "learning_rate": 3.0948756976154236e-08, + "logits/chosen": -2.2606728076934814, + 
"logits/rejected": -1.9319846630096436, + "logps/chosen": -84.58219146728516, + "logps/rejected": -129.25262451171875, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3570373058319092, + "rewards/margins": 32.03596878051758, + "rewards/rejected": -30.678930282592773, + "step": 5960 + }, + { + "epoch": 2.73, + "learning_rate": 3.0441400304414e-08, + "logits/chosen": -2.2868599891662598, + "logits/rejected": -1.9675430059432983, + "logps/chosen": -84.88082122802734, + "logps/rejected": -130.11279296875, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.3799717426300049, + "rewards/margins": 32.025333404541016, + "rewards/rejected": -30.645360946655273, + "step": 5970 + }, + { + "epoch": 2.73, + "learning_rate": 2.993404363267377e-08, + "logits/chosen": -2.2390341758728027, + "logits/rejected": -1.9002273082733154, + "logps/chosen": -84.3361587524414, + "logps/rejected": -132.21054077148438, + "loss": 0.0015, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.16436767578125, + "rewards/margins": 31.722976684570312, + "rewards/rejected": -31.887344360351562, + "step": 5980 + }, + { + "epoch": 2.73, + "learning_rate": 2.9426686960933532e-08, + "logits/chosen": -2.199117660522461, + "logits/rejected": -1.7541431188583374, + "logps/chosen": -94.94574737548828, + "logps/rejected": -130.81954956054688, + "loss": 0.0046, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6142606735229492, + "rewards/margins": 33.10905456542969, + "rewards/rejected": -31.494792938232422, + "step": 5990 + }, + { + "epoch": 2.74, + "learning_rate": 2.89193302891933e-08, + "logits/chosen": -2.368619918823242, + "logits/rejected": -1.9851986169815063, + "logps/chosen": -90.95460510253906, + "logps/rejected": -135.75814819335938, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.2684900164604187, + "rewards/margins": 33.65724563598633, + "rewards/rejected": -33.388755798339844, + "step": 6000 + }, + { + 
"epoch": 2.74, + "eval_logits/chosen": -2.220628023147583, + "eval_logits/rejected": -1.8684388399124146, + "eval_logps/chosen": -87.90426635742188, + "eval_logps/rejected": -129.78916931152344, + "eval_loss": 0.005480717867612839, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 0.4631572663784027, + "eval_rewards/margins": 31.94228172302246, + "eval_rewards/rejected": -31.479124069213867, + "eval_runtime": 188.1705, + "eval_samples_per_second": 15.21, + "eval_steps_per_second": 0.951, + "step": 6000 + }, + { + "epoch": 2.74, + "learning_rate": 2.8411973617453066e-08, + "logits/chosen": -2.1487109661102295, + "logits/rejected": -1.817098617553711, + "logps/chosen": -88.42924499511719, + "logps/rejected": -132.3211212158203, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3025569915771484, + "rewards/margins": 33.558433532714844, + "rewards/rejected": -32.25587844848633, + "step": 6010 + }, + { + "epoch": 2.75, + "learning_rate": 2.7904616945712832e-08, + "logits/chosen": -2.2678167819976807, + "logits/rejected": -1.8667113780975342, + "logps/chosen": -86.5342025756836, + "logps/rejected": -129.55935668945312, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0008518695831299, + "rewards/margins": 32.452972412109375, + "rewards/rejected": -31.452117919921875, + "step": 6020 + }, + { + "epoch": 2.75, + "learning_rate": 2.73972602739726e-08, + "logits/chosen": -2.264303684234619, + "logits/rejected": -1.878883957862854, + "logps/chosen": -87.34925842285156, + "logps/rejected": -130.01307678222656, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.4018937945365906, + "rewards/margins": 32.04921340942383, + "rewards/rejected": -31.647314071655273, + "step": 6030 + }, + { + "epoch": 2.76, + "learning_rate": 2.6889903602232366e-08, + "logits/chosen": -2.2151219844818115, + "logits/rejected": -1.8500292301177979, + "logps/chosen": -89.37690734863281, + "logps/rejected": -132.4504852294922, 
+ "loss": 0.0042, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.7266126871109009, + "rewards/margins": 33.85057067871094, + "rewards/rejected": -32.123958587646484, + "step": 6040 + }, + { + "epoch": 2.76, + "learning_rate": 2.6382546930492132e-08, + "logits/chosen": -2.3891372680664062, + "logits/rejected": -2.01668119430542, + "logps/chosen": -88.968017578125, + "logps/rejected": -134.05789184570312, + "loss": 0.0012, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.28615638613700867, + "rewards/margins": 31.448923110961914, + "rewards/rejected": -31.162769317626953, + "step": 6050 + }, + { + "epoch": 2.77, + "learning_rate": 2.58751902587519e-08, + "logits/chosen": -2.265878200531006, + "logits/rejected": -1.890244722366333, + "logps/chosen": -83.14924621582031, + "logps/rejected": -128.8566436767578, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.234644889831543, + "rewards/margins": 32.04364776611328, + "rewards/rejected": -30.80900001525879, + "step": 6060 + }, + { + "epoch": 2.77, + "learning_rate": 2.5367833587011665e-08, + "logits/chosen": -2.3138954639434814, + "logits/rejected": -1.8876469135284424, + "logps/chosen": -96.02142333984375, + "logps/rejected": -132.93771362304688, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.01418936252594, + "rewards/margins": 31.96274185180664, + "rewards/rejected": -30.948551177978516, + "step": 6070 + }, + { + "epoch": 2.78, + "learning_rate": 2.4860476915271432e-08, + "logits/chosen": -2.2674362659454346, + "logits/rejected": -1.8280704021453857, + "logps/chosen": -91.15046691894531, + "logps/rejected": -131.0101318359375, + "loss": 0.0032, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.064391613006592, + "rewards/margins": 34.05684280395508, + "rewards/rejected": -31.99245262145996, + "step": 6080 + }, + { + "epoch": 2.78, + "learning_rate": 2.43531202435312e-08, + "logits/chosen": -2.2650763988494873, + "logits/rejected": -1.9617116451263428, + "logps/chosen": 
-81.56208801269531, + "logps/rejected": -131.12905883789062, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9505952000617981, + "rewards/margins": 32.1887092590332, + "rewards/rejected": -31.238117218017578, + "step": 6090 + }, + { + "epoch": 2.78, + "learning_rate": 2.3845763571790965e-08, + "logits/chosen": -2.2631096839904785, + "logits/rejected": -1.8503172397613525, + "logps/chosen": -93.38179016113281, + "logps/rejected": -127.70606994628906, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.097931981086731, + "rewards/margins": 31.50238037109375, + "rewards/rejected": -30.40444564819336, + "step": 6100 + }, + { + "epoch": 2.78, + "eval_logits/chosen": -2.2261240482330322, + "eval_logits/rejected": -1.877672791481018, + "eval_logps/chosen": -87.89844512939453, + "eval_logps/rejected": -130.31732177734375, + "eval_loss": 0.005521238315850496, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 0.46606892347335815, + "eval_rewards/margins": 32.209266662597656, + "eval_rewards/rejected": -31.743196487426758, + "eval_runtime": 230.5005, + "eval_samples_per_second": 12.416, + "eval_steps_per_second": 0.777, + "step": 6100 + }, + { + "epoch": 2.79, + "learning_rate": 2.3338406900050732e-08, + "logits/chosen": -2.156193494796753, + "logits/rejected": -1.7497676610946655, + "logps/chosen": -88.01410675048828, + "logps/rejected": -129.71466064453125, + "loss": 0.0044, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.868211567401886, + "rewards/margins": 32.48591995239258, + "rewards/rejected": -31.61771011352539, + "step": 6110 + }, + { + "epoch": 2.79, + "learning_rate": 2.28310502283105e-08, + "logits/chosen": -2.27543306350708, + "logits/rejected": -1.8602546453475952, + "logps/chosen": -87.84466552734375, + "logps/rejected": -132.81777954101562, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.908778429031372, + "rewards/margins": 33.64677810668945, + 
"rewards/rejected": -31.73800277709961, + "step": 6120 + }, + { + "epoch": 2.8, + "learning_rate": 2.2323693556570265e-08, + "logits/chosen": -2.2359254360198975, + "logits/rejected": -1.9188182353973389, + "logps/chosen": -83.94422149658203, + "logps/rejected": -129.99581909179688, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0997272729873657, + "rewards/margins": 33.11286163330078, + "rewards/rejected": -32.01313400268555, + "step": 6130 + }, + { + "epoch": 2.8, + "learning_rate": 2.1816336884830032e-08, + "logits/chosen": -2.220909357070923, + "logits/rejected": -1.9032386541366577, + "logps/chosen": -84.5809555053711, + "logps/rejected": -131.4840545654297, + "loss": 0.0023, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.0809751749038696, + "rewards/margins": 32.72708511352539, + "rewards/rejected": -31.6461124420166, + "step": 6140 + }, + { + "epoch": 2.81, + "learning_rate": 2.13089802130898e-08, + "logits/chosen": -2.242598056793213, + "logits/rejected": -1.8045127391815186, + "logps/chosen": -88.67266082763672, + "logps/rejected": -134.9513397216797, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.345744013786316, + "rewards/margins": 35.6363410949707, + "rewards/rejected": -34.29059600830078, + "step": 6150 + }, + { + "epoch": 2.81, + "learning_rate": 2.0801623541349565e-08, + "logits/chosen": -2.2422022819519043, + "logits/rejected": -1.8511192798614502, + "logps/chosen": -89.98072052001953, + "logps/rejected": -134.96629333496094, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8219916820526123, + "rewards/margins": 34.537723541259766, + "rewards/rejected": -32.71573257446289, + "step": 6160 + }, + { + "epoch": 2.82, + "learning_rate": 2.0294266869609332e-08, + "logits/chosen": -2.2780232429504395, + "logits/rejected": -1.8996295928955078, + "logps/chosen": -90.99410247802734, + "logps/rejected": -132.14706420898438, + "loss": 0.0011, + "rewards/accuracies": 1.0, + 
"rewards/chosen": 0.23341834545135498, + "rewards/margins": 32.95896530151367, + "rewards/rejected": -32.725547790527344, + "step": 6170 + }, + { + "epoch": 2.82, + "learning_rate": 1.97869101978691e-08, + "logits/chosen": -2.265056610107422, + "logits/rejected": -1.8394749164581299, + "logps/chosen": -93.9018783569336, + "logps/rejected": -132.54859924316406, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5913444757461548, + "rewards/margins": 33.28534698486328, + "rewards/rejected": -32.694007873535156, + "step": 6180 + }, + { + "epoch": 2.83, + "learning_rate": 1.9279553526128868e-08, + "logits/chosen": -2.234004259109497, + "logits/rejected": -1.9320383071899414, + "logps/chosen": -80.52074432373047, + "logps/rejected": -131.88790893554688, + "loss": 0.0022, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.3767390549182892, + "rewards/margins": 33.22993087768555, + "rewards/rejected": -32.85319137573242, + "step": 6190 + }, + { + "epoch": 2.83, + "learning_rate": 1.8772196854388635e-08, + "logits/chosen": -2.2175235748291016, + "logits/rejected": -1.8579527139663696, + "logps/chosen": -86.18971252441406, + "logps/rejected": -130.14015197753906, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.16683992743492126, + "rewards/margins": 31.512447357177734, + "rewards/rejected": -31.67928695678711, + "step": 6200 + }, + { + "epoch": 2.83, + "eval_logits/chosen": -2.2242860794067383, + "eval_logits/rejected": -1.8746119737625122, + "eval_logps/chosen": -87.8666763305664, + "eval_logps/rejected": -130.38250732421875, + "eval_loss": 0.005393806379288435, + "eval_rewards/accuracies": 0.9972066879272461, + "eval_rewards/chosen": 0.48195433616638184, + "eval_rewards/margins": 32.25774383544922, + "eval_rewards/rejected": -31.775789260864258, + "eval_runtime": 225.1441, + "eval_samples_per_second": 12.712, + "eval_steps_per_second": 0.795, + "step": 6200 + }, + { + "epoch": 2.83, + "learning_rate": 
1.82648401826484e-08, + "logits/chosen": -2.2525382041931152, + "logits/rejected": -1.9172885417938232, + "logps/chosen": -85.48805236816406, + "logps/rejected": -133.95077514648438, + "loss": 0.0054, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1090790256857872, + "rewards/margins": 32.306297302246094, + "rewards/rejected": -32.19722366333008, + "step": 6210 + }, + { + "epoch": 2.84, + "learning_rate": 1.7757483510908168e-08, + "logits/chosen": -2.1963915824890137, + "logits/rejected": -1.8396198749542236, + "logps/chosen": -87.08211517333984, + "logps/rejected": -136.7283477783203, + "loss": 0.0023, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.9704242944717407, + "rewards/margins": 33.93294906616211, + "rewards/rejected": -32.96253204345703, + "step": 6220 + }, + { + "epoch": 2.84, + "learning_rate": 1.7250126839167935e-08, + "logits/chosen": -2.220945358276367, + "logits/rejected": -1.8874809741973877, + "logps/chosen": -83.79931640625, + "logps/rejected": -129.1210174560547, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5613209009170532, + "rewards/margins": 32.765995025634766, + "rewards/rejected": -32.204673767089844, + "step": 6230 + }, + { + "epoch": 2.85, + "learning_rate": 1.67427701674277e-08, + "logits/chosen": -2.2060558795928955, + "logits/rejected": -1.8112850189208984, + "logps/chosen": -87.95478820800781, + "logps/rejected": -132.51715087890625, + "loss": 0.0023, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4738547801971436, + "rewards/margins": 34.16747283935547, + "rewards/rejected": -32.69361877441406, + "step": 6240 + }, + { + "epoch": 2.85, + "learning_rate": 1.6235413495687468e-08, + "logits/chosen": -2.1749913692474365, + "logits/rejected": -1.8666985034942627, + "logps/chosen": -88.18854522705078, + "logps/rejected": -133.01121520996094, + "loss": 0.0024, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7993472218513489, + "rewards/margins": 31.564916610717773, + "rewards/rejected": 
-30.765567779541016, + "step": 6250 + }, + { + "epoch": 2.86, + "learning_rate": 1.5728056823947235e-08, + "logits/chosen": -2.161698341369629, + "logits/rejected": -1.7702264785766602, + "logps/chosen": -83.78431701660156, + "logps/rejected": -125.90284729003906, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.6891142129898071, + "rewards/margins": 30.822668075561523, + "rewards/rejected": -30.133554458618164, + "step": 6260 + }, + { + "epoch": 2.86, + "learning_rate": 1.5220700152207e-08, + "logits/chosen": -2.3119311332702637, + "logits/rejected": -1.9453165531158447, + "logps/chosen": -87.14160919189453, + "logps/rejected": -134.0089569091797, + "loss": 0.0014, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7904703617095947, + "rewards/margins": 33.403202056884766, + "rewards/rejected": -32.61273193359375, + "step": 6270 + }, + { + "epoch": 2.87, + "learning_rate": 1.4713343480466766e-08, + "logits/chosen": -2.2479605674743652, + "logits/rejected": -1.8846473693847656, + "logps/chosen": -94.47314453125, + "logps/rejected": -134.31393432617188, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.504614531993866, + "rewards/margins": 31.21249008178711, + "rewards/rejected": -31.71710205078125, + "step": 6280 + }, + { + "epoch": 2.87, + "learning_rate": 1.4205986808726533e-08, + "logits/chosen": -2.1867895126342773, + "logits/rejected": -1.8338983058929443, + "logps/chosen": -86.80367279052734, + "logps/rejected": -134.02243041992188, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9545267820358276, + "rewards/margins": 32.74871826171875, + "rewards/rejected": -31.794189453125, + "step": 6290 + }, + { + "epoch": 2.88, + "learning_rate": 1.36986301369863e-08, + "logits/chosen": -2.277268648147583, + "logits/rejected": -1.94185471534729, + "logps/chosen": -86.12911224365234, + "logps/rejected": -135.1995849609375, + "loss": 0.0017, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7337052822113037, + 
"rewards/margins": 33.44802474975586, + "rewards/rejected": -32.71432113647461, + "step": 6300 + }, + { + "epoch": 2.88, + "eval_logits/chosen": -2.2262394428253174, + "eval_logits/rejected": -1.8772982358932495, + "eval_logps/chosen": -87.41510772705078, + "eval_logps/rejected": -129.4889373779297, + "eval_loss": 0.005471652373671532, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 0.7077398300170898, + "eval_rewards/margins": 32.0367431640625, + "eval_rewards/rejected": -31.329004287719727, + "eval_runtime": 218.9647, + "eval_samples_per_second": 13.071, + "eval_steps_per_second": 0.817, + "step": 6300 + }, + { + "epoch": 2.88, + "learning_rate": 1.3191273465246066e-08, + "logits/chosen": -2.2694547176361084, + "logits/rejected": -1.8749288320541382, + "logps/chosen": -86.23151397705078, + "logps/rejected": -126.11222076416016, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5606023073196411, + "rewards/margins": 32.055213928222656, + "rewards/rejected": -30.49460792541504, + "step": 6310 + }, + { + "epoch": 2.88, + "learning_rate": 1.2683916793505833e-08, + "logits/chosen": -2.271695375442505, + "logits/rejected": -1.8720725774765015, + "logps/chosen": -88.22488403320312, + "logps/rejected": -133.49237060546875, + "loss": 0.0033, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0623927116394043, + "rewards/margins": 33.233009338378906, + "rewards/rejected": -32.170616149902344, + "step": 6320 + }, + { + "epoch": 2.89, + "learning_rate": 1.21765601217656e-08, + "logits/chosen": -2.2026755809783936, + "logits/rejected": -1.8335577249526978, + "logps/chosen": -86.81916809082031, + "logps/rejected": -130.44638061523438, + "loss": 0.0064, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3268142938613892, + "rewards/margins": 32.48900604248047, + "rewards/rejected": -31.16219139099121, + "step": 6330 + }, + { + "epoch": 2.89, + "learning_rate": 1.1669203450025366e-08, + "logits/chosen": -2.2959322929382324, + 
"logits/rejected": -1.9061601161956787, + "logps/chosen": -92.18937683105469, + "logps/rejected": -137.560302734375, + "loss": 0.0043, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.8249843120574951, + "rewards/margins": 33.91102600097656, + "rewards/rejected": -32.08604049682617, + "step": 6340 + }, + { + "epoch": 2.9, + "learning_rate": 1.1161846778285133e-08, + "logits/chosen": -2.2622501850128174, + "logits/rejected": -1.8619012832641602, + "logps/chosen": -87.53951263427734, + "logps/rejected": -131.9397430419922, + "loss": 0.0025, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.8194286227226257, + "rewards/margins": 32.57026290893555, + "rewards/rejected": -31.750835418701172, + "step": 6350 + }, + { + "epoch": 2.9, + "learning_rate": 1.06544901065449e-08, + "logits/chosen": -2.26688814163208, + "logits/rejected": -1.9139961004257202, + "logps/chosen": -85.9757308959961, + "logps/rejected": -134.81204223632812, + "loss": 0.0031, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.7707549333572388, + "rewards/margins": 33.59237289428711, + "rewards/rejected": -32.821617126464844, + "step": 6360 + }, + { + "epoch": 2.91, + "learning_rate": 1.0147133434804666e-08, + "logits/chosen": -2.1150248050689697, + "logits/rejected": -1.7503303289413452, + "logps/chosen": -88.97874450683594, + "logps/rejected": -134.1490478515625, + "loss": 0.0085, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2682693004608154, + "rewards/margins": 32.085575103759766, + "rewards/rejected": -30.817302703857422, + "step": 6370 + }, + { + "epoch": 2.91, + "learning_rate": 9.639776763064434e-09, + "logits/chosen": -2.1985535621643066, + "logits/rejected": -1.8416798114776611, + "logps/chosen": -83.56170654296875, + "logps/rejected": -127.2623291015625, + "loss": 0.0052, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.5547320246696472, + "rewards/margins": 31.148799896240234, + "rewards/rejected": -30.594066619873047, + "step": 6380 + }, + { + "epoch": 2.92, + 
"learning_rate": 9.1324200913242e-09, + "logits/chosen": -2.1735215187072754, + "logits/rejected": -1.806305170059204, + "logps/chosen": -85.32138061523438, + "logps/rejected": -126.041015625, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6665178537368774, + "rewards/margins": 31.99408531188965, + "rewards/rejected": -30.327566146850586, + "step": 6390 + }, + { + "epoch": 2.92, + "learning_rate": 8.625063419583967e-09, + "logits/chosen": -2.2531039714813232, + "logits/rejected": -1.8883110284805298, + "logps/chosen": -89.10762023925781, + "logps/rejected": -135.33935546875, + "loss": 0.0033, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 0.8338155746459961, + "rewards/margins": 32.7072868347168, + "rewards/rejected": -31.873470306396484, + "step": 6400 + }, + { + "epoch": 2.92, + "eval_logits/chosen": -2.227215051651001, + "eval_logits/rejected": -1.8791638612747192, + "eval_logps/chosen": -87.51187133789062, + "eval_logps/rejected": -129.8273468017578, + "eval_loss": 0.005407842341810465, + "eval_rewards/accuracies": 0.994413435459137, + "eval_rewards/chosen": 0.6593578457832336, + "eval_rewards/margins": 32.15756607055664, + "eval_rewards/rejected": -31.498210906982422, + "eval_runtime": 185.4523, + "eval_samples_per_second": 15.433, + "eval_steps_per_second": 0.965, + "step": 6400 + }, + { + "epoch": 2.93, + "learning_rate": 8.117706747843734e-09, + "logits/chosen": -2.1412510871887207, + "logits/rejected": -1.8283464908599854, + "logps/chosen": -90.0947265625, + "logps/rejected": -132.66854858398438, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.1616397649049759, + "rewards/margins": 31.50514793395996, + "rewards/rejected": -31.3435115814209, + "step": 6410 + }, + { + "epoch": 2.93, + "learning_rate": 7.6103500761035e-09, + "logits/chosen": -2.265721321105957, + "logits/rejected": -1.9056390523910522, + "logps/chosen": -87.0054931640625, + "logps/rejected": -132.97547912597656, + "loss": 0.0044, + 
"rewards/accuracies": 0.987500011920929, + "rewards/chosen": 1.0425851345062256, + "rewards/margins": 32.99910354614258, + "rewards/rejected": -31.95652198791504, + "step": 6420 + }, + { + "epoch": 2.94, + "learning_rate": 7.1029934043632664e-09, + "logits/chosen": -2.2558608055114746, + "logits/rejected": -1.8824846744537354, + "logps/chosen": -86.92906188964844, + "logps/rejected": -135.2639617919922, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9257342219352722, + "rewards/margins": 33.13534927368164, + "rewards/rejected": -32.209617614746094, + "step": 6430 + }, + { + "epoch": 2.94, + "learning_rate": 6.595636732623033e-09, + "logits/chosen": -2.2064738273620605, + "logits/rejected": -1.859531044960022, + "logps/chosen": -87.9058837890625, + "logps/rejected": -133.92010498046875, + "loss": 0.0034, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3929450511932373, + "rewards/margins": 33.6112174987793, + "rewards/rejected": -32.2182731628418, + "step": 6440 + }, + { + "epoch": 2.94, + "learning_rate": 6.0882800608828e-09, + "logits/chosen": -2.2518255710601807, + "logits/rejected": -1.9297151565551758, + "logps/chosen": -82.05953979492188, + "logps/rejected": -129.396240234375, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9110031127929688, + "rewards/margins": 31.361520767211914, + "rewards/rejected": -30.450519561767578, + "step": 6450 + }, + { + "epoch": 2.95, + "learning_rate": 5.580923389142566e-09, + "logits/chosen": -2.0963399410247803, + "logits/rejected": -1.7235018014907837, + "logps/chosen": -89.75813293457031, + "logps/rejected": -132.9015350341797, + "loss": 0.0033, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": 2.336916923522949, + "rewards/margins": 32.56396484375, + "rewards/rejected": -30.2270450592041, + "step": 6460 + }, + { + "epoch": 2.95, + "learning_rate": 5.073566717402333e-09, + "logits/chosen": -2.23740816116333, + "logits/rejected": -1.8160254955291748, + "logps/chosen": 
-97.89299774169922, + "logps/rejected": -127.76078796386719, + "loss": 0.0022, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.258462905883789, + "rewards/margins": 32.506343841552734, + "rewards/rejected": -31.247879028320312, + "step": 6470 + }, + { + "epoch": 2.96, + "learning_rate": 4.5662100456621e-09, + "logits/chosen": -2.185804843902588, + "logits/rejected": -1.8170665502548218, + "logps/chosen": -86.79978942871094, + "logps/rejected": -129.39993286132812, + "loss": 0.0044, + "rewards/accuracies": 1.0, + "rewards/chosen": 2.790616989135742, + "rewards/margins": 33.8178825378418, + "rewards/rejected": -31.027271270751953, + "step": 6480 + }, + { + "epoch": 2.96, + "learning_rate": 4.058853373921867e-09, + "logits/chosen": -2.232938766479492, + "logits/rejected": -1.811173439025879, + "logps/chosen": -90.50852966308594, + "logps/rejected": -131.22023010253906, + "loss": 0.0001, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5173299312591553, + "rewards/margins": 34.080116271972656, + "rewards/rejected": -32.562782287597656, + "step": 6490 + }, + { + "epoch": 2.97, + "learning_rate": 3.5514967021816332e-09, + "logits/chosen": -2.2670958042144775, + "logits/rejected": -1.914764165878296, + "logps/chosen": -91.02118682861328, + "logps/rejected": -133.01803588867188, + "loss": 0.0011, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9153534173965454, + "rewards/margins": 31.895156860351562, + "rewards/rejected": -30.979806900024414, + "step": 6500 + }, + { + "epoch": 2.97, + "eval_logits/chosen": -2.227478265762329, + "eval_logits/rejected": -1.8803960084915161, + "eval_logps/chosen": -87.42868041992188, + "eval_logps/rejected": -129.57211303710938, + "eval_loss": 0.005446174647659063, + "eval_rewards/accuracies": 0.9916201233863831, + "eval_rewards/chosen": 0.7009533643722534, + "eval_rewards/margins": 32.07155227661133, + "eval_rewards/rejected": -31.37059211730957, + "eval_runtime": 193.9275, + "eval_samples_per_second": 14.758, + 
"eval_steps_per_second": 0.923, + "step": 6500 + } + ], + "logging_steps": 10, + "max_steps": 6570, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..15780fa --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34b8889cfe0054a58b64fadecfbb2df66a951d3e88dc300062b95218e48206df +size 5688 diff --git a/zero_to_fp32.py b/zero_to_fp32.py new file mode 100644 index 0000000..c98caae --- /dev/null +++ b/zero_to_fp32.py @@ -0,0 +1,587 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: python zero_to_fp32.py . pytorch_model.bin + +import argparse +import torch +import glob +import math +import os +import re +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. 
from deepspeed.utils import logger
from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS,
                                            FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES,
                                            FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS)


@dataclass
class zero_model_state:
    """Fields extracted from one ``*_model_states.pt`` file by ``parse_model_states``."""
    buffers: dict  # buffer name -> tensor converted with ``.float()``
    param_shapes: dict  # value of state_dict[PARAM_SHAPES]; iterated as a list of {name: shape} dicts below
    shared_params: list  # [key, value] pairs taken from state_dict["shared_params"]
    ds_version: int  # value of state_dict[DS_VERSION], or None when the key is absent
    frozen_param_shapes: dict  # value of state_dict[FROZEN_PARAM_SHAPES], or None when absent
    frozen_param_fragments: dict  # value of state_dict[FROZEN_PARAM_FRAGMENTS], or None when absent


# module-level debug switch; the command-line entry point sets it from ``--debug``
debug = 0

# load to cpu
device = torch.device('cpu')


def atoi(text):
    """Return ``int(text)`` when ``text`` consists only of digits, otherwise return ``text`` unchanged."""
    return int(text) if text.isdigit() else text


def natural_keys(text):
    '''
    alist.sort(key=natural_keys) sorts in human order
    http://nedbatchelder.com/blog/200712/human_sorting.html
    (See Toothy's implementation in the comments)
    '''
    return [atoi(c) for c in re.split(r'(\d+)', text)]


def get_model_state_file(checkpoint_dir, zero_stage):
    """
    Return the path of the rank-0 model states file inside ``checkpoint_dir``.

    Args:
        - ``checkpoint_dir``: directory expected to contain the model states file
        - ``zero_stage``: ZeRO stage; stages <= 2 and stage 3 use different file names

    Raises:
        - ``FileNotFoundError``: if the directory or the expected file doesn't exist
        - ``ValueError``: if ``zero_stage`` is not a supported stage
    """
    if not os.path.isdir(checkpoint_dir):
        raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist")

    # there should be only one file
    if zero_stage <= 2:
        file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt")
    elif zero_stage == 3:
        file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt")
    else:
        # without this branch ``file`` would be unbound and surface as an UnboundLocalError
        raise ValueError(f"unknown zero stage {zero_stage}")

    if not os.path.exists(file):
        raise FileNotFoundError(f"can't find model states file at '{file}'")

    return file


def get_checkpoint_files(checkpoint_dir, glob_pattern):
    """
    Return the files in ``checkpoint_dir`` matching ``glob_pattern``, sorted with ``natural_keys``.

    Raises:
        - ``FileNotFoundError``: if no file matches
    """
    # XXX: need to test that this simple glob rule works for multi-node setup too
    ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys)

    if len(ckpt_files) == 0:
        raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'")

    return ckpt_files


def get_optim_files(checkpoint_dir):
    """Return the naturally sorted ``*_optim_states.pt`` files found in ``checkpoint_dir``."""
    return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt")


def get_model_state_files(checkpoint_dir):
    """Return the naturally sorted ``*_model_states.pt`` files found in ``checkpoint_dir``."""
    return get_checkpoint_files(checkpoint_dir, "*_model_states.pt")


def parse_model_states(files):
    """
    Load every model states file and return a list of ``zero_model_state`` objects, one per file.

    Raises:
        - ``ValueError``: if a file has no ``BUFFER_NAMES`` entry
    """
    zero_model_states = []
    for file in files:
        # NOTE(review): torch.load unpickles the file - only run this on checkpoints you trust
        state_dict = torch.load(file, map_location=device)

        if BUFFER_NAMES not in state_dict:
            raise ValueError(f"{file} is not a model state checkpoint")
        buffer_names = state_dict[BUFFER_NAMES]
        if debug:
            print("Found buffers:", buffer_names)

        # recover just the buffers while restoring them to fp32 if they were saved in fp16
        buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names}
        param_shapes = state_dict[PARAM_SHAPES]

        # frozen parameters are optional in the checkpoint
        frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None)
        if frozen_param_shapes is not None and debug:
            print(f"Found frozen_param_shapes: {frozen_param_shapes}")

        # handle shared params
        shared_params = [[k, v] for k, v in state_dict["shared_params"].items()]

        ds_version = state_dict.get(DS_VERSION, None)

        frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None)

        z_model_state = zero_model_state(buffers=buffers,
                                         param_shapes=param_shapes,
                                         shared_params=shared_params,
                                         ds_version=ds_version,
                                         frozen_param_shapes=frozen_param_shapes,
                                         frozen_param_fragments=frozen_param_fragments)
        zero_model_states.append(z_model_state)

    return zero_model_states


def parse_optim_states(files, ds_checkpoint_dir):
    """
    Load the optimizer state files and extract the fp32 flat groups of every rank.

    Args:
        - ``files``: paths of the ``*_optim_states.pt`` files
        - ``ds_checkpoint_dir``: checkpoint folder, used only in the error message

    Returns:
        - tuple ``(zero_stage, world_size, fp32_flat_groups)``; ``fp32_flat_groups`` has one entry per file

    Raises:
        - ``ValueError``: if the first file has no ``ZERO_STAGE`` entry, the number of files doesn't
          match the recorded partition count, or the zero stage is unknown
    """
    total_files = len(files)
    state_dicts = []
    for f in files:
        # NOTE(review): torch.load unpickles the file - only run this on checkpoints you trust
        state_dict = torch.load(f, map_location=device)
        # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights
        # and also handle the case where it was already removed by another helper script
        state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None)
        state_dicts.append(state_dict)

    if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]:
        raise ValueError(f"{files[0]} is not a zero checkpoint")
    zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE]
    world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT]

    # For ZeRO-2 each param group can have different partition_count as data parallelism for expert
    # parameters can be different from data parallelism for non-expert parameters. So we can just
    # use the max of the partition_count to get the dp world_size.

    if isinstance(world_size, list):
        world_size = max(world_size)

    if world_size != total_files:
        raise ValueError(
            f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. "
            "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes."
        )

    # the groups are named differently in each stage
    if zero_stage <= 2:
        fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS
        fp32_flat_groups = [sd[OPTIMIZER_STATE_DICT][fp32_groups_key] for sd in state_dicts]
    elif zero_stage == 3:
        fp32_groups_key = FP32_FLAT_GROUPS
        # if there is more than one param group, there will be multiple flattened tensors - one
        # flattened tensor per group - for simplicity merge them into a single tensor
        #
        # XXX: could make the script more memory efficient for when there are multiple groups - it
        # will require matching the sub-lists of param_shapes for each param group flattened tensor
        fp32_flat_groups = [torch.cat(sd[OPTIMIZER_STATE_DICT][fp32_groups_key], 0) for sd in state_dicts]
    else:
        raise ValueError(f"unknown zero stage {zero_stage}")

    return zero_stage, world_size, fp32_flat_groups


def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir):
    """
    Returns fp32 state_dict reconstructed from ds checkpoint

    Args:
        - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are)

    """
    print(f"Processing zero checkpoint '{ds_checkpoint_dir}'")

    optim_files = get_optim_files(ds_checkpoint_dir)
    zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir)
    print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}")

    model_files = get_model_state_files(ds_checkpoint_dir)

    zero_model_states = parse_model_states(model_files)
    print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}')

    # parse_optim_states has already rejected any other stage
    if zero_stage <= 2:
        return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states)
    elif zero_stage == 3:
        return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states)


def _zero2_merge_frozen_params(state_dict, zero_model_states):
    """Copy the frozen param fragments of the first model state into ``state_dict`` (in place)."""
    if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0:
        return

    frozen_param_shapes = zero_model_states[0].frozen_param_shapes
    frozen_param_fragments = zero_model_states[0].frozen_param_fragments

    if debug:
        num_elem = sum(s.numel() for s in frozen_param_shapes.values())
        print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')

        wanted_params = len(frozen_param_shapes)
        wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
        avail_numel = sum([p.numel() for p in frozen_param_fragments.values()])
        print(f'Frozen params: Have {avail_numel} numels to process.')
        print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')

    total_params = 0
    total_numel = 0
    for name, shape in frozen_param_shapes.items():
        total_params += 1
        unpartitioned_numel = shape.numel()
        total_numel += unpartitioned_numel

        state_dict[name] = frozen_param_fragments[name]

        if debug:
            print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")

    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")


def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
    """
    Rebuild every trainable param from the per-rank fp32 partitions and store it in ``state_dict``.

    For each param group the partitions of all ranks are concatenated into one flat vector, which is
    then sliced param by param in the order given by ``param_shapes``.

    Raises:
        - ``ValueError``: if, after alignment, the consumed and available element counts of a group differ
    """
    param_shapes = zero_model_states[0].param_shapes

    # Reconstruction protocol:
    #
    # XXX: document this

    if debug:
        for i in range(world_size):
            for j in range(len(fp32_flat_groups[0])):
                print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}")

    # XXX: memory usage doubles here (zero2)
    num_param_groups = len(fp32_flat_groups[0])
    merged_single_partition_of_fp32_groups = []
    for i in range(num_param_groups):
        merged_partitions = [sd[i] for sd in fp32_flat_groups]
        full_single_fp32_vector = torch.cat(merged_partitions, 0)
        merged_single_partition_of_fp32_groups.append(full_single_fp32_vector)
    avail_numel = sum(
        [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups])

    if debug:
        wanted_params = sum([len(shapes) for shapes in param_shapes])
        wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes])
        # not asserting if there is a mismatch due to possible padding
        print(f"Have {avail_numel} numels to process.")
        print(f"Need {wanted_numel} numels in {wanted_params} params.")

    # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and
    # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex
    # paddings performed in the code it's almost impossible to predict the exact numbers w/o the
    # live optimizer object, so we are checking that the numbers are within the right range
    align_to = 2 * world_size

    def zero2_align(x):
        return align_to * math.ceil(x / align_to)

    # params
    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
    # out-of-core computing solution
    total_numel = 0
    total_params = 0
    for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups):
        offset = 0
        avail_numel = full_single_fp32_vector.numel()
        for name, shape in shapes.items():

            unpartitioned_numel = shape.numel()
            total_numel += unpartitioned_numel
            total_params += 1

            if debug:
                print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
            state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape)
            offset += unpartitioned_numel

        if debug:
            print(f"original offset={offset}, avail_numel={avail_numel}")

        offset = zero2_align(offset)
        avail_numel = zero2_align(avail_numel)

        if debug:
            print(f"aligned offset={offset}, avail_numel={avail_numel}")

        # Sanity check
        if offset != avail_numel:
            raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")

    print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements")


def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states):
    """Assemble the state dict (buffers, frozen, trainable and shared params) for ZeRO stages <= 2."""
    state_dict = OrderedDict()

    # buffers
    buffers = zero_model_states[0].buffers
    state_dict.update(buffers)
    if debug:
        print(f"added {len(buffers)} buffers")

    _zero2_merge_frozen_params(state_dict, zero_model_states)

    _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)

    # recover shared parameters
    for pair in zero_model_states[0].shared_params:
        if pair[1] in state_dict:
            state_dict[pair[0]] = state_dict[pair[1]]

    return state_dict


def zero3_partitioned_param_info(unpartitioned_numel, world_size):
    """
    Return ``(partitioned_numel, padding_numel)`` for a param of ``unpartitioned_numel`` elements.

    ``partitioned_numel`` is ``ceil(unpartitioned_numel / world_size)``; ``padding_numel`` is the number
    of elements needed to round ``unpartitioned_numel`` up to a multiple of ``world_size``.
    """
    remainder = unpartitioned_numel % world_size
    padding_numel = (world_size - remainder) if remainder else 0
    partitioned_numel = math.ceil(unpartitioned_numel / world_size)
    return partitioned_numel, padding_numel


def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states):
    """Concatenate the per-rank frozen param fragments and store the reshaped params in ``state_dict``."""
    if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0:
        return

    if debug:
        for i in range(world_size):
            num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values())
            print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')

        frozen_param_shapes = zero_model_states[0].frozen_param_shapes
        wanted_params = len(frozen_param_shapes)
        wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
        avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size
        print(f'Frozen params: Have {avail_numel} numels to process.')
        print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')

    total_params = 0
    total_numel = 0
    for name, shape in zero_model_states[0].frozen_param_shapes.items():
        total_params += 1
        unpartitioned_numel = shape.numel()
        total_numel += unpartitioned_numel

        param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states)
        # drop the trailing padding before reshaping
        state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape)

        partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)

        if debug:
            print(
                f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
            )

    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")


def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
    """
    Rebuild every trainable param from the per-rank flat fp32 tensors and store it in ``state_dict``.

    Raises:
        - ``ValueError``: if the consumed element count doesn't match the available one
    """
    param_shapes = zero_model_states[0].param_shapes
    # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each
    # param, re-consolidating each param, while dealing with padding if any

    # merge list of dicts, preserving order
    param_shapes = {k: v for d in param_shapes for k, v in d.items()}

    if debug:
        for i in range(world_size):
            print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}")

        wanted_params = len(param_shapes)
        wanted_numel = sum(shape.numel() for shape in param_shapes.values())
        # not asserting if there is a mismatch due to possible padding
        print(f"Trainable params: Have {fp32_flat_groups[0].numel() * world_size} numels to process.")
        print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.")

    avail_numel = fp32_flat_groups[0].numel() * world_size

    # params
    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
    # out-of-core computing solution
    offset = 0
    total_numel = 0
    total_params = 0
    for name, shape in param_shapes.items():

        unpartitioned_numel = shape.numel()
        total_numel += unpartitioned_numel
        total_params += 1

        partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)

        if debug:
            print(
                f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
            )

        # XXX: memory usage doubles here
        state_dict[name] = torch.cat(
            tuple(fp32_flat_groups[i].narrow(0, offset, partitioned_numel) for i in range(world_size)),
            0).narrow(0, 0, unpartitioned_numel).view(shape)
        offset += partitioned_numel

    # offset was advanced per rank; scale it to compare against the total over all ranks
    offset *= world_size

    # Sanity check
    if offset != avail_numel:
        raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")

    print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements")


def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states):
    """Assemble the state dict (buffers, frozen, trainable and shared params) for ZeRO stage 3."""
    state_dict = OrderedDict()

    # buffers
    buffers = zero_model_states[0].buffers
    state_dict.update(buffers)
    if debug:
        print(f"added {len(buffers)} buffers")

    _zero3_merge_frozen_params(state_dict, world_size, zero_model_states)

    _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)

    # recover shared parameters
    for pair in zero_model_states[0].shared_params:
        if pair[1] in state_dict:
            state_dict[pair[0]] = state_dict[pair[1]]

    return state_dict


def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag=None):
    """
    Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with
    ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example
    via a model hub.

    Args:
        - ``checkpoint_dir``: path to the desired checkpoint folder
        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14``

    Returns:
        - pytorch ``state_dict``

    Raises:
        - ``ValueError``: if ``tag`` is not given and there is no 'latest' file in ``checkpoint_dir``
        - ``FileNotFoundError``: if the tag folder doesn't exist

    Note: this approach may not work if your application doesn't have sufficient free CPU memory and
    you may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with
    the checkpoint.

    A typical usage might be ::

        from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint
        # do the training and checkpoint saving
        state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu
        model = model.cpu() # move to cpu
        model.load_state_dict(state_dict)
        # submit to model hub or save the model to share with others

    In this example the ``model`` will no longer be usable in the deepspeed context of the same
    application. i.e. you will need to re-initialize the deepspeed engine, since
    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.

    If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead.

    """
    if tag is None:
        latest_path = os.path.join(checkpoint_dir, 'latest')
        if os.path.isfile(latest_path):
            with open(latest_path, 'r') as fd:
                tag = fd.read().strip()
        else:
            raise ValueError(f"Unable to find 'latest' file at {latest_path}")

    ds_checkpoint_dir = os.path.join(checkpoint_dir, tag)

    if not os.path.isdir(ds_checkpoint_dir):
        raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist")

    return _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir)


def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, output_file, tag=None):
    """
    Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be
    loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed.

    Args:
        - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
        - ``output_file``: path to the pytorch fp32 state_dict output file (e.g. path/pytorch_model.bin)
        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``
    """

    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)
    print(f"Saving fp32 state dict to {output_file}")
    torch.save(state_dict, output_file)


def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None):
    """
    1. Put the provided model to cpu
    2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict``
    3. Load it into the provided model

    Args:
        - ``model``: the model object to update
        - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``

    Returns:
        - ``model``: modified model

    Make sure you have plenty of CPU memory available before you call this function. If you don't
    have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it
    conveniently placed for you in the checkpoint folder.

    A typical usage might be ::

        from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint
        model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir)
        # submit to model hub or save the model to share with others

    Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context
    of the same application. i.e. you will need to re-initialize the deepspeed engine, since
    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.

    """
    logger.info("Extracting fp32 weights")
    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)

    logger.info("Overwriting model with fp32 weights")
    model = model.cpu()
    # strict=False: keys missing from / unexpected in the state dict are not treated as errors
    model.load_state_dict(state_dict, strict=False)

    return model


if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("checkpoint_dir",
                        type=str,
                        help="path to the desired checkpoint folder, e.g., path/checkpoint-12")
    parser.add_argument(
        "output_file",
        type=str,
        help="path to the pytorch fp32 state_dict output file (e.g. path/checkpoint-12/pytorch_model.bin)")
    parser.add_argument("-t",
                        "--tag",
                        type=str,
                        default=None,
                        help="checkpoint tag used as a unique identifier for checkpoint. e.g., global_step1")
    parser.add_argument("-d", "--debug", action='store_true', help="enable debug")
    args = parser.parse_args()

    # rebinds the module-level switch read by the helpers above
    debug = args.debug

    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, args.output_file, tag=args.tag)