commit 7a74f79f49ab3fc40efb945f2d0f59fdb718f83c Author: ModelHub XC Date: Fri Jun 5 12:06:16 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: vukien2301/llama-3.1-8b-ultrafeedback-dpo-from-epoch1 Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..5695879 --- /dev/null +++ b/README.md @@ -0,0 +1,60 @@ +--- +library_name: transformers +base_model: vukien2301/llama-3.1-8b-deita-sft-teacher-epoch1 +tags: +- alignment-handbook +- generated_from_trainer +datasets: +- pvdhihihi/ultra-feedback +model-index: +- name: dpo_teacher_epoch1 + results: [] +--- + + + +# dpo_teacher_epoch1 + +This model is a fine-tuned version of [/home/minchan.kwon/ADPA/model/llama3.2-1b-deita-dpomix/ref_teacher_3epochs/checkpoint-191](https://huggingface.co//home/minchan.kwon/ADPA/model/llama3.2-1b-deita-dpomix/ref_teacher_3epochs/checkpoint-191) on the pvdhihihi/ultra-feedback dataset. + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 7e-07 +- train_batch_size: 32 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 8 +- total_train_batch_size: 256 +- total_eval_batch_size: 64 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: constant +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 1 + +### Training results + + + +### Framework versions + +- Transformers 4.49.0 +- Pytorch 2.5.1+cu124 +- Datasets 4.8.5 +- Tokenizers 0.21.4 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..e1bde33 --- /dev/null +++ b/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 0.0, + "train_loss": 0.5733826223727876, + "train_runtime": 2164.3688, + "train_samples": 57673, + "train_samples_per_second": 26.647, + "train_steps_per_second": 0.104 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..1d1f530 --- /dev/null +++ b/config.json @@ -0,0 +1,36 @@ +{ + "_name_or_path": "/home/minchan.kwon/ADPA/model/llama3.2-1b-deita-dpomix/ref_teacher_3epochs/checkpoint-191", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.49.0", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..655c279 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.49.0" +} diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..16e8621 --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c23f1445f142fa191e1d4293478d28f6ed5812cf7008480554a2ede0549c736 +size 4976698672 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..9ad774f --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58ed8e29cb222ba7e931b30949b800215199a8ac434e3aea0ac56fd3269d24c0 +size 4999802720 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..cceaed7 --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:378efb2957516b2a5abcccebd0260ecd67c2f974a9ade5a62bdfe887a58f2cad +size 4915916176 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..3e54108 --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:648f7f560d490083c8ab2f01df863e0b5c32ec77b14cd56164d27cff4990f42b +size 1168138808 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..0fd8120 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 16060522496 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..1c1d8d5 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..251b69a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..e1bde33 --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 0.0, + "train_loss": 0.5733826223727876, + "train_runtime": 2164.3688, + "train_samples": 57673, + "train_samples_per_second": 26.647, + "train_steps_per_second": 0.104 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..05575af --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,3884 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 226, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.004424778761061947, + "grad_norm": 2.562241554260254, + "learning_rate": 7e-07, + "logits/chosen": -0.2119140625, + "logits/rejected": -0.1328125, + "logps/chosen": -242.0, + "logps/rejected": -178.0, + "loss": 0.6914, + "loss/chosen-sft": 1.0, + "loss/dpo": 0.69140625, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.008849557522123894, + "grad_norm": 2.733123302459717, + "learning_rate": 7e-07, + "logits/chosen": -0.263671875, + "logits/rejected": -0.19140625, + "logps/chosen": -225.0, + "logps/rejected": -175.0, + "loss": 0.6929, + "loss/chosen-sft": 1.09375, + "loss/dpo": 0.69140625, + "rewards/accuracies": 0.3125, + "rewards/chosen": 0.00019550323486328125, + "rewards/margins": -0.00183868408203125, + "rewards/rejected": 0.0020294189453125, + "step": 2 + }, + { + "epoch": 0.01327433628318584, + "grad_norm": 5.071370601654053, + "learning_rate": 7e-07, + "logits/chosen": -0.287109375, + "logits/rejected": -0.1796875, + "logps/chosen": -258.0, + "logps/rejected": -195.0, + "loss": 0.6914, + "loss/chosen-sft": 1.1171875, + "loss/dpo": 0.69140625, + "rewards/accuracies": 0.21875, + "rewards/chosen": -0.000743865966796875, + "rewards/margins": -3.910064697265625e-05, + "rewards/rejected": -0.000701904296875, + "step": 3 + }, + { + "epoch": 0.017699115044247787, + "grad_norm": 9.871452331542969, + "learning_rate": 7e-07, + "logits/chosen": -0.294921875, + "logits/rejected": -0.310546875, + "logps/chosen": -270.0, + "logps/rejected": -236.0, + "loss": 0.6914, + "loss/chosen-sft": 1.09375, + "loss/dpo": 0.69140625, + "rewards/accuracies": 0.375, + "rewards/chosen": -0.0017242431640625, + "rewards/margins": 0.002197265625, + "rewards/rejected": -0.00390625, + "step": 4 + }, + { + "epoch": 0.022123893805309734, + "grad_norm": 8.816597938537598, + "learning_rate": 7e-07, + "logits/chosen": -0.1328125, + "logits/rejected": -0.2421875, + "logps/chosen": -280.0, + "logps/rejected": -249.0, + "loss": 0.6914, + "loss/chosen-sft": 1.1484375, + "loss/dpo": 0.6953125, + "rewards/accuracies": 0.3125, + "rewards/chosen": -0.00156402587890625, + "rewards/margins": -0.0022735595703125, + "rewards/rejected": 0.000705718994140625, + "step": 5 + }, + { + "epoch": 0.02654867256637168, + "grad_norm": 8.637555122375488, + "learning_rate": 7e-07, + "logits/chosen": -0.12890625, + "logits/rejected": -0.1611328125, + "logps/chosen": -223.0, + "logps/rejected": -172.0, + "loss": 0.6895, + "loss/chosen-sft": 1.0, + "loss/dpo": 0.69140625, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.005889892578125, + "rewards/margins": 0.006195068359375, + "rewards/rejected": -0.00031280517578125, + "step": 6 + }, + { + "epoch": 0.030973451327433628, + "grad_norm": 4.954357147216797, + "learning_rate": 7e-07, + "logits/chosen": -0.27734375, + "logits/rejected": -0.125, + "logps/chosen": -312.0, + "logps/rejected": -228.0, + "loss": 0.6899, + "loss/chosen-sft": 1.078125, + "loss/dpo": 0.69140625, + "rewards/accuracies": 0.40625, + "rewards/chosen": 0.00113677978515625, + "rewards/margins": 0.0034027099609375, + "rewards/rejected": -0.0022735595703125, + "step": 7 + }, + { + "epoch": 0.035398230088495575, + "grad_norm": 7.5598249435424805, + "learning_rate": 7e-07, + "logits/chosen": -0.095703125, + "logits/rejected": -0.1416015625, + "logps/chosen": -240.0, + "logps/rejected": -233.0, + "loss": 0.6875, + "loss/chosen-sft": 1.0546875, + "loss/dpo": 0.6875, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.006988525390625, + "rewards/margins": 0.01007080078125, + "rewards/rejected": -0.003082275390625, + "step": 8 + }, + { + "epoch": 0.03982300884955752, + "grad_norm": 2.6033551692962646, + "learning_rate": 7e-07, + "logits/chosen": -0.294921875, + "logits/rejected": -0.28125, + "logps/chosen": -292.0, + "logps/rejected": -212.0, + "loss": 0.6885, + "loss/chosen-sft": 1.15625, + "loss/dpo": 0.6875, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.00665283203125, + "rewards/margins": 0.0135498046875, + "rewards/rejected": -0.00689697265625, + "step": 9 + }, + { + "epoch": 0.04424778761061947, + "grad_norm": 10.546786308288574, + "learning_rate": 7e-07, + "logits/chosen": -0.267578125, + "logits/rejected": -0.050537109375, + "logps/chosen": -308.0, + "logps/rejected": -210.0, + "loss": 0.687, + "loss/chosen-sft": 1.078125, + "loss/dpo": 0.68359375, + "rewards/accuracies": 0.6875, + "rewards/chosen": 0.0142822265625, + "rewards/margins": 0.02099609375, + "rewards/rejected": -0.006683349609375, + "step": 10 + }, + { + "epoch": 0.048672566371681415, + "grad_norm": 9.844188690185547, + "learning_rate": 7e-07, + "logits/chosen": -0.3046875, + "logits/rejected": -0.177734375, + "logps/chosen": -243.0, + "logps/rejected": -234.0, + "loss": 0.687, + "loss/chosen-sft": 1.1796875, + "loss/dpo": 0.68359375, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.01025390625, + "rewards/margins": 0.016357421875, + "rewards/rejected": -0.006103515625, + "step": 11 + }, + { + "epoch": 0.05309734513274336, + "grad_norm": 12.557535171508789, + "learning_rate": 7e-07, + "logits/chosen": -0.2265625, + "logits/rejected": -0.2275390625, + "logps/chosen": -244.0, + "logps/rejected": -219.0, + "loss": 0.686, + "loss/chosen-sft": 1.078125, + "loss/dpo": 0.6875, + "rewards/accuracies": 0.5625, + "rewards/chosen": 0.0025177001953125, + "rewards/margins": 0.0113525390625, + "rewards/rejected": -0.0087890625, + "step": 12 + }, + { + "epoch": 0.05752212389380531, + "grad_norm": 8.109821319580078, + "learning_rate": 7e-07, + "logits/chosen": -0.1953125, + "logits/rejected": -0.224609375, + "logps/chosen": -272.0, + "logps/rejected": -217.0, + "loss": 0.6836, + "loss/chosen-sft": 1.03125, + "loss/dpo": 0.68359375, + "rewards/accuracies": 0.65625, + "rewards/chosen": 0.00726318359375, + "rewards/margins": 0.0164794921875, + "rewards/rejected": -0.00921630859375, + "step": 13 + }, + { + "epoch": 0.061946902654867256, + "grad_norm": 8.9277982711792, + "learning_rate": 7e-07, + "logits/chosen": -0.19140625, + "logits/rejected": -0.2412109375, + "logps/chosen": -294.0, + "logps/rejected": -198.0, + "loss": 0.6816, + "loss/chosen-sft": 1.109375, + "loss/dpo": 0.6796875, + "rewards/accuracies": 0.75, + "rewards/chosen": 0.007171630859375, + "rewards/margins": 0.025146484375, + "rewards/rejected": -0.0179443359375, + "step": 14 + }, + { + "epoch": 0.06637168141592921, + "grad_norm": 3.4456562995910645, + "learning_rate": 7e-07, + "logits/chosen": -0.07470703125, + "logits/rejected": -0.0712890625, + "logps/chosen": -239.0, + "logps/rejected": -227.0, + "loss": 0.6826, + "loss/chosen-sft": 0.93359375, + "loss/dpo": 0.6796875, + "rewards/accuracies": 0.59375, + "rewards/chosen": 0.00970458984375, + "rewards/margins": 0.0260009765625, + "rewards/rejected": -0.016357421875, + "step": 15 + }, + { + "epoch": 0.07079646017699115, + "grad_norm": 3.63268780708313, + "learning_rate": 7e-07, + "logits/chosen": -0.244140625, + "logits/rejected": -0.255859375, + "logps/chosen": -264.0, + "logps/rejected": -208.0, + "loss": 0.6826, + "loss/chosen-sft": 1.0, + "loss/dpo": 0.6796875, + "rewards/accuracies": 0.53125, + "rewards/chosen": 0.007537841796875, + "rewards/margins": 0.03173828125, + "rewards/rejected": -0.0242919921875, + "step": 16 + }, + { + "epoch": 0.0752212389380531, + "grad_norm": 14.086406707763672, + "learning_rate": 7e-07, + "logits/chosen": -0.30078125, + "logits/rejected": -0.271484375, + "logps/chosen": -214.0, + "logps/rejected": -197.0, + "loss": 0.6807, + "loss/chosen-sft": 0.9296875, + "loss/dpo": 0.6875, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.004791259765625, + "rewards/margins": 0.01422119140625, + "rewards/rejected": -0.0189208984375, + "step": 17 + }, + { + "epoch": 0.07964601769911504, + "grad_norm": 3.81648850440979, + "learning_rate": 7e-07, + "logits/chosen": -0.1435546875, + "logits/rejected": -0.1494140625, + "logps/chosen": -268.0, + "logps/rejected": -223.0, + "loss": 0.6797, + "loss/chosen-sft": 1.0, + "loss/dpo": 0.6796875, + "rewards/accuracies": 0.65625, + "rewards/chosen": 0.0057373046875, + "rewards/margins": 0.02734375, + "rewards/rejected": -0.0216064453125, + "step": 18 + }, + { + "epoch": 0.084070796460177, + "grad_norm": 3.1519317626953125, + "learning_rate": 7e-07, + "logits/chosen": -0.2109375, + "logits/rejected": -0.1220703125, + "logps/chosen": -247.0, + "logps/rejected": -236.0, + "loss": 0.6748, + "loss/chosen-sft": 1.0625, + "loss/dpo": 0.67578125, + "rewards/accuracies": 0.65625, + "rewards/chosen": 0.004730224609375, + "rewards/margins": 0.04052734375, + "rewards/rejected": -0.035888671875, + "step": 19 + }, + { + "epoch": 0.08849557522123894, + "grad_norm": 8.66562271118164, + "learning_rate": 7e-07, + "logits/chosen": -0.1982421875, + "logits/rejected": -0.1904296875, + "logps/chosen": -304.0, + "logps/rejected": -245.0, + "loss": 0.6733, + "loss/chosen-sft": 0.99609375, + "loss/dpo": 0.671875, + "rewards/accuracies": 0.65625, + "rewards/chosen": 0.0106201171875, + "rewards/margins": 0.0400390625, + "rewards/rejected": -0.029296875, + "step": 20 + }, + { + "epoch": 0.09292035398230089, + "grad_norm": 3.798952579498291, + "learning_rate": 7e-07, + "logits/chosen": -0.2578125, + "logits/rejected": -0.0281982421875, + "logps/chosen": -296.0, + "logps/rejected": -190.0, + "loss": 0.6782, + "loss/chosen-sft": 1.125, + "loss/dpo": 0.68359375, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.006195068359375, + "rewards/margins": 0.0238037109375, + "rewards/rejected": -0.030029296875, + "step": 21 + }, + { + "epoch": 0.09734513274336283, + "grad_norm": 6.044543743133545, + "learning_rate": 7e-07, + "logits/chosen": -0.1728515625, + "logits/rejected": -0.263671875, + "logps/chosen": -238.0, + "logps/rejected": -225.0, + "loss": 0.6743, + "loss/chosen-sft": 0.98828125, + "loss/dpo": 0.68359375, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.004638671875, + "rewards/margins": 0.01611328125, + "rewards/rejected": -0.020751953125, + "step": 22 + }, + { + "epoch": 0.10176991150442478, + "grad_norm": 6.223972797393799, + "learning_rate": 7e-07, + "logits/chosen": -0.36328125, + "logits/rejected": -0.37890625, + "logps/chosen": -256.0, + "logps/rejected": -235.0, + "loss": 0.6768, + "loss/chosen-sft": 1.09375, + "loss/dpo": 0.6796875, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0225830078125, + "rewards/margins": 0.03173828125, + "rewards/rejected": -0.05419921875, + "step": 23 + }, + { + "epoch": 0.10619469026548672, + "grad_norm": 2.817391872406006, + "learning_rate": 7e-07, + "logits/chosen": -0.2353515625, + "logits/rejected": -0.27734375, + "logps/chosen": -258.0, + "logps/rejected": -196.0, + "loss": 0.6699, + "loss/chosen-sft": 1.09375, + "loss/dpo": 0.67578125, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.00830078125, + "rewards/margins": 0.03515625, + "rewards/rejected": -0.043212890625, + "step": 24 + }, + { + "epoch": 0.11061946902654868, + "grad_norm": 5.743912220001221, + "learning_rate": 7e-07, + "logits/chosen": -0.12890625, + "logits/rejected": -0.07861328125, + "logps/chosen": -268.0, + "logps/rejected": -231.0, + "loss": 0.6685, + "loss/chosen-sft": 0.9140625, + "loss/dpo": 0.66796875, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.0189208984375, + "rewards/margins": 0.050537109375, + "rewards/rejected": -0.0693359375, + "step": 25 + }, + { + "epoch": 0.11504424778761062, + "grad_norm": 3.4631690979003906, + "learning_rate": 7e-07, + "logits/chosen": 0.046142578125, + "logits/rejected": 0.018798828125, + "logps/chosen": -213.0, + "logps/rejected": -252.0, + "loss": 0.6699, + "loss/chosen-sft": 0.87109375, + "loss/dpo": 0.65625, + "rewards/accuracies": 0.84375, + "rewards/chosen": 0.0076904296875, + "rewards/margins": 0.07470703125, + "rewards/rejected": -0.06689453125, + "step": 26 + }, + { + "epoch": 0.11946902654867257, + "grad_norm": 2.302494525909424, + "learning_rate": 7e-07, + "logits/chosen": -0.2734375, + "logits/rejected": -0.296875, + "logps/chosen": -294.0, + "logps/rejected": -219.0, + "loss": 0.6733, + "loss/chosen-sft": 1.1796875, + "loss/dpo": 0.6640625, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.0025177001953125, + "rewards/margins": 0.058837890625, + "rewards/rejected": -0.061279296875, + "step": 27 + }, + { + "epoch": 0.12389380530973451, + "grad_norm": 15.041751861572266, + "learning_rate": 7e-07, + "logits/chosen": -0.326171875, + "logits/rejected": -0.2255859375, + "logps/chosen": -324.0, + "logps/rejected": -215.0, + "loss": 0.6597, + "loss/chosen-sft": 1.1328125, + "loss/dpo": 0.65625, + "rewards/accuracies": 0.78125, + "rewards/chosen": 0.0016632080078125, + "rewards/margins": 0.076171875, + "rewards/rejected": -0.07470703125, + "step": 28 + }, + { + "epoch": 0.12831858407079647, + "grad_norm": 10.651082992553711, + "learning_rate": 7e-07, + "logits/chosen": -0.055908203125, + "logits/rejected": -0.04638671875, + "logps/chosen": -264.0, + "logps/rejected": -233.0, + "loss": 0.6704, + "loss/chosen-sft": 0.9140625, + "loss/dpo": 0.65625, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.0001506805419921875, + "rewards/margins": 0.0751953125, + "rewards/rejected": -0.0751953125, + "step": 29 + }, + { + "epoch": 0.13274336283185842, + "grad_norm": 17.013574600219727, + "learning_rate": 7e-07, + "logits/chosen": -0.21484375, + "logits/rejected": -0.138671875, + "logps/chosen": -225.0, + "logps/rejected": -202.0, + "loss": 0.6685, + "loss/chosen-sft": 0.984375, + "loss/dpo": 0.67578125, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.0390625, + "rewards/margins": 0.038818359375, + "rewards/rejected": -0.07763671875, + "step": 30 + }, + { + "epoch": 0.13716814159292035, + "grad_norm": 3.365304708480835, + "learning_rate": 7e-07, + "logits/chosen": -0.267578125, + "logits/rejected": -0.25390625, + "logps/chosen": -296.0, + "logps/rejected": -262.0, + "loss": 0.6641, + "loss/chosen-sft": 1.1640625, + "loss/dpo": 0.6640625, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.0126953125, + "rewards/margins": 0.06689453125, + "rewards/rejected": -0.07958984375, + "step": 31 + }, + { + "epoch": 0.1415929203539823, + "grad_norm": 5.34974479675293, + "learning_rate": 7e-07, + "logits/chosen": -0.263671875, + "logits/rejected": -0.294921875, + "logps/chosen": -234.0, + "logps/rejected": -255.0, + "loss": 0.668, + "loss/chosen-sft": 1.0546875, + "loss/dpo": 0.65625, + "rewards/accuracies": 0.8125, + "rewards/chosen": 0.0081787109375, + "rewards/margins": 0.0830078125, + "rewards/rejected": -0.07470703125, + "step": 32 + }, + { + "epoch": 0.14601769911504425, + "grad_norm": 5.2689948081970215, + "learning_rate": 7e-07, + "logits/chosen": -0.255859375, + "logits/rejected": -0.38671875, + "logps/chosen": -221.0, + "logps/rejected": -318.0, + "loss": 0.6582, + "loss/chosen-sft": 1.0390625, + "loss/dpo": 0.6484375, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.0267333984375, + "rewards/margins": 0.09814453125, + "rewards/rejected": -0.12451171875, + "step": 33 + }, + { + "epoch": 0.1504424778761062, + "grad_norm": 13.799201011657715, + "learning_rate": 7e-07, + "logits/chosen": -0.275390625, + "logits/rejected": -0.21875, + "logps/chosen": -238.0, + "logps/rejected": -208.0, + "loss": 0.6733, + "loss/chosen-sft": 1.046875, + "loss/dpo": 0.6484375, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.03662109375, + "rewards/margins": 0.09765625, + "rewards/rejected": -0.1337890625, + "step": 34 + }, + { + "epoch": 0.15486725663716813, + "grad_norm": 13.07458782196045, + "learning_rate": 7e-07, + "logits/chosen": -0.369140625, + "logits/rejected": -0.24609375, + "logps/chosen": -280.0, + "logps/rejected": -255.0, + "loss": 0.6631, + "loss/chosen-sft": 1.1015625, + "loss/dpo": 0.66015625, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.078125, + "rewards/margins": 0.07470703125, + "rewards/rejected": -0.15234375, + "step": 35 + }, + { + "epoch": 0.1592920353982301, + "grad_norm": 2.256340742111206, + "learning_rate": 7e-07, + "logits/chosen": -0.287109375, + "logits/rejected": -0.28125, + "logps/chosen": -226.0, + "logps/rejected": -240.0, + "loss": 0.6543, + "loss/chosen-sft": 1.015625, + "loss/dpo": 0.6484375, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.049072265625, + "rewards/margins": 0.095703125, + "rewards/rejected": -0.1455078125, + "step": 36 + }, + { + "epoch": 0.16371681415929204, + "grad_norm": 8.795002937316895, + "learning_rate": 7e-07, + "logits/chosen": -0.2294921875, + "logits/rejected": -0.21484375, + "logps/chosen": -225.0, + "logps/rejected": -216.0, + "loss": 0.6523, + "loss/chosen-sft": 0.9765625, + "loss/dpo": 0.671875, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.06787109375, + "rewards/margins": 0.047607421875, + "rewards/rejected": -0.115234375, + "step": 37 + }, + { + "epoch": 0.168141592920354, + "grad_norm": 2.735612154006958, + "learning_rate": 7e-07, + "logits/chosen": -0.271484375, + "logits/rejected": -0.26171875, + "logps/chosen": -322.0, + "logps/rejected": -236.0, + "loss": 0.6455, + "loss/chosen-sft": 1.234375, + "loss/dpo": 0.625, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.029052734375, + "rewards/margins": 0.1474609375, + "rewards/rejected": -0.1767578125, + "step": 38 + }, + { + "epoch": 0.17256637168141592, + "grad_norm": 17.598873138427734, + "learning_rate": 7e-07, + "logits/chosen": -0.23046875, + "logits/rejected": -0.357421875, + "logps/chosen": -237.0, + "logps/rejected": -253.0, + "loss": 0.6367, + "loss/chosen-sft": 1.0625, + "loss/dpo": 0.64453125, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.053955078125, + "rewards/margins": 0.10888671875, + "rewards/rejected": -0.1630859375, + "step": 39 + }, + { + "epoch": 0.17699115044247787, + "grad_norm": 4.538353443145752, + "learning_rate": 7e-07, + "logits/chosen": -0.1953125, + "logits/rejected": -0.263671875, + "logps/chosen": -240.0, + "logps/rejected": -290.0, + "loss": 0.6475, + "loss/chosen-sft": 0.9296875, + "loss/dpo": 0.6328125, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.0625, + "rewards/margins": 0.13671875, + "rewards/rejected": -0.19921875, + "step": 40 + }, + { + "epoch": 0.18141592920353983, + "grad_norm": 17.900062561035156, + "learning_rate": 7e-07, + "logits/chosen": -0.349609375, + "logits/rejected": -0.306640625, + "logps/chosen": -260.0, + "logps/rejected": -284.0, + "loss": 0.6538, + "loss/chosen-sft": 1.03125, + "loss/dpo": 0.65625, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.09912109375, + "rewards/margins": 0.08154296875, + "rewards/rejected": -0.1806640625, + "step": 41 + }, + { + "epoch": 0.18584070796460178, + "grad_norm": 2.5172457695007324, + "learning_rate": 7e-07, + "logits/chosen": -0.40625, + "logits/rejected": -0.38671875, + "logps/chosen": -272.0, + "logps/rejected": -216.0, + "loss": 0.6426, + "loss/chosen-sft": 1.234375, + "loss/dpo": 0.625, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.07080078125, + "rewards/margins": 0.1708984375, + "rewards/rejected": -0.2421875, + "step": 42 + }, + { + "epoch": 0.1902654867256637, + "grad_norm": 10.608353614807129, + "learning_rate": 7e-07, + "logits/chosen": -0.400390625, + "logits/rejected": -0.353515625, + "logps/chosen": -240.0, + "logps/rejected": -224.0, + "loss": 0.6455, + "loss/chosen-sft": 1.140625, + "loss/dpo": 0.6328125, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.0888671875, + "rewards/margins": 0.1357421875, + "rewards/rejected": -0.224609375, + "step": 43 + }, + { + "epoch": 0.19469026548672566, + "grad_norm": 17.277069091796875, + "learning_rate": 7e-07, + "logits/chosen": -0.33984375, + "logits/rejected": -0.37109375, + "logps/chosen": -300.0, + "logps/rejected": -215.0, + "loss": 0.6396, + "loss/chosen-sft": 1.1640625, + "loss/dpo": 0.60546875, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.06005859375, + "rewards/margins": 0.2041015625, + "rewards/rejected": -0.263671875, + "step": 44 + }, + { + "epoch": 0.19911504424778761, + "grad_norm": 18.799257278442383, + "learning_rate": 7e-07, + "logits/chosen": -0.0888671875, + "logits/rejected": -0.171875, + "logps/chosen": -234.0, + "logps/rejected": -260.0, + "loss": 0.6382, + "loss/chosen-sft": 0.890625, + "loss/dpo": 0.60546875, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.08203125, + "rewards/margins": 0.2197265625, + "rewards/rejected": -0.302734375, + "step": 45 + }, + { + "epoch": 0.20353982300884957, + "grad_norm": 34.8527717590332, + "learning_rate": 7e-07, + "logits/chosen": -0.3828125, + "logits/rejected": -0.46875, + "logps/chosen": -246.0, + "logps/rejected": -255.0, + "loss": 0.6538, + "loss/chosen-sft": 1.109375, + "loss/dpo": 0.71875, + "rewards/accuracies": 0.46875, + "rewards/chosen": -0.240234375, + "rewards/margins": -0.01708984375, + "rewards/rejected": -0.22265625, + "step": 46 + }, + { + "epoch": 0.2079646017699115, + "grad_norm": 6.215828895568848, + "learning_rate": 7e-07, + "logits/chosen": -0.578125, + "logits/rejected": -0.49609375, + "logps/chosen": -288.0, + "logps/rejected": -284.0, + "loss": 0.6431, + "loss/chosen-sft": 1.25, + "loss/dpo": 0.61328125, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.0625, + "rewards/margins": 0.18359375, + "rewards/rejected": -0.2470703125, + "step": 47 + }, + { + "epoch": 0.21238938053097345, + "grad_norm": 2.940314531326294, + "learning_rate": 7e-07, + "logits/chosen": -0.361328125, + "logits/rejected": -0.400390625, + "logps/chosen": -288.0, + "logps/rejected": -214.0, + "loss": 0.627, + "loss/chosen-sft": 1.171875, + "loss/dpo": 0.62109375, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1357421875, + "rewards/margins": 0.1748046875, + "rewards/rejected": -0.310546875, + "step": 48 + }, + { + "epoch": 0.2168141592920354, + "grad_norm": 124.2203598022461, + "learning_rate": 7e-07, + "logits/chosen": -0.2255859375, + "logits/rejected": -0.27734375, + "logps/chosen": -276.0, + "logps/rejected": -280.0, + "loss": 0.6323, + "loss/chosen-sft": 1.015625, + "loss/dpo": 0.61328125, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.076171875, + "rewards/margins": 0.177734375, + "rewards/rejected": -0.25390625, + "step": 49 + }, + { + "epoch": 0.22123893805309736, + "grad_norm": 49.72818374633789, + "learning_rate": 7e-07, + "logits/chosen": -0.5859375, + "logits/rejected": -0.55859375, + "logps/chosen": -312.0, + "logps/rejected": -266.0, + "loss": 0.6343, + "loss/chosen-sft": 1.1796875, + "loss/dpo": 0.609375, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.10791015625, + "rewards/margins": 0.2021484375, + "rewards/rejected": -0.310546875, + "step": 50 + }, + { + "epoch": 0.22566371681415928, + "grad_norm": 110.1352310180664, + "learning_rate": 7e-07, + "logits/chosen": -0.44921875, + "logits/rejected": -0.431640625, + "logps/chosen": -274.0, + "logps/rejected": -264.0, + "loss": 0.6245, + "loss/chosen-sft": 1.171875, + "loss/dpo": 0.61328125, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.1162109375, + "rewards/margins": 0.208984375, + "rewards/rejected": -0.32421875, + "step": 51 + }, + { + "epoch": 0.23008849557522124, + "grad_norm": 14.234803199768066, + "learning_rate": 7e-07, + "logits/chosen": -0.40625, + "logits/rejected": -0.28515625, + "logps/chosen": -268.0, + "logps/rejected": -256.0, + "loss": 0.627, + "loss/chosen-sft": 1.046875, + "loss/dpo": 0.66796875, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.220703125, + "rewards/margins": 0.0849609375, + "rewards/rejected": -0.306640625, + "step": 52 + }, + { + "epoch": 0.2345132743362832, + "grad_norm": 25.839595794677734, + "learning_rate": 7e-07, + "logits/chosen": -0.61328125, + "logits/rejected": -0.5390625, + "logps/chosen": -302.0, + "logps/rejected": -251.0, + "loss": 0.6152, + "loss/chosen-sft": 1.3203125, + "loss/dpo": 0.58203125, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.0849609375, + "rewards/margins": 0.2578125, + "rewards/rejected": -0.34375, + "step": 53 + }, + { + "epoch": 0.23893805309734514, + "grad_norm": 17.5559024810791, + "learning_rate": 7e-07, + "logits/chosen": -0.32421875, + "logits/rejected": -0.4140625, + "logps/chosen": -241.0, + "logps/rejected": -272.0, + "loss": 0.6279, + "loss/chosen-sft": 1.109375, + "loss/dpo": 0.61328125, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.173828125, + "rewards/margins": 0.21875, + "rewards/rejected": -0.392578125, + "step": 54 + }, + { + "epoch": 0.24336283185840707, + "grad_norm": 4.360561847686768, + "learning_rate": 7e-07, + "logits/chosen": -0.2353515625, + "logits/rejected": -0.10107421875, + "logps/chosen": -284.0, + "logps/rejected": -237.0, + "loss": 0.605, + "loss/chosen-sft": 1.0546875, + "loss/dpo": 0.6015625, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.1689453125, + "rewards/margins": 0.2353515625, + "rewards/rejected": -0.404296875, + "step": 55 + }, + { + "epoch": 0.24778761061946902, + "grad_norm": 16.581727981567383, + "learning_rate": 7e-07, + "logits/chosen": -0.431640625, + "logits/rejected": -0.39453125, + "logps/chosen": -340.0, + "logps/rejected": -294.0, + "loss": 0.6133, + "loss/chosen-sft": 1.25, + "loss/dpo": 0.6484375, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.2177734375, + "rewards/margins": 0.1396484375, + "rewards/rejected": -0.357421875, + "step": 56 + }, + { + "epoch": 0.252212389380531, + "grad_norm": 49.88325119018555, + "learning_rate": 7e-07, + "logits/chosen": -0.396484375, + "logits/rejected": -0.173828125, + "logps/chosen": -226.0, + "logps/rejected": -191.0, + "loss": 0.6279, + "loss/chosen-sft": 1.0546875, + "loss/dpo": 0.68359375, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.1884765625, + "rewards/margins": 0.036865234375, + "rewards/rejected": -0.2255859375, + "step": 57 + }, + { + "epoch": 0.25663716814159293, + "grad_norm": 24.68882179260254, + "learning_rate": 7e-07, + "logits/chosen": -0.37890625, + "logits/rejected": -0.28515625, + "logps/chosen": -244.0, + "logps/rejected": -260.0, + "loss": 0.6133, + "loss/chosen-sft": 1.078125, + "loss/dpo": 0.60546875, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.23828125, + "rewards/margins": 0.2255859375, + "rewards/rejected": -0.46484375, + "step": 58 + }, + { + "epoch": 0.2610619469026549, + "grad_norm": 15.78062915802002, + "learning_rate": 7e-07, + "logits/chosen": -0.294921875, + "logits/rejected": -0.375, + "logps/chosen": -300.0, + "logps/rejected": -255.0, + "loss": 0.6138, + "loss/chosen-sft": 1.1796875, + "loss/dpo": 0.59375, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.154296875, + "rewards/margins": 0.2421875, + "rewards/rejected": -0.396484375, + "step": 59 + }, + { + "epoch": 0.26548672566371684, + "grad_norm": 20.29654884338379, + "learning_rate": 7e-07, + "logits/chosen": -0.318359375, + "logits/rejected": -0.376953125, + "logps/chosen": -280.0, + "logps/rejected": -300.0, + "loss": 0.6206, + "loss/chosen-sft": 1.2265625, + "loss/dpo": 0.6484375, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.2265625, + "rewards/margins": 0.1357421875, + "rewards/rejected": -0.361328125, + "step": 60 + }, + { + "epoch": 0.26991150442477874, + "grad_norm": 43.65996551513672, + "learning_rate": 7e-07, + "logits/chosen": -0.36328125, + "logits/rejected": -0.3046875, + "logps/chosen": -238.0, + "logps/rejected": -278.0, + "loss": 0.6255, + "loss/chosen-sft": 1.25, + "loss/dpo": 0.59375, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.1572265625, + "rewards/margins": 0.2578125, + "rewards/rejected": -0.416015625, + "step": 61 + }, + { + "epoch": 0.2743362831858407, + "grad_norm": 72.5498046875, + "learning_rate": 7e-07, + "logits/chosen": -0.359375, + "logits/rejected": -0.373046875, + "logps/chosen": -318.0, + "logps/rejected": -270.0, + "loss": 0.6245, + "loss/chosen-sft": 1.1015625, + "loss/dpo": 0.59375, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.1884765625, + "rewards/margins": 0.275390625, + "rewards/rejected": -0.46484375, + "step": 62 + }, + { + "epoch": 0.27876106194690264, + "grad_norm": 99.62593078613281, + "learning_rate": 7e-07, + "logits/chosen": -0.265625, + "logits/rejected": -0.33984375, + "logps/chosen": -294.0, + "logps/rejected": -258.0, + "loss": 0.627, + "loss/chosen-sft": 1.0703125, + "loss/dpo": 0.65234375, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.267578125, + "rewards/margins": 0.1328125, + "rewards/rejected": -0.400390625, + "step": 63 + }, + { + "epoch": 0.2831858407079646, + "grad_norm": 71.84874725341797, + "learning_rate": 7e-07, + "logits/chosen": -0.359375, + "logits/rejected": -0.392578125, + "logps/chosen": -260.0, + "logps/rejected": -219.0, + "loss": 0.6138, + "loss/chosen-sft": 1.171875, + "loss/dpo": 0.671875, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.248046875, + "rewards/margins": 0.087890625, + "rewards/rejected": -0.3359375, + "step": 64 + }, + { + "epoch": 0.28761061946902655, + "grad_norm": 43.867332458496094, + "learning_rate": 7e-07, + "logits/chosen": -0.494140625, + "logits/rejected": -0.38671875, + "logps/chosen": -298.0, + "logps/rejected": -276.0, + "loss": 0.6162, + "loss/chosen-sft": 1.234375, + "loss/dpo": 0.6171875, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.251953125, + "rewards/margins": 0.2021484375, + "rewards/rejected": -0.455078125, + "step": 65 + }, + { + "epoch": 0.2920353982300885, + "grad_norm": 15.392335891723633, + "learning_rate": 7e-07, + "logits/chosen": -0.275390625, + "logits/rejected": -0.1953125, + "logps/chosen": -241.0, + "logps/rejected": -234.0, + "loss": 0.6211, + "loss/chosen-sft": 1.1953125, + "loss/dpo": 0.703125, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.251953125, + "rewards/margins": 0.017578125, + "rewards/rejected": -0.26953125, + "step": 66 + }, + { + "epoch": 0.29646017699115046, + "grad_norm": 17.315120697021484, + "learning_rate": 7e-07, + "logits/chosen": -0.271484375, + "logits/rejected": -0.3828125, + "logps/chosen": -268.0, + "logps/rejected": -292.0, + "loss": 0.6226, + "loss/chosen-sft": 1.0078125, + "loss/dpo": 0.6171875, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.30078125, + "rewards/margins": 0.203125, + "rewards/rejected": -0.50390625, + "step": 67 + }, + { + "epoch": 0.3008849557522124, + "grad_norm": 17.68255615234375, + "learning_rate": 7e-07, + "logits/chosen": -0.369140625, + "logits/rejected": -0.50390625, + "logps/chosen": -266.0, + "logps/rejected": -248.0, + "loss": 0.6143, + "loss/chosen-sft": 1.109375, + "loss/dpo": 0.58203125, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.138671875, + "rewards/margins": 0.3046875, + "rewards/rejected": -0.443359375, + "step": 68 + }, + { + "epoch": 0.3053097345132743, + "grad_norm": 31.16883087158203, + "learning_rate": 7e-07, + "logits/chosen": -0.373046875, + "logits/rejected": -0.369140625, + "logps/chosen": -256.0, + "logps/rejected": -235.0, + "loss": 0.6191, + "loss/chosen-sft": 1.15625, + "loss/dpo": 0.59765625, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.1884765625, + "rewards/margins": 0.279296875, + "rewards/rejected": -0.46875, + "step": 69 + }, + { + "epoch": 0.30973451327433627, + "grad_norm": 65.01067352294922, + "learning_rate": 7e-07, + "logits/chosen": -0.423828125, + "logits/rejected": -0.5, + "logps/chosen": -272.0, + "logps/rejected": -244.0, + "loss": 0.5884, + "loss/chosen-sft": 1.21875, + "loss/dpo": 0.6015625, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1787109375, + "rewards/margins": 0.251953125, + "rewards/rejected": -0.431640625, + "step": 70 + }, + { + "epoch": 0.3141592920353982, + "grad_norm": 9.247684478759766, + "learning_rate": 7e-07, + "logits/chosen": -0.412109375, + "logits/rejected": -0.421875, + "logps/chosen": -284.0, + "logps/rejected": -245.0, + "loss": 0.605, + "loss/chosen-sft": 1.203125, + "loss/dpo": 0.671875, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.2216796875, + "rewards/margins": 0.1396484375, + "rewards/rejected": -0.361328125, + "step": 71 + }, + { + "epoch": 0.3185840707964602, + "grad_norm": 29.392963409423828, + "learning_rate": 7e-07, + "logits/chosen": -0.46875, + "logits/rejected": -0.462890625, + "logps/chosen": -346.0, + "logps/rejected": -292.0, + "loss": 0.5928, + "loss/chosen-sft": 1.25, + "loss/dpo": 0.64453125, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.30078125, + "rewards/margins": 0.1650390625, + "rewards/rejected": -0.46484375, + "step": 72 + }, + { + "epoch": 0.3230088495575221, + "grad_norm": 12.59981918334961, + "learning_rate": 7e-07, + "logits/chosen": -0.6171875, + "logits/rejected": -0.41796875, + "logps/chosen": -296.0, + "logps/rejected": -253.0, + "loss": 0.6255, + "loss/chosen-sft": 1.2734375, + "loss/dpo": 0.62109375, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.228515625, + "rewards/margins": 0.1884765625, + "rewards/rejected": -0.416015625, + "step": 73 + }, + { + "epoch": 0.3274336283185841, + "grad_norm": 27.949209213256836, + "learning_rate": 7e-07, + "logits/chosen": -0.5, + "logits/rejected": -0.4921875, + "logps/chosen": -272.0, + "logps/rejected": -218.0, + "loss": 0.5723, + "loss/chosen-sft": 1.1171875, + "loss/dpo": 0.62109375, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.25390625, + "rewards/margins": 0.1806640625, + "rewards/rejected": -0.435546875, + "step": 74 + }, + { + "epoch": 0.33185840707964603, + "grad_norm": 42.84572219848633, + "learning_rate": 7e-07, + "logits/chosen": -0.54296875, + "logits/rejected": -0.5546875, + "logps/chosen": -268.0, + "logps/rejected": -222.0, + "loss": 0.6099, + "loss/chosen-sft": 1.265625, + "loss/dpo": 0.6015625, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.1572265625, + "rewards/margins": 0.2392578125, + "rewards/rejected": -0.396484375, + "step": 75 + }, + { + "epoch": 0.336283185840708, + "grad_norm": 58.05986404418945, + "learning_rate": 7e-07, + "logits/chosen": -0.5390625, + "logits/rejected": -0.51171875, + "logps/chosen": -206.0, + "logps/rejected": -262.0, + "loss": 0.6021, + "loss/chosen-sft": 1.0546875, + "loss/dpo": 0.58984375, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.1728515625, + "rewards/margins": 0.24609375, + "rewards/rejected": -0.41796875, + "step": 76 + }, + { + "epoch": 0.3407079646017699, + "grad_norm": 31.465473175048828, + "learning_rate": 7e-07, + "logits/chosen": -0.423828125, + "logits/rejected": -0.1796875, + "logps/chosen": -264.0, + "logps/rejected": -286.0, + "loss": 0.5967, + "loss/chosen-sft": 1.0703125, + "loss/dpo": 0.62890625, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.31640625, + "rewards/margins": 0.19921875, + "rewards/rejected": -0.515625, + "step": 77 + }, + { + "epoch": 0.34513274336283184, + "grad_norm": 57.08030700683594, + "learning_rate": 7e-07, + "logits/chosen": -0.578125, + "logits/rejected": -0.326171875, + "logps/chosen": -304.0, + "logps/rejected": -240.0, + "loss": 0.6187, + "loss/chosen-sft": 1.328125, + "loss/dpo": 0.59375, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.22265625, + "rewards/margins": 0.267578125, + "rewards/rejected": -0.490234375, + "step": 78 + }, + { + "epoch": 0.3495575221238938, + "grad_norm": 29.671070098876953, + "learning_rate": 7e-07, + "logits/chosen": -0.48046875, + "logits/rejected": -0.59765625, + "logps/chosen": -292.0, + "logps/rejected": -256.0, + "loss": 0.6147, + "loss/chosen-sft": 1.2265625, + "loss/dpo": 0.55859375, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.1416015625, + "rewards/margins": 0.33984375, + "rewards/rejected": -0.48046875, + "step": 79 + }, + { + "epoch": 0.35398230088495575, + "grad_norm": 48.64991760253906, + "learning_rate": 7e-07, + "logits/chosen": -0.59375, + "logits/rejected": -0.6328125, + "logps/chosen": -288.0, + "logps/rejected": -264.0, + "loss": 0.6309, + "loss/chosen-sft": 1.2578125, + "loss/dpo": 0.6484375, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.33984375, + "rewards/margins": 0.162109375, + "rewards/rejected": -0.50390625, + "step": 80 + }, + { + "epoch": 0.3584070796460177, + "grad_norm": 44.117034912109375, + "learning_rate": 7e-07, + "logits/chosen": -0.49609375, + "logits/rejected": -0.51953125, + "logps/chosen": -252.0, + "logps/rejected": -262.0, + "loss": 0.5845, + "loss/chosen-sft": 1.0859375, + "loss/dpo": 0.55078125, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.2412109375, + "rewards/margins": 0.3828125, + "rewards/rejected": -0.625, + "step": 81 + }, + { + "epoch": 0.36283185840707965, + "grad_norm": 45.00334167480469, + "learning_rate": 7e-07, + "logits/chosen": -0.486328125, + "logits/rejected": -0.5390625, + "logps/chosen": -294.0, + "logps/rejected": -288.0, + "loss": 0.6045, + "loss/chosen-sft": 1.21875, + "loss/dpo": 0.578125, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.287109375, + "rewards/margins": 0.328125, + "rewards/rejected": -0.6171875, + "step": 82 + }, + { + "epoch": 0.3672566371681416, + "grad_norm": 6.153012752532959, + "learning_rate": 7e-07, + "logits/chosen": -0.427734375, + "logits/rejected": -0.427734375, + "logps/chosen": -272.0, + "logps/rejected": -278.0, + "loss": 0.582, + "loss/chosen-sft": 1.0546875, + "loss/dpo": 0.5625, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.1689453125, + "rewards/margins": 0.357421875, + "rewards/rejected": -0.52734375, + "step": 83 + }, + { + "epoch": 0.37168141592920356, + "grad_norm": 4.050904273986816, + "learning_rate": 7e-07, + "logits/chosen": -0.349609375, + "logits/rejected": -0.3984375, + "logps/chosen": -288.0, + "logps/rejected": -274.0, + "loss": 0.603, + "loss/chosen-sft": 1.1796875, + "loss/dpo": 0.62109375, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.28515625, + "rewards/margins": 0.2333984375, + "rewards/rejected": -0.51953125, + "step": 84 + }, + { + "epoch": 0.37610619469026546, + "grad_norm": 53.353515625, + "learning_rate": 7e-07, + "logits/chosen": -0.455078125, + "logits/rejected": -0.462890625, + "logps/chosen": -278.0, + "logps/rejected": -227.0, + "loss": 0.6323, + "loss/chosen-sft": 1.109375, + "loss/dpo": 0.609375, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.24609375, + "rewards/margins": 0.2373046875, + "rewards/rejected": -0.482421875, + "step": 85 + }, + { + "epoch": 0.3805309734513274, + "grad_norm": 63.98937225341797, + "learning_rate": 7e-07, + "logits/chosen": -0.400390625, + "logits/rejected": -0.4609375, + "logps/chosen": -300.0, + "logps/rejected": -340.0, + "loss": 0.6182, + "loss/chosen-sft": 1.046875, + "loss/dpo": 0.62890625, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.349609375, + "rewards/margins": 0.1904296875, + "rewards/rejected": -0.5390625, + "step": 86 + }, + { + "epoch": 0.38495575221238937, + "grad_norm": 22.751070022583008, + "learning_rate": 7e-07, + "logits/chosen": -0.416015625, + "logits/rejected": -0.47265625, + "logps/chosen": -270.0, + "logps/rejected": -234.0, + "loss": 0.5938, + "loss/chosen-sft": 1.109375, + "loss/dpo": 0.6015625, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.203125, + "rewards/margins": 0.2734375, + "rewards/rejected": -0.4765625, + "step": 87 + }, + { + "epoch": 0.3893805309734513, + "grad_norm": 15.500909805297852, + "learning_rate": 7e-07, + "logits/chosen": -0.42578125, + "logits/rejected": -0.51171875, + "logps/chosen": -220.0, + "logps/rejected": -222.0, + "loss": 0.5884, + "loss/chosen-sft": 1.3828125, + "loss/dpo": 0.69140625, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.33203125, + "rewards/margins": 0.0634765625, + "rewards/rejected": -0.396484375, + "step": 88 + }, + { + "epoch": 0.3938053097345133, + "grad_norm": 21.14480209350586, + "learning_rate": 7e-07, + "logits/chosen": -0.50390625, + "logits/rejected": -0.4921875, + "logps/chosen": -282.0, + "logps/rejected": -223.0, + "loss": 0.584, + "loss/chosen-sft": 1.296875, + "loss/dpo": 0.65234375, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.27734375, + "rewards/margins": 0.1875, + "rewards/rejected": -0.466796875, + "step": 89 + }, + { + "epoch": 0.39823008849557523, + "grad_norm": 14.2146635055542, + "learning_rate": 7e-07, + "logits/chosen": -0.447265625, + "logits/rejected": -0.392578125, + "logps/chosen": -268.0, + "logps/rejected": -215.0, + "loss": 0.6104, + "loss/chosen-sft": 1.09375, + "loss/dpo": 0.65625, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.359375, + "rewards/margins": 0.1923828125, + "rewards/rejected": -0.55078125, + "step": 90 + }, + { + "epoch": 0.4026548672566372, + "grad_norm": 76.13188171386719, + "learning_rate": 7e-07, + "logits/chosen": -0.478515625, + "logits/rejected": -0.462890625, + "logps/chosen": -270.0, + "logps/rejected": -280.0, + "loss": 0.5781, + "loss/chosen-sft": 1.2109375, + "loss/dpo": 0.59765625, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.2421875, + "rewards/margins": 0.2431640625, + "rewards/rejected": -0.486328125, + "step": 91 + }, + { + "epoch": 0.40707964601769914, + "grad_norm": 34.39772033691406, + "learning_rate": 7e-07, + "logits/chosen": -0.5625, + "logits/rejected": -0.4921875, + "logps/chosen": -308.0, + "logps/rejected": -294.0, + "loss": 0.5698, + "loss/chosen-sft": 1.3203125, + "loss/dpo": 0.52734375, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.2275390625, + "rewards/margins": 0.49609375, + "rewards/rejected": -0.7265625, + "step": 92 + }, + { + "epoch": 0.41150442477876104, + "grad_norm": 36.51502227783203, + "learning_rate": 7e-07, + "logits/chosen": -0.4296875, + "logits/rejected": -0.51953125, + "logps/chosen": -278.0, + "logps/rejected": -336.0, + "loss": 0.5869, + "loss/chosen-sft": 1.15625, + "loss/dpo": 0.57421875, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.29296875, + "rewards/margins": 0.314453125, + "rewards/rejected": -0.60546875, + "step": 93 + }, + { + "epoch": 0.415929203539823, + "grad_norm": 41.33882141113281, + "learning_rate": 7e-07, + "logits/chosen": -0.291015625, + "logits/rejected": -0.26171875, + "logps/chosen": -255.0, + "logps/rejected": -251.0, + "loss": 0.5908, + "loss/chosen-sft": 1.171875, + "loss/dpo": 0.6171875, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.310546875, + "rewards/margins": 0.232421875, + "rewards/rejected": -0.54296875, + "step": 94 + }, + { + "epoch": 0.42035398230088494, + "grad_norm": 25.335350036621094, + "learning_rate": 7e-07, + "logits/chosen": -0.2578125, + "logits/rejected": -0.26953125, + "logps/chosen": -234.0, + "logps/rejected": -312.0, + "loss": 0.5811, + "loss/chosen-sft": 1.03125, + "loss/dpo": 0.5546875, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.298828125, + "rewards/margins": 0.41796875, + "rewards/rejected": -0.71875, + "step": 95 + }, + { + "epoch": 0.4247787610619469, + "grad_norm": 14.678760528564453, + "learning_rate": 7e-07, + "logits/chosen": -0.46484375, + "logits/rejected": -0.439453125, + "logps/chosen": -304.0, + "logps/rejected": -248.0, + "loss": 0.6094, + "loss/chosen-sft": 1.2265625, + "loss/dpo": 0.6484375, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.287109375, + "rewards/margins": 0.1328125, + "rewards/rejected": -0.41796875, + "step": 96 + }, + { + "epoch": 0.42920353982300885, + "grad_norm": 12.859259605407715, + "learning_rate": 7e-07, + "logits/chosen": -0.50390625, + "logits/rejected": -0.4765625, + "logps/chosen": -255.0, + "logps/rejected": -226.0, + "loss": 0.5938, + "loss/chosen-sft": 1.1953125, + "loss/dpo": 0.64453125, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.353515625, + "rewards/margins": 0.15625, + "rewards/rejected": -0.51171875, + "step": 97 + }, + { + "epoch": 0.4336283185840708, + "grad_norm": 9.326794624328613, + "learning_rate": 7e-07, + "logits/chosen": -0.4765625, + "logits/rejected": -0.447265625, + "logps/chosen": -272.0, + "logps/rejected": -268.0, + "loss": 0.5752, + "loss/chosen-sft": 1.1640625, + "loss/dpo": 0.54296875, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.291015625, + "rewards/margins": 0.4765625, + "rewards/rejected": -0.76953125, + "step": 98 + }, + { + "epoch": 0.43805309734513276, + "grad_norm": 10.561424255371094, + "learning_rate": 7e-07, + "logits/chosen": -0.5625, + "logits/rejected": -0.5234375, + "logps/chosen": -304.0, + "logps/rejected": -282.0, + "loss": 0.5786, + "loss/chosen-sft": 1.1640625, + "loss/dpo": 0.578125, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.390625, + "rewards/margins": 0.353515625, + "rewards/rejected": -0.7421875, + "step": 99 + }, + { + "epoch": 0.4424778761061947, + "grad_norm": 102.05797576904297, + "learning_rate": 7e-07, + "logits/chosen": -0.44921875, + "logits/rejected": -0.423828125, + "logps/chosen": -235.0, + "logps/rejected": -284.0, + "loss": 0.6079, + "loss/chosen-sft": 1.1171875, + "loss/dpo": 0.65234375, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.423828125, + "rewards/margins": 0.236328125, + "rewards/rejected": -0.66015625, + "step": 100 + }, + { + "epoch": 0.4469026548672566, + "grad_norm": 10.166366577148438, + "learning_rate": 7e-07, + "logits/chosen": -0.51953125, + "logits/rejected": -0.431640625, + "logps/chosen": -342.0, + "logps/rejected": -302.0, + "loss": 0.5864, + "loss/chosen-sft": 1.1953125, + "loss/dpo": 0.53515625, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.333984375, + "rewards/margins": 0.5, + "rewards/rejected": -0.8359375, + "step": 101 + }, + { + "epoch": 0.45132743362831856, + "grad_norm": 9.208560943603516, + "learning_rate": 7e-07, + "logits/chosen": -0.373046875, + "logits/rejected": -0.3359375, + "logps/chosen": -332.0, + "logps/rejected": -326.0, + "loss": 0.584, + "loss/chosen-sft": 1.1640625, + "loss/dpo": 0.5390625, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.30078125, + "rewards/margins": 0.435546875, + "rewards/rejected": -0.73828125, + "step": 102 + }, + { + "epoch": 0.4557522123893805, + "grad_norm": 58.12112045288086, + "learning_rate": 7e-07, + "logits/chosen": -0.46484375, + "logits/rejected": -0.486328125, + "logps/chosen": -298.0, + "logps/rejected": -300.0, + "loss": 0.6191, + "loss/chosen-sft": 1.1640625, + "loss/dpo": 0.58984375, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.365234375, + "rewards/margins": 0.40625, + "rewards/rejected": -0.76953125, + "step": 103 + }, + { + "epoch": 0.46017699115044247, + "grad_norm": 32.06050491333008, + "learning_rate": 7e-07, + "logits/chosen": -0.51953125, + "logits/rejected": -0.49609375, + "logps/chosen": -376.0, + "logps/rejected": -346.0, + "loss": 0.5581, + "loss/chosen-sft": 1.2265625, + "loss/dpo": 0.55078125, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.35546875, + "rewards/margins": 0.46875, + "rewards/rejected": -0.82421875, + "step": 104 + }, + { + "epoch": 0.4646017699115044, + "grad_norm": 66.27430725097656, + "learning_rate": 7e-07, + "logits/chosen": -0.578125, + "logits/rejected": -0.53125, + "logps/chosen": -340.0, + "logps/rejected": -284.0, + "loss": 0.5674, + "loss/chosen-sft": 1.2734375, + "loss/dpo": 0.5390625, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.37890625, + "rewards/margins": 0.427734375, + "rewards/rejected": -0.80859375, + "step": 105 + }, + { + "epoch": 0.4690265486725664, + "grad_norm": 43.450740814208984, + "learning_rate": 7e-07, + "logits/chosen": -0.369140625, + "logits/rejected": -0.443359375, + "logps/chosen": -328.0, + "logps/rejected": -310.0, + "loss": 0.5688, + "loss/chosen-sft": 1.1640625, + "loss/dpo": 0.59765625, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.404296875, + "rewards/margins": 0.318359375, + "rewards/rejected": -0.72265625, + "step": 106 + }, + { + "epoch": 0.47345132743362833, + "grad_norm": 48.25244903564453, + "learning_rate": 7e-07, + "logits/chosen": -0.4765625, + "logits/rejected": -0.43359375, + "logps/chosen": -320.0, + "logps/rejected": -260.0, + "loss": 0.5781, + "loss/chosen-sft": 1.2421875, + "loss/dpo": 0.62890625, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.5234375, + "rewards/margins": 0.283203125, + "rewards/rejected": -0.8046875, + "step": 107 + }, + { + "epoch": 0.4778761061946903, + "grad_norm": 26.64389419555664, + "learning_rate": 7e-07, + "logits/chosen": -0.427734375, + "logits/rejected": -0.41796875, + "logps/chosen": -262.0, + "logps/rejected": -240.0, + "loss": 0.564, + "loss/chosen-sft": 1.1015625, + "loss/dpo": 0.625, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.357421875, + "rewards/margins": 0.216796875, + "rewards/rejected": -0.57421875, + "step": 108 + }, + { + "epoch": 0.4823008849557522, + "grad_norm": 51.666202545166016, + "learning_rate": 7e-07, + "logits/chosen": -0.671875, + "logits/rejected": -0.734375, + "logps/chosen": -372.0, + "logps/rejected": -340.0, + "loss": 0.564, + "loss/chosen-sft": 1.3125, + "loss/dpo": 0.5234375, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.35546875, + "rewards/margins": 0.5859375, + "rewards/rejected": -0.94140625, + "step": 109 + }, + { + "epoch": 0.48672566371681414, + "grad_norm": 14.793038368225098, + "learning_rate": 7e-07, + "logits/chosen": -0.490234375, + "logits/rejected": -0.57421875, + "logps/chosen": -288.0, + "logps/rejected": -262.0, + "loss": 0.564, + "loss/chosen-sft": 1.2890625, + "loss/dpo": 0.63671875, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.53125, + "rewards/margins": 0.232421875, + "rewards/rejected": -0.765625, + "step": 110 + }, + { + "epoch": 0.4911504424778761, + "grad_norm": 54.62705993652344, + "learning_rate": 7e-07, + "logits/chosen": -0.51171875, + "logits/rejected": -0.59765625, + "logps/chosen": -312.0, + "logps/rejected": -286.0, + "loss": 0.5803, + "loss/chosen-sft": 1.21875, + "loss/dpo": 0.53125, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.466796875, + "rewards/margins": 0.515625, + "rewards/rejected": -0.98046875, + "step": 111 + }, + { + "epoch": 0.49557522123893805, + "grad_norm": 45.08600997924805, + "learning_rate": 7e-07, + "logits/chosen": -0.53125, + "logits/rejected": -0.65625, + "logps/chosen": -306.0, + "logps/rejected": -314.0, + "loss": 0.5442, + "loss/chosen-sft": 1.1953125, + "loss/dpo": 0.57421875, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.435546875, + "rewards/margins": 0.34375, + "rewards/rejected": -0.78125, + "step": 112 + }, + { + "epoch": 0.5, + "grad_norm": 29.89005470275879, + "learning_rate": 7e-07, + "logits/chosen": -0.5234375, + "logits/rejected": -0.5078125, + "logps/chosen": -282.0, + "logps/rejected": -242.0, + "loss": 0.5535, + "loss/chosen-sft": 1.1015625, + "loss/dpo": 0.63671875, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.43359375, + "rewards/margins": 0.21484375, + "rewards/rejected": -0.6484375, + "step": 113 + }, + { + "epoch": 0.504424778761062, + "grad_norm": 5.516697406768799, + "learning_rate": 7e-07, + "logits/chosen": -0.376953125, + "logits/rejected": -0.423828125, + "logps/chosen": -249.0, + "logps/rejected": -298.0, + "loss": 0.5713, + "loss/chosen-sft": 1.125, + "loss/dpo": 0.51953125, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.298828125, + "rewards/margins": 0.48046875, + "rewards/rejected": -0.77734375, + "step": 114 + }, + { + "epoch": 0.5088495575221239, + "grad_norm": 42.4152946472168, + "learning_rate": 7e-07, + "logits/chosen": -0.5625, + "logits/rejected": -0.48046875, + "logps/chosen": -316.0, + "logps/rejected": -312.0, + "loss": 0.5811, + "loss/chosen-sft": 1.2265625, + "loss/dpo": 0.60546875, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.59765625, + "rewards/margins": 0.29296875, + "rewards/rejected": -0.890625, + "step": 115 + }, + { + "epoch": 0.5132743362831859, + "grad_norm": 43.45073699951172, + "learning_rate": 7e-07, + "logits/chosen": -0.51171875, + "logits/rejected": -0.49609375, + "logps/chosen": -320.0, + "logps/rejected": -368.0, + "loss": 0.5562, + "loss/chosen-sft": 1.203125, + "loss/dpo": 0.58203125, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.51953125, + "rewards/margins": 0.431640625, + "rewards/rejected": -0.94921875, + "step": 116 + }, + { + "epoch": 0.5176991150442478, + "grad_norm": 13.291467666625977, + "learning_rate": 7e-07, + "logits/chosen": -0.392578125, + "logits/rejected": -0.375, + "logps/chosen": -300.0, + "logps/rejected": -316.0, + "loss": 0.5491, + "loss/chosen-sft": 1.2734375, + "loss/dpo": 0.58984375, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.5078125, + "rewards/margins": 0.3515625, + "rewards/rejected": -0.859375, + "step": 117 + }, + { + "epoch": 0.5221238938053098, + "grad_norm": 16.5191707611084, + "learning_rate": 7e-07, + "logits/chosen": -0.330078125, + "logits/rejected": -0.240234375, + "logps/chosen": -292.0, + "logps/rejected": -292.0, + "loss": 0.5605, + "loss/chosen-sft": 1.2421875, + "loss/dpo": 0.57421875, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.484375, + "rewards/margins": 0.34375, + "rewards/rejected": -0.828125, + "step": 118 + }, + { + "epoch": 0.5265486725663717, + "grad_norm": 55.267738342285156, + "learning_rate": 7e-07, + "logits/chosen": -0.58984375, + "logits/rejected": -0.62890625, + "logps/chosen": -316.0, + "logps/rejected": -318.0, + "loss": 0.5439, + "loss/chosen-sft": 1.1484375, + "loss/dpo": 0.53515625, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.412109375, + "rewards/margins": 0.482421875, + "rewards/rejected": -0.89453125, + "step": 119 + }, + { + "epoch": 0.5309734513274337, + "grad_norm": 52.895042419433594, + "learning_rate": 7e-07, + "logits/chosen": -0.28515625, + "logits/rejected": -0.380859375, + "logps/chosen": -252.0, + "logps/rejected": -278.0, + "loss": 0.5623, + "loss/chosen-sft": 0.96484375, + "loss/dpo": 0.50390625, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.359375, + "rewards/margins": 0.609375, + "rewards/rejected": -0.96875, + "step": 120 + }, + { + "epoch": 0.5353982300884956, + "grad_norm": 33.5416374206543, + "learning_rate": 7e-07, + "logits/chosen": -0.365234375, + "logits/rejected": -0.45703125, + "logps/chosen": -282.0, + "logps/rejected": -262.0, + "loss": 0.5391, + "loss/chosen-sft": 1.28125, + "loss/dpo": 0.546875, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.4296875, + "rewards/margins": 0.4609375, + "rewards/rejected": -0.890625, + "step": 121 + }, + { + "epoch": 0.5398230088495575, + "grad_norm": 55.33546447753906, + "learning_rate": 7e-07, + "logits/chosen": -0.53515625, + "logits/rejected": -0.5703125, + "logps/chosen": -334.0, + "logps/rejected": -340.0, + "loss": 0.5278, + "loss/chosen-sft": 1.2578125, + "loss/dpo": 0.53515625, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.45703125, + "rewards/margins": 0.609375, + "rewards/rejected": -1.0703125, + "step": 122 + }, + { + "epoch": 0.5442477876106194, + "grad_norm": 46.70622253417969, + "learning_rate": 7e-07, + "logits/chosen": -0.53515625, + "logits/rejected": -0.59765625, + "logps/chosen": -308.0, + "logps/rejected": -274.0, + "loss": 0.55, + "loss/chosen-sft": 1.34375, + "loss/dpo": 0.5703125, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.5390625, + "rewards/margins": 0.3671875, + "rewards/rejected": -0.90625, + "step": 123 + }, + { + "epoch": 0.5486725663716814, + "grad_norm": 48.83370590209961, + "learning_rate": 7e-07, + "logits/chosen": -0.5234375, + "logits/rejected": -0.60546875, + "logps/chosen": -322.0, + "logps/rejected": -318.0, + "loss": 0.5825, + "loss/chosen-sft": 1.2734375, + "loss/dpo": 0.59375, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.625, + "rewards/margins": 0.361328125, + "rewards/rejected": -0.984375, + "step": 124 + }, + { + "epoch": 0.5530973451327433, + "grad_norm": 25.2650089263916, + "learning_rate": 7e-07, + "logits/chosen": -0.3203125, + "logits/rejected": -0.25390625, + "logps/chosen": -244.0, + "logps/rejected": -274.0, + "loss": 0.5889, + "loss/chosen-sft": 1.1484375, + "loss/dpo": 0.55859375, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.439453125, + "rewards/margins": 0.41796875, + "rewards/rejected": -0.859375, + "step": 125 + }, + { + "epoch": 0.5575221238938053, + "grad_norm": 36.186500549316406, + "learning_rate": 7e-07, + "logits/chosen": -0.58984375, + "logits/rejected": -0.6640625, + "logps/chosen": -324.0, + "logps/rejected": -358.0, + "loss": 0.585, + "loss/chosen-sft": 1.2890625, + "loss/dpo": 0.54296875, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.6484375, + "rewards/margins": 0.50390625, + "rewards/rejected": -1.15625, + "step": 126 + }, + { + "epoch": 0.5619469026548672, + "grad_norm": 13.623043060302734, + "learning_rate": 7e-07, + "logits/chosen": -0.640625, + "logits/rejected": -0.66796875, + "logps/chosen": -312.0, + "logps/rejected": -332.0, + "loss": 0.5789, + "loss/chosen-sft": 1.2109375, + "loss/dpo": 0.5546875, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.38671875, + "rewards/margins": 0.42578125, + "rewards/rejected": -0.8125, + "step": 127 + }, + { + "epoch": 0.5663716814159292, + "grad_norm": 9.796255111694336, + "learning_rate": 7e-07, + "logits/chosen": -0.494140625, + "logits/rejected": -0.6015625, + "logps/chosen": -290.0, + "logps/rejected": -346.0, + "loss": 0.5703, + "loss/chosen-sft": 1.3203125, + "loss/dpo": 0.609375, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.65234375, + "rewards/margins": 0.359375, + "rewards/rejected": -1.015625, + "step": 128 + }, + { + "epoch": 0.5707964601769911, + "grad_norm": 70.11766052246094, + "learning_rate": 7e-07, + "logits/chosen": -0.345703125, + "logits/rejected": -0.353515625, + "logps/chosen": -255.0, + "logps/rejected": -296.0, + "loss": 0.5767, + "loss/chosen-sft": 1.15625, + "loss/dpo": 0.48046875, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.3046875, + "rewards/margins": 0.75, + "rewards/rejected": -1.0546875, + "step": 129 + }, + { + "epoch": 0.5752212389380531, + "grad_norm": 137.03662109375, + "learning_rate": 7e-07, + "logits/chosen": -0.095703125, + "logits/rejected": -0.38671875, + "logps/chosen": -212.0, + "logps/rejected": -262.0, + "loss": 0.5471, + "loss/chosen-sft": 1.125, + "loss/dpo": 0.515625, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.328125, + "rewards/margins": 0.65234375, + "rewards/rejected": -0.98046875, + "step": 130 + }, + { + "epoch": 0.5796460176991151, + "grad_norm": 148.0476531982422, + "learning_rate": 7e-07, + "logits/chosen": -0.5625, + "logits/rejected": -0.59375, + "logps/chosen": -270.0, + "logps/rejected": -284.0, + "loss": 0.5403, + "loss/chosen-sft": 1.3046875, + "loss/dpo": 0.6796875, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.54296875, + "rewards/margins": 0.134765625, + "rewards/rejected": -0.6796875, + "step": 131 + }, + { + "epoch": 0.584070796460177, + "grad_norm": 93.40387725830078, + "learning_rate": 7e-07, + "logits/chosen": -0.7109375, + "logits/rejected": -0.66015625, + "logps/chosen": -340.0, + "logps/rejected": -350.0, + "loss": 0.561, + "loss/chosen-sft": 1.390625, + "loss/dpo": 0.5078125, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.51953125, + "rewards/margins": 0.69921875, + "rewards/rejected": -1.21875, + "step": 132 + }, + { + "epoch": 0.588495575221239, + "grad_norm": 55.550758361816406, + "learning_rate": 7e-07, + "logits/chosen": -0.42578125, + "logits/rejected": -0.55859375, + "logps/chosen": -294.0, + "logps/rejected": -296.0, + "loss": 0.5627, + "loss/chosen-sft": 1.140625, + "loss/dpo": 0.6015625, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.453125, + "rewards/margins": 0.33203125, + "rewards/rejected": -0.78515625, + "step": 133 + }, + { + "epoch": 0.5929203539823009, + "grad_norm": 80.4654541015625, + "learning_rate": 7e-07, + "logits/chosen": -0.43359375, + "logits/rejected": -0.470703125, + "logps/chosen": -308.0, + "logps/rejected": -338.0, + "loss": 0.5481, + "loss/chosen-sft": 1.1640625, + "loss/dpo": 0.4765625, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.3984375, + "rewards/margins": 0.67578125, + "rewards/rejected": -1.0703125, + "step": 134 + }, + { + "epoch": 0.5973451327433629, + "grad_norm": 109.14212036132812, + "learning_rate": 7e-07, + "logits/chosen": -0.470703125, + "logits/rejected": -0.50390625, + "logps/chosen": -334.0, + "logps/rejected": -304.0, + "loss": 0.542, + "loss/chosen-sft": 1.171875, + "loss/dpo": 0.6171875, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.54296875, + "rewards/margins": 0.2578125, + "rewards/rejected": -0.80078125, + "step": 135 + }, + { + "epoch": 0.6017699115044248, + "grad_norm": 37.90422821044922, + "learning_rate": 7e-07, + "logits/chosen": -0.421875, + "logits/rejected": -0.427734375, + "logps/chosen": -372.0, + "logps/rejected": -298.0, + "loss": 0.5493, + "loss/chosen-sft": 1.3203125, + "loss/dpo": 0.546875, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.50390625, + "rewards/margins": 0.474609375, + "rewards/rejected": -0.9765625, + "step": 136 + }, + { + "epoch": 0.6061946902654868, + "grad_norm": 118.1666030883789, + "learning_rate": 7e-07, + "logits/chosen": -0.46875, + "logits/rejected": -0.515625, + "logps/chosen": -326.0, + "logps/rejected": -394.0, + "loss": 0.5112, + "loss/chosen-sft": 1.234375, + "loss/dpo": 0.5390625, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.419921875, + "rewards/margins": 0.5703125, + "rewards/rejected": -0.9921875, + "step": 137 + }, + { + "epoch": 0.6106194690265486, + "grad_norm": 99.32813262939453, + "learning_rate": 7e-07, + "logits/chosen": -0.427734375, + "logits/rejected": -0.50390625, + "logps/chosen": -288.0, + "logps/rejected": -268.0, + "loss": 0.5508, + "loss/chosen-sft": 1.3984375, + "loss/dpo": 0.61328125, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.5625, + "rewards/margins": 0.353515625, + "rewards/rejected": -0.9140625, + "step": 138 + }, + { + "epoch": 0.6150442477876106, + "grad_norm": 17.352619171142578, + "learning_rate": 7e-07, + "logits/chosen": -0.48046875, + "logits/rejected": -0.546875, + "logps/chosen": -282.0, + "logps/rejected": -272.0, + "loss": 0.5352, + "loss/chosen-sft": 1.1640625, + "loss/dpo": 0.51953125, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.466796875, + "rewards/margins": 0.5078125, + "rewards/rejected": -0.97265625, + "step": 139 + }, + { + "epoch": 0.6194690265486725, + "grad_norm": 59.95145797729492, + "learning_rate": 7e-07, + "logits/chosen": -0.6171875, + "logits/rejected": -0.6875, + "logps/chosen": -302.0, + "logps/rejected": -336.0, + "loss": 0.5435, + "loss/chosen-sft": 1.265625, + "loss/dpo": 0.51953125, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.4765625, + "rewards/margins": 0.55078125, + "rewards/rejected": -1.0234375, + "step": 140 + }, + { + "epoch": 0.6238938053097345, + "grad_norm": 55.3637580871582, + "learning_rate": 7e-07, + "logits/chosen": -0.51953125, + "logits/rejected": -0.56640625, + "logps/chosen": -342.0, + "logps/rejected": -382.0, + "loss": 0.5469, + "loss/chosen-sft": 1.3046875, + "loss/dpo": 0.55078125, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.5546875, + "rewards/margins": 0.65625, + "rewards/rejected": -1.2109375, + "step": 141 + }, + { + "epoch": 0.6283185840707964, + "grad_norm": 146.2696075439453, + "learning_rate": 7e-07, + "logits/chosen": -0.55859375, + "logits/rejected": -0.62890625, + "logps/chosen": -352.0, + "logps/rejected": -318.0, + "loss": 0.5806, + "loss/chosen-sft": 1.3515625, + "loss/dpo": 0.7578125, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.703125, + "rewards/margins": 0.07275390625, + "rewards/rejected": -0.77734375, + "step": 142 + }, + { + "epoch": 0.6327433628318584, + "grad_norm": 46.21394729614258, + "learning_rate": 7e-07, + "logits/chosen": -0.578125, + "logits/rejected": -0.5859375, + "logps/chosen": -268.0, + "logps/rejected": -296.0, + "loss": 0.5393, + "loss/chosen-sft": 1.265625, + "loss/dpo": 0.54296875, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.341796875, + "rewards/margins": 0.48046875, + "rewards/rejected": -0.82421875, + "step": 143 + }, + { + "epoch": 0.6371681415929203, + "grad_norm": 38.909610748291016, + "learning_rate": 7e-07, + "logits/chosen": -0.60546875, + "logits/rejected": -0.73828125, + "logps/chosen": -320.0, + "logps/rejected": -334.0, + "loss": 0.543, + "loss/chosen-sft": 1.4375, + "loss/dpo": 0.55859375, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.6796875, + "rewards/margins": 0.5234375, + "rewards/rejected": -1.203125, + "step": 144 + }, + { + "epoch": 0.6415929203539823, + "grad_norm": 137.8043975830078, + "learning_rate": 7e-07, + "logits/chosen": -0.53515625, + "logits/rejected": -0.58203125, + "logps/chosen": -340.0, + "logps/rejected": -290.0, + "loss": 0.5903, + "loss/chosen-sft": 1.328125, + "loss/dpo": 0.65234375, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.7109375, + "rewards/margins": 0.333984375, + "rewards/rejected": -1.046875, + "step": 145 + }, + { + "epoch": 0.6460176991150443, + "grad_norm": 36.96350860595703, + "learning_rate": 7e-07, + "logits/chosen": -0.6953125, + "logits/rejected": -0.66796875, + "logps/chosen": -398.0, + "logps/rejected": -382.0, + "loss": 0.5288, + "loss/chosen-sft": 1.34375, + "loss/dpo": 0.546875, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.84375, + "rewards/margins": 0.486328125, + "rewards/rejected": -1.328125, + "step": 146 + }, + { + "epoch": 0.6504424778761062, + "grad_norm": 31.765138626098633, + "learning_rate": 7e-07, + "logits/chosen": -0.6796875, + "logits/rejected": -0.66015625, + "logps/chosen": -354.0, + "logps/rejected": -326.0, + "loss": 0.5535, + "loss/chosen-sft": 1.4375, + "loss/dpo": 0.5078125, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.63671875, + "rewards/margins": 0.5546875, + "rewards/rejected": -1.1953125, + "step": 147 + }, + { + "epoch": 0.6548672566371682, + "grad_norm": 73.44290924072266, + "learning_rate": 7e-07, + "logits/chosen": -0.609375, + "logits/rejected": -0.69921875, + "logps/chosen": -356.0, + "logps/rejected": -350.0, + "loss": 0.5137, + "loss/chosen-sft": 1.3046875, + "loss/dpo": 0.474609375, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.5234375, + "rewards/margins": 0.67578125, + "rewards/rejected": -1.203125, + "step": 148 + }, + { + "epoch": 0.6592920353982301, + "grad_norm": 14.243433952331543, + "learning_rate": 7e-07, + "logits/chosen": -0.50390625, + "logits/rejected": -0.55078125, + "logps/chosen": -326.0, + "logps/rejected": -316.0, + "loss": 0.5225, + "loss/chosen-sft": 1.2578125, + "loss/dpo": 0.48046875, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.61328125, + "rewards/margins": 0.765625, + "rewards/rejected": -1.3828125, + "step": 149 + }, + { + "epoch": 0.6637168141592921, + "grad_norm": 21.770702362060547, + "learning_rate": 7e-07, + "logits/chosen": -0.55078125, + "logits/rejected": -0.546875, + "logps/chosen": -326.0, + "logps/rejected": -330.0, + "loss": 0.5234, + "loss/chosen-sft": 1.296875, + "loss/dpo": 0.5, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.4609375, + "rewards/margins": 0.60546875, + "rewards/rejected": -1.0703125, + "step": 150 + }, + { + "epoch": 0.668141592920354, + "grad_norm": 77.819091796875, + "learning_rate": 7e-07, + "logits/chosen": -0.7109375, + "logits/rejected": -0.6171875, + "logps/chosen": -404.0, + "logps/rejected": -442.0, + "loss": 0.5, + "loss/chosen-sft": 1.3125, + "loss/dpo": 0.5078125, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.7734375, + "rewards/margins": 0.6875, + "rewards/rejected": -1.4609375, + "step": 151 + }, + { + "epoch": 0.672566371681416, + "grad_norm": 37.56740951538086, + "learning_rate": 7e-07, + "logits/chosen": -0.3828125, + "logits/rejected": -0.2275390625, + "logps/chosen": -294.0, + "logps/rejected": -324.0, + "loss": 0.5325, + "loss/chosen-sft": 1.3984375, + "loss/dpo": 0.59375, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.73828125, + "rewards/margins": 0.52734375, + "rewards/rejected": -1.265625, + "step": 152 + }, + { + "epoch": 0.6769911504424779, + "grad_norm": 105.4240951538086, + "learning_rate": 7e-07, + "logits/chosen": -0.423828125, + "logits/rejected": -0.54296875, + "logps/chosen": -368.0, + "logps/rejected": -390.0, + "loss": 0.5544, + "loss/chosen-sft": 1.28125, + "loss/dpo": 0.46875, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.77734375, + "rewards/margins": 0.7734375, + "rewards/rejected": -1.546875, + "step": 153 + }, + { + "epoch": 0.6814159292035398, + "grad_norm": 56.64170837402344, + "learning_rate": 7e-07, + "logits/chosen": -0.5390625, + "logits/rejected": -0.6640625, + "logps/chosen": -358.0, + "logps/rejected": -320.0, + "loss": 0.5625, + "loss/chosen-sft": 1.3984375, + "loss/dpo": 0.5859375, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.7578125, + "rewards/margins": 0.453125, + "rewards/rejected": -1.2109375, + "step": 154 + }, + { + "epoch": 0.6858407079646017, + "grad_norm": 22.23441505432129, + "learning_rate": 7e-07, + "logits/chosen": -0.61328125, + "logits/rejected": -0.5703125, + "logps/chosen": -336.0, + "logps/rejected": -314.0, + "loss": 0.5115, + "loss/chosen-sft": 1.453125, + "loss/dpo": 0.52734375, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.58984375, + "rewards/margins": 0.5625, + "rewards/rejected": -1.15625, + "step": 155 + }, + { + "epoch": 0.6902654867256637, + "grad_norm": 71.9916000366211, + "learning_rate": 7e-07, + "logits/chosen": -0.69921875, + "logits/rejected": -0.7109375, + "logps/chosen": -354.0, + "logps/rejected": -338.0, + "loss": 0.5227, + "loss/chosen-sft": 1.40625, + "loss/dpo": 0.5546875, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.59375, + "rewards/margins": 0.6171875, + "rewards/rejected": -1.2109375, + "step": 156 + }, + { + "epoch": 0.6946902654867256, + "grad_norm": 11.088499069213867, + "learning_rate": 7e-07, + "logits/chosen": -0.48046875, + "logits/rejected": -0.51171875, + "logps/chosen": -328.0, + "logps/rejected": -358.0, + "loss": 0.5259, + "loss/chosen-sft": 1.328125, + "loss/dpo": 0.60546875, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.84765625, + "rewards/margins": 0.4609375, + "rewards/rejected": -1.3125, + "step": 157 + }, + { + "epoch": 0.6991150442477876, + "grad_norm": 104.77384185791016, + "learning_rate": 7e-07, + "logits/chosen": -0.55859375, + "logits/rejected": -0.7265625, + "logps/chosen": -330.0, + "logps/rejected": -340.0, + "loss": 0.5566, + "loss/chosen-sft": 1.4296875, + "loss/dpo": 0.61328125, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.640625, + "rewards/margins": 0.5390625, + "rewards/rejected": -1.1796875, + "step": 158 + }, + { + "epoch": 0.7035398230088495, + "grad_norm": 87.36003875732422, + "learning_rate": 7e-07, + "logits/chosen": -0.53515625, + "logits/rejected": -0.55078125, + "logps/chosen": -296.0, + "logps/rejected": -342.0, + "loss": 0.5449, + "loss/chosen-sft": 1.359375, + "loss/dpo": 0.58203125, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.8125, + "rewards/margins": 0.65625, + "rewards/rejected": -1.46875, + "step": 159 + }, + { + "epoch": 0.7079646017699115, + "grad_norm": 37.620750427246094, + "learning_rate": 7e-07, + "logits/chosen": -0.455078125, + "logits/rejected": -0.50390625, + "logps/chosen": -376.0, + "logps/rejected": -444.0, + "loss": 0.5303, + "loss/chosen-sft": 1.265625, + "loss/dpo": 0.54296875, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.87890625, + "rewards/margins": 0.62109375, + "rewards/rejected": -1.5, + "step": 160 + }, + { + "epoch": 0.7123893805309734, + "grad_norm": 75.54209899902344, + "learning_rate": 7e-07, + "logits/chosen": -0.546875, + "logits/rejected": -0.64453125, + "logps/chosen": -348.0, + "logps/rejected": -368.0, + "loss": 0.5203, + "loss/chosen-sft": 1.3125, + "loss/dpo": 0.54296875, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.86328125, + "rewards/margins": 0.4921875, + "rewards/rejected": -1.359375, + "step": 161 + }, + { + "epoch": 0.7168141592920354, + "grad_norm": 54.434139251708984, + "learning_rate": 7e-07, + "logits/chosen": -0.60546875, + "logits/rejected": -0.74609375, + "logps/chosen": -298.0, + "logps/rejected": -352.0, + "loss": 0.511, + "loss/chosen-sft": 1.2109375, + "loss/dpo": 0.466796875, + "rewards/accuracies": 0.90625, + "rewards/chosen": -0.58984375, + "rewards/margins": 0.67578125, + "rewards/rejected": -1.265625, + "step": 162 + }, + { + "epoch": 0.7212389380530974, + "grad_norm": 10.78385066986084, + "learning_rate": 7e-07, + "logits/chosen": -0.6875, + "logits/rejected": -0.5859375, + "logps/chosen": -288.0, + "logps/rejected": -306.0, + "loss": 0.5369, + "loss/chosen-sft": 1.2734375, + "loss/dpo": 0.51953125, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.72265625, + "rewards/margins": 0.65234375, + "rewards/rejected": -1.375, + "step": 163 + }, + { + "epoch": 0.7256637168141593, + "grad_norm": 46.15651321411133, + "learning_rate": 7e-07, + "logits/chosen": -0.70703125, + "logits/rejected": -0.765625, + "logps/chosen": -364.0, + "logps/rejected": -338.0, + "loss": 0.533, + "loss/chosen-sft": 1.3828125, + "loss/dpo": 0.50390625, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.5703125, + "rewards/margins": 0.6015625, + "rewards/rejected": -1.171875, + "step": 164 + }, + { + "epoch": 0.7300884955752213, + "grad_norm": 76.59629821777344, + "learning_rate": 7e-07, + "logits/chosen": -0.57421875, + "logits/rejected": -0.64453125, + "logps/chosen": -258.0, + "logps/rejected": -324.0, + "loss": 0.5183, + "loss/chosen-sft": 1.1953125, + "loss/dpo": 0.52734375, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.58203125, + "rewards/margins": 0.5390625, + "rewards/rejected": -1.1171875, + "step": 165 + }, + { + "epoch": 0.7345132743362832, + "grad_norm": 73.99260711669922, + "learning_rate": 7e-07, + "logits/chosen": -0.390625, + "logits/rejected": -0.578125, + "logps/chosen": -316.0, + "logps/rejected": -372.0, + "loss": 0.5208, + "loss/chosen-sft": 1.2109375, + "loss/dpo": 0.5, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.8359375, + "rewards/margins": 1.0, + "rewards/rejected": -1.8359375, + "step": 166 + }, + { + "epoch": 0.7389380530973452, + "grad_norm": 47.95753479003906, + "learning_rate": 7e-07, + "logits/chosen": -0.5625, + "logits/rejected": -0.72265625, + "logps/chosen": -356.0, + "logps/rejected": -354.0, + "loss": 0.4956, + "loss/chosen-sft": 1.2890625, + "loss/dpo": 0.6171875, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.66015625, + "rewards/margins": 0.455078125, + "rewards/rejected": -1.1171875, + "step": 167 + }, + { + "epoch": 0.7433628318584071, + "grad_norm": 27.858415603637695, + "learning_rate": 7e-07, + "logits/chosen": -0.7109375, + "logits/rejected": -0.64453125, + "logps/chosen": -336.0, + "logps/rejected": -332.0, + "loss": 0.4971, + "loss/chosen-sft": 1.4375, + "loss/dpo": 0.59375, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.74609375, + "rewards/margins": 0.3515625, + "rewards/rejected": -1.09375, + "step": 168 + }, + { + "epoch": 0.7477876106194691, + "grad_norm": 113.69762420654297, + "learning_rate": 7e-07, + "logits/chosen": -0.79296875, + "logits/rejected": -0.79296875, + "logps/chosen": -420.0, + "logps/rejected": -446.0, + "loss": 0.4646, + "loss/chosen-sft": 1.4453125, + "loss/dpo": 0.439453125, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.90625, + "rewards/margins": 0.92578125, + "rewards/rejected": -1.828125, + "step": 169 + }, + { + "epoch": 0.7522123893805309, + "grad_norm": 92.540283203125, + "learning_rate": 7e-07, + "logits/chosen": -0.53125, + "logits/rejected": -0.60546875, + "logps/chosen": -356.0, + "logps/rejected": -412.0, + "loss": 0.5298, + "loss/chosen-sft": 1.3828125, + "loss/dpo": 0.51171875, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.9453125, + "rewards/margins": 0.64453125, + "rewards/rejected": -1.5859375, + "step": 170 + }, + { + "epoch": 0.7566371681415929, + "grad_norm": 34.24614334106445, + "learning_rate": 7e-07, + "logits/chosen": -0.515625, + "logits/rejected": -0.62109375, + "logps/chosen": -336.0, + "logps/rejected": -426.0, + "loss": 0.4983, + "loss/chosen-sft": 1.28125, + "loss/dpo": 0.40625, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.7734375, + "rewards/margins": 1.1328125, + "rewards/rejected": -1.90625, + "step": 171 + }, + { + "epoch": 0.7610619469026548, + "grad_norm": 26.35588264465332, + "learning_rate": 7e-07, + "logits/chosen": -0.7734375, + "logits/rejected": -0.79296875, + "logps/chosen": -358.0, + "logps/rejected": -358.0, + "loss": 0.52, + "loss/chosen-sft": 1.4453125, + "loss/dpo": 0.435546875, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.6875, + "rewards/margins": 0.92578125, + "rewards/rejected": -1.6171875, + "step": 172 + }, + { + "epoch": 0.7654867256637168, + "grad_norm": 70.59893798828125, + "learning_rate": 7e-07, + "logits/chosen": -0.578125, + "logits/rejected": -0.63671875, + "logps/chosen": -336.0, + "logps/rejected": -406.0, + "loss": 0.4802, + "loss/chosen-sft": 1.4609375, + "loss/dpo": 0.451171875, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.83984375, + "rewards/margins": 1.078125, + "rewards/rejected": -1.921875, + "step": 173 + }, + { + "epoch": 0.7699115044247787, + "grad_norm": 35.492210388183594, + "learning_rate": 7e-07, + "logits/chosen": -0.7109375, + "logits/rejected": -0.63671875, + "logps/chosen": -326.0, + "logps/rejected": -396.0, + "loss": 0.4731, + "loss/chosen-sft": 1.4140625, + "loss/dpo": 0.490234375, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.69140625, + "rewards/margins": 1.109375, + "rewards/rejected": -1.8046875, + "step": 174 + }, + { + "epoch": 0.7743362831858407, + "grad_norm": 52.300148010253906, + "learning_rate": 7e-07, + "logits/chosen": -0.294921875, + "logits/rejected": -0.431640625, + "logps/chosen": -298.0, + "logps/rejected": -330.0, + "loss": 0.5232, + "loss/chosen-sft": 1.3984375, + "loss/dpo": 0.6796875, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.9453125, + "rewards/margins": 0.330078125, + "rewards/rejected": -1.2734375, + "step": 175 + }, + { + "epoch": 0.7787610619469026, + "grad_norm": 133.416748046875, + "learning_rate": 7e-07, + "logits/chosen": -0.59375, + "logits/rejected": -0.50390625, + "logps/chosen": -330.0, + "logps/rejected": -342.0, + "loss": 0.499, + "loss/chosen-sft": 1.6796875, + "loss/dpo": 0.609375, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.8828125, + "rewards/margins": 0.33203125, + "rewards/rejected": -1.21875, + "step": 176 + }, + { + "epoch": 0.7831858407079646, + "grad_norm": 23.086772918701172, + "learning_rate": 7e-07, + "logits/chosen": -0.5625, + "logits/rejected": -0.50390625, + "logps/chosen": -376.0, + "logps/rejected": -506.0, + "loss": 0.4585, + "loss/chosen-sft": 1.421875, + "loss/dpo": 0.392578125, + "rewards/accuracies": 0.90625, + "rewards/chosen": -0.84375, + "rewards/margins": 1.34375, + "rewards/rejected": -2.1875, + "step": 177 + }, + { + "epoch": 0.7876106194690266, + "grad_norm": 206.20298767089844, + "learning_rate": 7e-07, + "logits/chosen": -0.53125, + "logits/rejected": -0.470703125, + "logps/chosen": -276.0, + "logps/rejected": -296.0, + "loss": 0.5459, + "loss/chosen-sft": 1.2890625, + "loss/dpo": 0.5625, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.62890625, + "rewards/margins": 0.58203125, + "rewards/rejected": -1.2109375, + "step": 178 + }, + { + "epoch": 0.7920353982300885, + "grad_norm": 138.48703002929688, + "learning_rate": 7e-07, + "logits/chosen": -0.4375, + "logits/rejected": -0.51171875, + "logps/chosen": -294.0, + "logps/rejected": -364.0, + "loss": 0.4871, + "loss/chosen-sft": 1.328125, + "loss/dpo": 0.470703125, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.76171875, + "rewards/margins": 0.7109375, + "rewards/rejected": -1.46875, + "step": 179 + }, + { + "epoch": 0.7964601769911505, + "grad_norm": 7.023651599884033, + "learning_rate": 7e-07, + "logits/chosen": -0.65625, + "logits/rejected": -0.8359375, + "logps/chosen": -338.0, + "logps/rejected": -370.0, + "loss": 0.4651, + "loss/chosen-sft": 1.453125, + "loss/dpo": 0.431640625, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.8203125, + "rewards/margins": 1.015625, + "rewards/rejected": -1.828125, + "step": 180 + }, + { + "epoch": 0.8008849557522124, + "grad_norm": 101.0399169921875, + "learning_rate": 7e-07, + "logits/chosen": -0.51953125, + "logits/rejected": -0.578125, + "logps/chosen": -418.0, + "logps/rejected": -480.0, + "loss": 0.4773, + "loss/chosen-sft": 1.5, + "loss/dpo": 0.431640625, + "rewards/accuracies": 0.8125, + "rewards/chosen": -1.0625, + "rewards/margins": 1.21875, + "rewards/rejected": -2.28125, + "step": 181 + }, + { + "epoch": 0.8053097345132744, + "grad_norm": 42.745174407958984, + "learning_rate": 7e-07, + "logits/chosen": -0.6484375, + "logits/rejected": -0.5859375, + "logps/chosen": -376.0, + "logps/rejected": -418.0, + "loss": 0.4871, + "loss/chosen-sft": 1.5625, + "loss/dpo": 0.45703125, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.98828125, + "rewards/margins": 1.046875, + "rewards/rejected": -2.03125, + "step": 182 + }, + { + "epoch": 0.8097345132743363, + "grad_norm": 65.14553833007812, + "learning_rate": 7e-07, + "logits/chosen": -0.6484375, + "logits/rejected": -0.6953125, + "logps/chosen": -298.0, + "logps/rejected": -312.0, + "loss": 0.4883, + "loss/chosen-sft": 1.4921875, + "loss/dpo": 0.5234375, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.8828125, + "rewards/margins": 0.640625, + "rewards/rejected": -1.5234375, + "step": 183 + }, + { + "epoch": 0.8141592920353983, + "grad_norm": 306.6606750488281, + "learning_rate": 7e-07, + "logits/chosen": -0.451171875, + "logits/rejected": -0.5703125, + "logps/chosen": -320.0, + "logps/rejected": -420.0, + "loss": 0.5232, + "loss/chosen-sft": 1.3203125, + "loss/dpo": 0.482421875, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.79296875, + "rewards/margins": 1.2421875, + "rewards/rejected": -2.046875, + "step": 184 + }, + { + "epoch": 0.8185840707964602, + "grad_norm": 20.622095108032227, + "learning_rate": 7e-07, + "logits/chosen": -0.6171875, + "logits/rejected": -0.72265625, + "logps/chosen": -368.0, + "logps/rejected": -396.0, + "loss": 0.5178, + "loss/chosen-sft": 1.59375, + "loss/dpo": 0.490234375, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.76953125, + "rewards/margins": 0.90625, + "rewards/rejected": -1.671875, + "step": 185 + }, + { + "epoch": 0.8230088495575221, + "grad_norm": 15.8814115524292, + "learning_rate": 7e-07, + "logits/chosen": -0.640625, + "logits/rejected": -0.81640625, + "logps/chosen": -350.0, + "logps/rejected": -382.0, + "loss": 0.5017, + "loss/chosen-sft": 1.484375, + "loss/dpo": 0.53515625, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.83203125, + "rewards/margins": 0.8125, + "rewards/rejected": -1.6484375, + "step": 186 + }, + { + "epoch": 0.827433628318584, + "grad_norm": 45.224002838134766, + "learning_rate": 7e-07, + "logits/chosen": -0.478515625, + "logits/rejected": -0.404296875, + "logps/chosen": -294.0, + "logps/rejected": -386.0, + "loss": 0.5002, + "loss/chosen-sft": 1.3046875, + "loss/dpo": 0.453125, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.72265625, + "rewards/margins": 0.83984375, + "rewards/rejected": -1.5625, + "step": 187 + }, + { + "epoch": 0.831858407079646, + "grad_norm": 32.42481994628906, + "learning_rate": 7e-07, + "logits/chosen": -0.38671875, + "logits/rejected": -0.43359375, + "logps/chosen": -336.0, + "logps/rejected": -340.0, + "loss": 0.5051, + "loss/chosen-sft": 1.3359375, + "loss/dpo": 0.5390625, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.84765625, + "rewards/margins": 0.5703125, + "rewards/rejected": -1.4140625, + "step": 188 + }, + { + "epoch": 0.8362831858407079, + "grad_norm": 54.047035217285156, + "learning_rate": 7e-07, + "logits/chosen": -0.5234375, + "logits/rejected": -0.37890625, + "logps/chosen": -290.0, + "logps/rejected": -366.0, + "loss": 0.4817, + "loss/chosen-sft": 1.3671875, + "loss/dpo": 0.54296875, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.90234375, + "rewards/margins": 0.8125, + "rewards/rejected": -1.71875, + "step": 189 + }, + { + "epoch": 0.8407079646017699, + "grad_norm": 10.179818153381348, + "learning_rate": 7e-07, + "logits/chosen": -0.41796875, + "logits/rejected": -0.4296875, + "logps/chosen": -296.0, + "logps/rejected": -302.0, + "loss": 0.5049, + "loss/chosen-sft": 1.3984375, + "loss/dpo": 0.55859375, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.796875, + "rewards/margins": 0.482421875, + "rewards/rejected": -1.28125, + "step": 190 + }, + { + "epoch": 0.8451327433628318, + "grad_norm": 63.52503204345703, + "learning_rate": 7e-07, + "logits/chosen": -0.76953125, + "logits/rejected": -0.69921875, + "logps/chosen": -356.0, + "logps/rejected": -416.0, + "loss": 0.5083, + "loss/chosen-sft": 1.46875, + "loss/dpo": 0.486328125, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.93359375, + "rewards/margins": 0.859375, + "rewards/rejected": -1.7890625, + "step": 191 + }, + { + "epoch": 0.8495575221238938, + "grad_norm": 64.59293365478516, + "learning_rate": 7e-07, + "logits/chosen": -0.4609375, + "logits/rejected": -0.474609375, + "logps/chosen": -378.0, + "logps/rejected": -392.0, + "loss": 0.4697, + "loss/chosen-sft": 1.3203125, + "loss/dpo": 0.47265625, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.80078125, + "rewards/margins": 0.87109375, + "rewards/rejected": -1.671875, + "step": 192 + }, + { + "epoch": 0.8539823008849557, + "grad_norm": 48.203311920166016, + "learning_rate": 7e-07, + "logits/chosen": -0.3046875, + "logits/rejected": -0.31640625, + "logps/chosen": -308.0, + "logps/rejected": -390.0, + "loss": 0.5132, + "loss/chosen-sft": 1.171875, + "loss/dpo": 0.4375, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.6796875, + "rewards/margins": 0.9140625, + "rewards/rejected": -1.59375, + "step": 193 + }, + { + "epoch": 0.8584070796460177, + "grad_norm": 86.9441909790039, + "learning_rate": 7e-07, + "logits/chosen": -0.435546875, + "logits/rejected": -0.4609375, + "logps/chosen": -296.0, + "logps/rejected": -358.0, + "loss": 0.4749, + "loss/chosen-sft": 1.2734375, + "loss/dpo": 0.44140625, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.6484375, + "rewards/margins": 0.94921875, + "rewards/rejected": -1.59375, + "step": 194 + }, + { + "epoch": 0.8628318584070797, + "grad_norm": 108.06549835205078, + "learning_rate": 7e-07, + "logits/chosen": -0.4453125, + "logits/rejected": -0.5234375, + "logps/chosen": -342.0, + "logps/rejected": -404.0, + "loss": 0.4771, + "loss/chosen-sft": 1.4453125, + "loss/dpo": 0.65234375, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.9921875, + "rewards/margins": 0.64453125, + "rewards/rejected": -1.640625, + "step": 195 + }, + { + "epoch": 0.8672566371681416, + "grad_norm": 173.70831298828125, + "learning_rate": 7e-07, + "logits/chosen": -0.58984375, + "logits/rejected": -0.62890625, + "logps/chosen": -368.0, + "logps/rejected": -400.0, + "loss": 0.4434, + "loss/chosen-sft": 1.46875, + "loss/dpo": 0.44140625, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.9453125, + "rewards/margins": 1.046875, + "rewards/rejected": -1.984375, + "step": 196 + }, + { + "epoch": 0.8716814159292036, + "grad_norm": 41.173553466796875, + "learning_rate": 7e-07, + "logits/chosen": -0.60546875, + "logits/rejected": -0.671875, + "logps/chosen": -392.0, + "logps/rejected": -470.0, + "loss": 0.4729, + "loss/chosen-sft": 1.5, + "loss/dpo": 0.44140625, + "rewards/accuracies": 0.84375, + "rewards/chosen": -1.140625, + "rewards/margins": 1.1015625, + "rewards/rejected": -2.25, + "step": 197 + }, + { + "epoch": 0.8761061946902655, + "grad_norm": 14.614328384399414, + "learning_rate": 7e-07, + "logits/chosen": -0.68359375, + "logits/rejected": -0.76171875, + "logps/chosen": -360.0, + "logps/rejected": -384.0, + "loss": 0.5215, + "loss/chosen-sft": 1.484375, + "loss/dpo": 0.48828125, + "rewards/accuracies": 0.78125, + "rewards/chosen": -1.046875, + "rewards/margins": 0.91015625, + "rewards/rejected": -1.953125, + "step": 198 + }, + { + "epoch": 0.8805309734513275, + "grad_norm": 86.90143585205078, + "learning_rate": 7e-07, + "logits/chosen": -0.8046875, + "logits/rejected": -0.81640625, + "logps/chosen": -454.0, + "logps/rejected": -482.0, + "loss": 0.541, + "loss/chosen-sft": 1.7578125, + "loss/dpo": 0.55859375, + "rewards/accuracies": 0.78125, + "rewards/chosen": -1.3828125, + "rewards/margins": 1.0625, + "rewards/rejected": -2.4375, + "step": 199 + }, + { + "epoch": 0.8849557522123894, + "grad_norm": 24.392606735229492, + "learning_rate": 7e-07, + "logits/chosen": -0.6171875, + "logits/rejected": -0.73828125, + "logps/chosen": -326.0, + "logps/rejected": -384.0, + "loss": 0.4583, + "loss/chosen-sft": 1.59375, + "loss/dpo": 0.50390625, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.953125, + "rewards/margins": 0.99609375, + "rewards/rejected": -1.953125, + "step": 200 + }, + { + "epoch": 0.8893805309734514, + "grad_norm": 67.55127716064453, + "learning_rate": 7e-07, + "logits/chosen": -0.5078125, + "logits/rejected": -0.5546875, + "logps/chosen": -408.0, + "logps/rejected": -484.0, + "loss": 0.5017, + "loss/chosen-sft": 1.5625, + "loss/dpo": 0.494140625, + "rewards/accuracies": 0.8125, + "rewards/chosen": -1.1328125, + "rewards/margins": 1.25, + "rewards/rejected": -2.375, + "step": 201 + }, + { + "epoch": 0.8938053097345132, + "grad_norm": 35.01057434082031, + "learning_rate": 7e-07, + "logits/chosen": -0.69921875, + "logits/rejected": -0.640625, + "logps/chosen": -336.0, + "logps/rejected": -458.0, + "loss": 0.5005, + "loss/chosen-sft": 1.4609375, + "loss/dpo": 0.478515625, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.75390625, + "rewards/margins": 1.3046875, + "rewards/rejected": -2.0625, + "step": 202 + }, + { + "epoch": 0.8982300884955752, + "grad_norm": 24.339378356933594, + "learning_rate": 7e-07, + "logits/chosen": -0.408203125, + "logits/rejected": -0.435546875, + "logps/chosen": -310.0, + "logps/rejected": -408.0, + "loss": 0.4685, + "loss/chosen-sft": 1.28125, + "loss/dpo": 0.455078125, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.73828125, + "rewards/margins": 1.234375, + "rewards/rejected": -1.9765625, + "step": 203 + }, + { + "epoch": 0.9026548672566371, + "grad_norm": 135.99609375, + "learning_rate": 7e-07, + "logits/chosen": -0.498046875, + "logits/rejected": -0.5625, + "logps/chosen": -314.0, + "logps/rejected": -396.0, + "loss": 0.4561, + "loss/chosen-sft": 1.3125, + "loss/dpo": 0.47265625, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.62109375, + "rewards/margins": 0.9140625, + "rewards/rejected": -1.53125, + "step": 204 + }, + { + "epoch": 0.9070796460176991, + "grad_norm": 137.96900939941406, + "learning_rate": 7e-07, + "logits/chosen": -0.50390625, + "logits/rejected": -0.7109375, + "logps/chosen": -330.0, + "logps/rejected": -432.0, + "loss": 0.5061, + "loss/chosen-sft": 1.3359375, + "loss/dpo": 0.470703125, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.7109375, + "rewards/margins": 0.953125, + "rewards/rejected": -1.6640625, + "step": 205 + }, + { + "epoch": 0.911504424778761, + "grad_norm": 165.43182373046875, + "learning_rate": 7e-07, + "logits/chosen": -0.4609375, + "logits/rejected": -0.3828125, + "logps/chosen": -260.0, + "logps/rejected": -288.0, + "loss": 0.5105, + "loss/chosen-sft": 1.265625, + "loss/dpo": 0.5625, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.609375, + "rewards/margins": 0.494140625, + "rewards/rejected": -1.109375, + "step": 206 + }, + { + "epoch": 0.915929203539823, + "grad_norm": 216.99935913085938, + "learning_rate": 7e-07, + "logits/chosen": -0.43359375, + "logits/rejected": -0.59375, + "logps/chosen": -308.0, + "logps/rejected": -344.0, + "loss": 0.4929, + "loss/chosen-sft": 1.3125, + "loss/dpo": 0.466796875, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.57421875, + "rewards/margins": 0.75390625, + "rewards/rejected": -1.328125, + "step": 207 + }, + { + "epoch": 0.9203539823008849, + "grad_norm": 51.624351501464844, + "learning_rate": 7e-07, + "logits/chosen": -0.5234375, + "logits/rejected": -0.6328125, + "logps/chosen": -304.0, + "logps/rejected": -286.0, + "loss": 0.5669, + "loss/chosen-sft": 1.4140625, + "loss/dpo": 0.59375, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.60546875, + "rewards/margins": 0.314453125, + "rewards/rejected": -0.91796875, + "step": 208 + }, + { + "epoch": 0.9247787610619469, + "grad_norm": 29.490407943725586, + "learning_rate": 7e-07, + "logits/chosen": -0.55859375, + "logits/rejected": -0.5703125, + "logps/chosen": -366.0, + "logps/rejected": -350.0, + "loss": 0.5042, + "loss/chosen-sft": 1.234375, + "loss/dpo": 0.46484375, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.451171875, + "rewards/margins": 0.796875, + "rewards/rejected": -1.25, + "step": 209 + }, + { + "epoch": 0.9292035398230089, + "grad_norm": 159.65997314453125, + "learning_rate": 7e-07, + "logits/chosen": -0.67578125, + "logits/rejected": -0.55859375, + "logps/chosen": -340.0, + "logps/rejected": -346.0, + "loss": 0.5374, + "loss/chosen-sft": 1.3125, + "loss/dpo": 0.474609375, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.6328125, + "rewards/margins": 0.83984375, + "rewards/rejected": -1.46875, + "step": 210 + }, + { + "epoch": 0.9336283185840708, + "grad_norm": 22.276451110839844, + "learning_rate": 7e-07, + "logits/chosen": -0.41015625, + "logits/rejected": -0.46875, + "logps/chosen": -308.0, + "logps/rejected": -324.0, + "loss": 0.5066, + "loss/chosen-sft": 1.359375, + "loss/dpo": 0.57421875, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.78515625, + "rewards/margins": 0.53515625, + "rewards/rejected": -1.3203125, + "step": 211 + }, + { + "epoch": 0.9380530973451328, + "grad_norm": 62.60710525512695, + "learning_rate": 7e-07, + "logits/chosen": -0.68359375, + "logits/rejected": -0.76171875, + "logps/chosen": -378.0, + "logps/rejected": -396.0, + "loss": 0.4917, + "loss/chosen-sft": 1.5, + "loss/dpo": 0.578125, + "rewards/accuracies": 0.71875, + "rewards/chosen": -0.94921875, + "rewards/margins": 0.8984375, + "rewards/rejected": -1.8515625, + "step": 212 + }, + { + "epoch": 0.9424778761061947, + "grad_norm": 183.3363037109375, + "learning_rate": 7e-07, + "logits/chosen": -0.62109375, + "logits/rejected": -0.71875, + "logps/chosen": -328.0, + "logps/rejected": -326.0, + "loss": 0.426, + "loss/chosen-sft": 1.375, + "loss/dpo": 0.47265625, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.80078125, + "rewards/margins": 0.8046875, + "rewards/rejected": -1.609375, + "step": 213 + }, + { + "epoch": 0.9469026548672567, + "grad_norm": 182.4455108642578, + "learning_rate": 7e-07, + "logits/chosen": -0.62890625, + "logits/rejected": -0.57421875, + "logps/chosen": -358.0, + "logps/rejected": -480.0, + "loss": 0.5305, + "loss/chosen-sft": 1.4765625, + "loss/dpo": 0.5234375, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.80078125, + "rewards/margins": 1.609375, + "rewards/rejected": -2.40625, + "step": 214 + }, + { + "epoch": 0.9513274336283186, + "grad_norm": 112.33076477050781, + "learning_rate": 7e-07, + "logits/chosen": -0.52734375, + "logits/rejected": -0.58203125, + "logps/chosen": -344.0, + "logps/rejected": -448.0, + "loss": 0.4475, + "loss/chosen-sft": 1.3359375, + "loss/dpo": 0.3359375, + "rewards/accuracies": 0.90625, + "rewards/chosen": -0.61328125, + "rewards/margins": 1.4453125, + "rewards/rejected": -2.0625, + "step": 215 + }, + { + "epoch": 0.9557522123893806, + "grad_norm": 125.87716674804688, + "learning_rate": 7e-07, + "logits/chosen": -0.4296875, + "logits/rejected": -0.47265625, + "logps/chosen": -314.0, + "logps/rejected": -462.0, + "loss": 0.4885, + "loss/chosen-sft": 1.5078125, + "loss/dpo": 0.365234375, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.84375, + "rewards/margins": 1.5546875, + "rewards/rejected": -2.390625, + "step": 216 + }, + { + "epoch": 0.9601769911504425, + "grad_norm": 159.3242950439453, + "learning_rate": 7e-07, + "logits/chosen": -0.53515625, + "logits/rejected": -0.53125, + "logps/chosen": -330.0, + "logps/rejected": -398.0, + "loss": 0.4597, + "loss/chosen-sft": 1.34375, + "loss/dpo": 0.4140625, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.765625, + "rewards/margins": 1.171875, + "rewards/rejected": -1.9453125, + "step": 217 + }, + { + "epoch": 0.9646017699115044, + "grad_norm": 36.168601989746094, + "learning_rate": 7e-07, + "logits/chosen": -0.451171875, + "logits/rejected": -0.44140625, + "logps/chosen": -326.0, + "logps/rejected": -366.0, + "loss": 0.4858, + "loss/chosen-sft": 1.3203125, + "loss/dpo": 0.52734375, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.953125, + "rewards/margins": 0.94921875, + "rewards/rejected": -1.8984375, + "step": 218 + }, + { + "epoch": 0.9690265486725663, + "grad_norm": 19.605365753173828, + "learning_rate": 7e-07, + "logits/chosen": -0.4921875, + "logits/rejected": -0.55859375, + "logps/chosen": -284.0, + "logps/rejected": -370.0, + "loss": 0.4822, + "loss/chosen-sft": 1.3046875, + "loss/dpo": 0.40234375, + "rewards/accuracies": 0.84375, + "rewards/chosen": -0.90234375, + "rewards/margins": 1.046875, + "rewards/rejected": -1.9453125, + "step": 219 + }, + { + "epoch": 0.9734513274336283, + "grad_norm": 143.3219451904297, + "learning_rate": 7e-07, + "logits/chosen": -0.37890625, + "logits/rejected": -0.4765625, + "logps/chosen": -366.0, + "logps/rejected": -510.0, + "loss": 0.509, + "loss/chosen-sft": 1.515625, + "loss/dpo": 0.54296875, + "rewards/accuracies": 0.6875, + "rewards/chosen": -1.2734375, + "rewards/margins": 1.03125, + "rewards/rejected": -2.296875, + "step": 220 + }, + { + "epoch": 0.9778761061946902, + "grad_norm": 268.8928527832031, + "learning_rate": 7e-07, + "logits/chosen": -0.392578125, + "logits/rejected": -0.30859375, + "logps/chosen": -249.0, + "logps/rejected": -414.0, + "loss": 0.5212, + "loss/chosen-sft": 1.3984375, + "loss/dpo": 0.404296875, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.78125, + "rewards/margins": 1.53125, + "rewards/rejected": -2.3125, + "step": 221 + }, + { + "epoch": 0.9823008849557522, + "grad_norm": 295.1648864746094, + "learning_rate": 7e-07, + "logits/chosen": -0.56640625, + "logits/rejected": -0.6328125, + "logps/chosen": -368.0, + "logps/rejected": -496.0, + "loss": 0.5183, + "loss/chosen-sft": 1.5, + "loss/dpo": 0.5, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.265625, + "rewards/margins": 0.96875, + "rewards/rejected": -2.234375, + "step": 222 + }, + { + "epoch": 0.9867256637168141, + "grad_norm": 21.13129425048828, + "learning_rate": 7e-07, + "logits/chosen": -0.498046875, + "logits/rejected": -0.6328125, + "logps/chosen": -346.0, + "logps/rejected": -374.0, + "loss": 0.4485, + "loss/chosen-sft": 1.5234375, + "loss/dpo": 0.5234375, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.0390625, + "rewards/margins": 0.984375, + "rewards/rejected": -2.03125, + "step": 223 + }, + { + "epoch": 0.9911504424778761, + "grad_norm": 199.18679809570312, + "learning_rate": 7e-07, + "logits/chosen": -0.62890625, + "logits/rejected": -0.625, + "logps/chosen": -338.0, + "logps/rejected": -406.0, + "loss": 0.4338, + "loss/chosen-sft": 1.484375, + "loss/dpo": 0.5, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.109375, + "rewards/margins": 0.94140625, + "rewards/rejected": -2.046875, + "step": 224 + }, + { + "epoch": 0.995575221238938, + "grad_norm": 238.7730255126953, + "learning_rate": 7e-07, + "logits/chosen": -0.478515625, + "logits/rejected": -0.30078125, + "logps/chosen": -300.0, + "logps/rejected": -416.0, + "loss": 0.4624, + "loss/chosen-sft": 1.4765625, + "loss/dpo": 0.451171875, + "rewards/accuracies": 0.78125, + "rewards/chosen": -0.9609375, + "rewards/margins": 1.2890625, + "rewards/rejected": -2.25, + "step": 225 + }, + { + "epoch": 1.0, + "grad_norm": 94.32278442382812, + "learning_rate": 7e-07, + "logits/chosen": -0.482421875, + "logits/rejected": -0.55078125, + "logps/chosen": -312.0, + "logps/rejected": -424.0, + "loss": 0.4558, + "loss/chosen-sft": 1.359375, + "loss/dpo": 0.478515625, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.828125, + "rewards/margins": 1.109375, + "rewards/rejected": -1.9296875, + "step": 226 + }, + { + "epoch": 1.0, + "step": 226, + "total_flos": 0.0, + "train_loss": 0.5733826223727876, + "train_runtime": 2164.3688, + "train_samples_per_second": 26.647, + "train_steps_per_second": 0.104 + } + ], + "logging_steps": 1, + "max_steps": 226, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..b0303f7 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:151f33baf927924d9aac9f4a95f3d9e3c3f97c461aab35d45e6b66edc8d8d820 +size 9016