From cf60694ff3b738cbb556ee43b981110f8997d47d Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Wed, 3 Jun 2026 22:08:02 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: jackf857/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0 Source: Original Platform --- .gitattributes | 36 + README.md | 78 + all_results.json | 23 + config.json | 29 + eval_results.json | 17 + generation_config.json | 9 + margin_logs/margins.jsonl | 661 ++ model-00001-of-00007.safetensors | 3 + model-00002-of-00007.safetensors | 3 + model-00003-of-00007.safetensors | 3 + model-00004-of-00007.safetensors | 3 + model-00005-of-00007.safetensors | 3 + model-00006-of-00007.safetensors | 3 + model-00007-of-00007.safetensors | 3 + model.safetensors.index.json | 298 + special_tokens_map.json | 23 + tokenizer.json | 3 + tokenizer_config.json | 2064 +++++ train_results.json | 9 + trainer_state.json | 12653 +++++++++++++++++++++++++++++ 20 files changed, 15924 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 all_results.json create mode 100644 config.json create mode 100644 eval_results.json create mode 100644 generation_config.json create mode 100644 margin_logs/margins.jsonl create mode 100644 model-00001-of-00007.safetensors create mode 100644 model-00002-of-00007.safetensors create mode 100644 model-00003-of-00007.safetensors create mode 100644 model-00004-of-00007.safetensors create mode 100644 model-00005-of-00007.safetensors create mode 100644 model-00006-of-00007.safetensors create mode 100644 model-00007-of-00007.safetensors create mode 100644 model.safetensors.index.json create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 train_results.json create mode 100644 trainer_state.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..de405dc --- /dev/null +++ b/README.md @@ -0,0 +1,78 @@ +--- +library_name: transformers +base_model: W-61/llama-3-8b-base-sft-hh-harmless-4xh200 +tags: +- alignment-handbook +- new-dpo +- generated_from_trainer +datasets: +- Anthropic/hh-rlhf +model-index: +- name: llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0 + results: [] +--- + + + +# llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0 + +This model is a fine-tuned version of [W-61/llama-3-8b-base-sft-hh-harmless-4xh200](https://huggingface.co/W-61/llama-3-8b-base-sft-hh-harmless-4xh200) on the Anthropic/hh-rlhf dataset. +It achieves the following results on the evaluation set: +- Loss: 0.5680 +- Fcm Dpo/beta: 0.6569 +- Margin Dpo/margin Mean: 1.3983 +- Margin Dpo/margin Std: 2.4811 +- Logps/chosen: -77.8234 +- Logps/rejected: -83.9112 +- Logps/ref Chosen: -74.8595 +- Logps/ref Rejected: -79.5490 +- Logits/chosen: 0.1962 +- Logits/rejected: 0.1585 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-07 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- distributed_type: multi-GPU +- num_devices: 4 +- gradient_accumulation_steps: 2 +- total_train_batch_size: 64 +- total_eval_batch_size: 32 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 1 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | Fcm Dpo/beta | Margin Dpo/margin Mean | Margin Dpo/margin Std | Logps/chosen | Logps/rejected | Logps/ref Chosen | Logps/ref Rejected | Logits/chosen | Logits/rejected | +|:-------------:|:------:|:----:|:---------------:|:------------:|:----------------------:|:---------------------:|:------------:|:--------------:|:----------------:|:------------------:|:-------------:|:---------------:| +| 1.0309 | 0.3023 | 200 | 0.6114 | 1.5800 | 0.5045 | 1.0288 | -75.8633 | -81.0573 | -74.8595 | -79.5490 | 0.1685 | 0.1337 | +| 1.0799 | 0.6047 | 400 | 0.5736 | 0.7420 | 1.1067 | 2.0391 | -77.2471 | -83.0433 | -74.8595 | -79.5490 | 0.2053 | 0.1675 | +| 1.0999 | 0.9070 | 600 | 0.5680 | 0.6569 | 1.3983 | 2.4811 | -77.8234 | -83.9112 | -74.8595 | -79.5490 | 0.1962 | 0.1585 | + + +### Framework versions + +- Transformers 4.51.0 +- Pytorch 2.3.1+cu121 +- Datasets 2.21.0 +- Tokenizers 0.21.4 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..377d5fc --- /dev/null +++ b/all_results.json @@ -0,0 +1,23 @@ +{ + "epoch": 0.999244142101285, + "eval_fcm_dpo/beta": 0.567912757396698, + "eval_logits/chosen": 0.20129863917827606, + "eval_logits/rejected": 0.16339144110679626, + "eval_logps/chosen": -77.83040618896484, + "eval_logps/ref_chosen": -74.85946655273438, + "eval_logps/ref_rejected": -79.54898834228516, + "eval_logps/rejected": -83.91458892822266, + "eval_loss": 0.5609715580940247, + "eval_margin_dpo/margin_mean": 1.3946577310562134, + "eval_margin_dpo/margin_std": 2.489074230194092, + "eval_runtime": 37.9328, + "eval_samples": 2303, + "eval_samples_per_second": 60.713, + "eval_steps_per_second": 1.898, + "total_flos": 0.0, + "train_loss": 1.1173522615757363, + "train_runtime": 1752.852, + "train_samples": 42336, + "train_samples_per_second": 24.153, + "train_steps_per_second": 0.377 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..5092b09 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.51.0", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000..f12b4e8 --- /dev/null +++ b/eval_results.json @@ -0,0 +1,17 @@ +{ + "epoch": 0.999244142101285, + "eval_fcm_dpo/beta": 0.567912757396698, + "eval_logits/chosen": 0.20129863917827606, + "eval_logits/rejected": 0.16339144110679626, + "eval_logps/chosen": -77.83040618896484, + "eval_logps/ref_chosen": -74.85946655273438, + "eval_logps/ref_rejected": -79.54898834228516, + "eval_logps/rejected": -83.91458892822266, + "eval_loss": 0.5609715580940247, + "eval_margin_dpo/margin_mean": 1.3946577310562134, + "eval_margin_dpo/margin_std": 2.489074230194092, + "eval_runtime": 37.9328, + "eval_samples": 2303, + "eval_samples_per_second": 60.713, + "eval_steps_per_second": 1.898 +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..76247c9 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.0" +} diff --git a/margin_logs/margins.jsonl b/margin_logs/margins.jsonl new file mode 100644 index 0000000..6263df5 --- /dev/null +++ b/margin_logs/margins.jsonl @@ -0,0 +1,661 @@ +{"epoch": 0.0, "step": 1, "batch_size": 64, "mean": -0.0013527870178222656, "std": 0.2564818859100342, "min": -0.736083984375, "p10": -0.3432229995727539, "median": 0.038166046142578125, "p90": 0.29227676391601565, "max": 0.645111083984375, "pos_frac": 0.578125, "sample": [0.1120758056640625, 0.12518310546875, 0.31621551513671875, 0.13765716552734375, -0.12592506408691406, 0.23141098022460938, -0.21887779235839844, 0.21950721740722656, 0.04480743408203125, 0.020877838134765625, 0.0570220947265625, 0.058269500732421875, -0.4338226318359375, -0.030628204345703125, 0.645111083984375, -0.395477294921875, 0.09050941467285156, 0.0007190704345703125, -0.34615325927734375, 0.016077041625976562, -0.33638572692871094, 0.293853759765625, 0.17610931396484375, 0.22386932373046875, 0.21470260620117188, -0.08536529541015625, 0.0907745361328125, -0.03816986083984375, 0.39190101623535156, 0.16336441040039062, 0.08024787902832031, -0.031158447265625, 0.08477020263671875, 0.002460479736328125, -0.242034912109375, 0.07232666015625, -0.60186767578125, 0.20531463623046875, 0.155731201171875, -0.14299774169921875, -0.25698089599609375, 0.12331962585449219, -0.26497650146484375, 0.15140533447265625, -0.0920257568359375, -0.18599319458007812, 0.19028091430664062, 0.2496490478515625, 0.42162322998046875, 0.17873382568359375, -0.1525421142578125, -0.4972076416015625, 0.32010650634765625, -0.10365867614746094, -0.233795166015625, -0.19828224182128906, -0.4018898010253906, -0.13407135009765625, -0.09596633911132812, 0.031524658203125, 0.28859710693359375, -0.192962646484375, -0.736083984375, 0.3026123046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000001.npy"} +{"epoch": 0.0015117157974300832, "step": 2, "batch_size": 64, "mean": 0.03744968771934509, "std": 0.2875921130180359, "min": -0.7604827880859375, "p10": -0.2812448501586914, "median": 0.03963661193847656, "p90": 0.3654294967651367, "max": 0.8134727478027344, "pos_frac": 0.5625, "sample": [0.30594635009765625, -0.24289894104003906, -0.11509323120117188, -0.13417816162109375, 0.06942558288574219, 0.36568641662597656, -0.14640045166015625, 0.1497650146484375, 0.30261993408203125, 0.10124588012695312, 0.13028717041015625, -0.0031890869140625, 0.0361480712890625, 0.5662612915039062, 0.09694290161132812, -0.01091766357421875, 0.1128997802734375, 0.0411834716796875, -0.21860504150390625, -0.1236419677734375, -0.08812713623046875, 0.10360527038574219, 0.1790008544921875, -0.5114288330078125, 0.3056755065917969, -0.14553451538085938, 0.28168487548828125, 0.26990509033203125, 0.1686878204345703, 0.038089752197265625, 0.19541168212890625, -0.10783576965332031, -0.2644004821777344, -0.19707489013671875, -0.140472412109375, 0.1349811553955078, 0.19672012329101562, -0.0714111328125, 0.53369140625, 0.1271820068359375, 0.8134727478027344, 0.2990264892578125, -0.7604827880859375, -0.08274078369140625, 0.05890846252441406, 0.029361724853515625, 0.4510040283203125, -0.1599273681640625, -0.29346656799316406, 0.10005569458007812, -0.27509117126464844, -0.1937713623046875, 0.19167327880859375, 0.28173065185546875, -0.09406471252441406, -0.3380699157714844, -0.29186248779296875, 0.36483001708984375, 0.009979248046875, 0.44391632080078125, -0.126708984375, -0.6550216674804688, 0.6160736083984375, -0.28388214111328125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000002.npy"} +{"epoch": 0.0030234315948601664, "step": 3, "batch_size": 64, "mean": 0.0022667646408081055, "std": 0.2785518169403076, "min": -0.7256278991699219, "p10": -0.33563461303710934, "median": 0.0020198822021484375, "p90": 0.30558624267578127, "max": 0.7148895263671875, "pos_frac": 0.5, "sample": [-0.1335296630859375, -0.23439788818359375, -0.26641845703125, -0.3172454833984375, 0.2731170654296875, 0.147308349609375, -0.21738815307617188, 0.19343185424804688, 0.59539794921875, -0.009796142578125, -0.11566543579101562, -0.326934814453125, 0.3101043701171875, -0.06134033203125, -0.18508148193359375, 0.19443511962890625, -0.02222442626953125, -0.1602325439453125, -0.038330078125, -0.33936309814453125, -0.1295623779296875, 0.04879951477050781, -0.34272003173828125, 0.06082916259765625, -0.034725189208984375, 0.2518901824951172, 0.2950439453125, 0.207366943359375, 0.045196533203125, -0.107879638671875, 0.09398078918457031, -0.7256278991699219, -0.4303436279296875, -0.17113494873046875, -0.13869476318359375, -0.13547706604003906, -0.38543701171875, 0.1047821044921875, 0.12008476257324219, 0.5058517456054688, 0.5989532470703125, -0.316436767578125, -0.2975616455078125, 0.7148895263671875, 0.11515045166015625, 0.41417694091796875, -0.11043930053710938, 0.2758960723876953, 0.10410690307617188, -0.05535125732421875, -0.35755157470703125, 0.2571868896484375, 0.21576309204101562, 0.021724700927734375, -0.11432647705078125, 0.07358551025390625, 0.27191162109375, 0.0352325439453125, -0.08795166015625, 0.3825531005859375, 0.013835906982421875, 0.02032470703125, -0.5776214599609375, 0.1289520263671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000003.npy"} +{"epoch": 0.0045351473922902496, "step": 4, "batch_size": 64, "mean": -0.031194984912872314, "std": 0.34349071979522705, "min": -0.964447021484375, "p10": -0.5006227493286133, "median": 0.039707183837890625, "p90": 0.27875442504882814, "max": 0.912384033203125, "pos_frac": 0.53125, "sample": [-0.3875732421875, 0.18585205078125, 0.05181121826171875, -0.771881103515625, -0.01505279541015625, -0.4566497802734375, 0.15564727783203125, 0.1598663330078125, 0.10662460327148438, 0.198028564453125, 0.14223861694335938, 0.17592430114746094, 0.2145233154296875, -0.5481643676757812, -0.2858314514160156, -0.106475830078125, 0.2795257568359375, -0.2725715637207031, 0.49642181396484375, -0.64630126953125, -0.02791595458984375, -0.10422897338867188, 0.31833648681640625, -0.25714111328125, 0.24838638305664062, 0.0796966552734375, 0.2517890930175781, 0.24019432067871094, -0.11464309692382812, 0.11104583740234375, 0.912384033203125, -0.1468963623046875, 0.3227081298828125, -0.00308990478515625, -0.6574172973632812, 0.1010284423828125, 0.1962890625, -0.29535675048828125, -0.964447021484375, -0.15958404541015625, 0.127288818359375, -0.17996978759765625, 0.20616912841796875, -0.44506072998046875, 0.00177764892578125, 0.5060577392578125, -0.7436428070068359, -0.14168167114257812, -0.3272247314453125, 0.10292816162109375, 0.27695465087890625, -0.44652557373046875, -0.200469970703125, 0.13289260864257812, 0.169677734375, -0.244140625, -0.00115966796875, 0.5714302062988281, 0.2568092346191406, -0.5194683074951172, 0.0732421875, 0.0276031494140625, 0.10190963745117188, -0.0289764404296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000004.npy"} +{"epoch": 0.006046863189720333, "step": 5, "batch_size": 64, "mean": -0.02210336923599243, "std": 0.28807249665260315, "min": -0.5874366760253906, "p10": -0.3598323822021484, "median": -0.021406173706054688, "p90": 0.30419540405273454, "max": 0.9056777954101562, "pos_frac": 0.46875, "sample": [-0.18388938903808594, 0.06473541259765625, 0.07703399658203125, -0.233123779296875, 0.0389404296875, 0.322265625, -0.010120391845703125, -0.10947418212890625, 0.0882720947265625, 0.238311767578125, 0.49353790283203125, 0.0670166015625, -0.3456878662109375, -0.012369155883789062, -0.5874366760253906, 0.10887908935546875, 0.165496826171875, -0.1350116729736328, -0.34661102294921875, -0.11896896362304688, 0.13079452514648438, 0.05218696594238281, 0.0621795654296875, 0.0640411376953125, -0.49609375, 0.2327880859375, -0.4980010986328125, 0.134674072265625, 0.4241943359375, -0.049442291259765625, 0.26203155517578125, -0.3637542724609375, -0.188934326171875, 0.18089866638183594, 0.2332916259765625, -0.3506813049316406, -0.128509521484375, -0.1629180908203125, 0.14988136291503906, -0.15407752990722656, -0.051189422607421875, -0.3643951416015625, 0.03202056884765625, -0.23852920532226562, 0.9056777954101562, -0.2521038055419922, 0.2529449462890625, -0.1923370361328125, 0.20958709716796875, -0.5314788818359375, -0.030443191528320312, 0.32723236083984375, 0.20049667358398438, 0.37860870361328125, 0.72650146484375, -0.2650871276855469, -0.2344207763671875, -0.263336181640625, -0.13327789306640625, -0.26276397705078125, -0.2266521453857422, -0.16651153564453125, -0.39785003662109375, 0.046344757080078125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000005.npy"} +{"epoch": 0.007558578987150416, "step": 6, "batch_size": 64, "mean": -0.07127270102500916, "std": 0.2968938946723938, "min": -0.787506103515625, "p10": -0.5217666625976562, "median": -0.052906036376953125, "p90": 0.21348171234130864, "max": 0.9662628173828125, "pos_frac": 0.375, "sample": [-0.16788101196289062, -0.06393623352050781, -0.16868209838867188, 0.11241912841796875, -0.17227935791015625, -0.045948028564453125, 0.016468048095703125, -0.3143310546875, -0.31369781494140625, -0.2483692169189453, 0.13115692138671875, -0.54339599609375, -0.6362876892089844, 0.15639495849609375, -0.158203125, 0.0072784423828125, 0.14914703369140625, 0.3206024169921875, -0.0978546142578125, -0.6529979705810547, -0.0256805419921875, -0.5755844116210938, -0.09212684631347656, 0.18206024169921875, 0.17657470703125, -0.4712982177734375, -0.054779052734375, 0.9662628173828125, -0.035007476806640625, 0.22343063354492188, -0.029964447021484375, 0.16070556640625, 0.1797332763671875, -0.1494293212890625, -0.5858650207519531, 0.2191162109375, -0.2789649963378906, -0.12911224365234375, -0.15825653076171875, -0.01615142822265625, -0.05103302001953125, -0.18470382690429688, 0.11974334716796875, 0.15742111206054688, -0.17621612548828125, -0.135162353515625, 0.5350341796875, -0.07584762573242188, -0.27907562255859375, 0.32977294921875, -0.0283355712890625, 0.01799774169921875, -0.6747894287109375, -0.3296794891357422, 0.15123748779296875, -0.061305999755859375, -0.19736671447753906, -0.28319549560546875, -0.787506103515625, 0.15460205078125, -0.044891357421875, 0.0092010498046875, 0.2570457458496094, 0.2003345489501953], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000006.npy"} +{"epoch": 0.009070294784580499, "step": 7, "batch_size": 64, "mean": -0.005805850028991699, "std": 0.28707170486450195, "min": -0.8571624755859375, "p10": -0.39522438049316405, "median": -0.00482177734375, "p90": 0.3309381484985352, "max": 0.7373809814453125, "pos_frac": 0.484375, "sample": [0.08347320556640625, -0.4468116760253906, -0.0419921875, 0.0078029632568359375, 0.20220947265625, 0.28870201110839844, 0.3243999481201172, 0.512542724609375, -0.2318878173828125, -0.000751495361328125, -0.15338897705078125, -0.08062934875488281, 0.3780059814453125, 0.19078826904296875, -0.12360954284667969, -0.021326065063476562, 0.16266250610351562, 0.32132720947265625, 0.033916473388671875, -0.45380401611328125, -0.09845924377441406, -0.22510528564453125, -0.395660400390625, 0.26678466796875, 0.2750701904296875, 0.12055587768554688, -0.08890151977539062, -0.019256591796875, -0.040679931640625, 0.015888214111328125, -0.1197662353515625, -0.13890838623046875, -0.29454803466796875, 0.13608932495117188, -0.04583740234375, 0.34487152099609375, 0.11080551147460938, 0.16455459594726562, 0.13422012329101562, -0.10843467712402344, -0.008892059326171875, -0.2325267791748047, 0.209716796875, 0.7373809814453125, -0.4583740234375, -0.8571624755859375, -0.3942070007324219, 0.2057170867919922, 0.338043212890625, -0.0355224609375, -0.11462593078613281, -0.4972114562988281, 0.27088165283203125, 0.04410552978515625, 0.16833877563476562, -0.5021438598632812, -0.067779541015625, 0.05416107177734375, -0.37652587890625, -0.3345947265625, 0.051300048828125, 0.333740234375, 0.4871826171875, -0.33748817443847656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000007.npy"} +{"epoch": 0.010582010582010581, "step": 8, "batch_size": 64, "mean": -0.02905610203742981, "std": 0.33728453516960144, "min": -0.8250236511230469, "p10": -0.38114471435546876, "median": -0.08983325958251953, "p90": 0.37100143432617194, "max": 0.9498672485351562, "pos_frac": 0.40625, "sample": [-0.10104560852050781, -0.1317901611328125, 0.476593017578125, -0.34343719482421875, -0.6429367065429688, -0.126922607421875, 0.071990966796875, 0.552398681640625, -0.4311695098876953, -0.17474365234375, -0.3572845458984375, 0.1612396240234375, -0.11370468139648438, -0.3422126770019531, 0.8626174926757812, 0.8192062377929688, 0.9498672485351562, 0.030076980590820312, -0.2663459777832031, 0.048004150390625, -0.07599830627441406, -0.5549888610839844, -0.09139251708984375, 0.073394775390625, -0.08827400207519531, -0.1902179718017578, -0.05792045593261719, -0.3406715393066406, -0.06482315063476562, -0.1393413543701172, 0.38204193115234375, 0.1881561279296875, -0.09346771240234375, -0.187347412109375, -0.17213821411132812, 0.1107330322265625, 0.2589569091796875, 0.30562591552734375, 0.1594390869140625, 0.0598602294921875, 0.1646575927734375, -0.1012115478515625, 0.3555030822753906, -0.3703460693359375, -0.14794540405273438, 0.18465423583984375, 0.32423973083496094, -0.09828567504882812, -0.385772705078125, -0.07151031494140625, -0.11908531188964844, 0.18192481994628906, -0.8250236511230469, -0.24076080322265625, -0.5899505615234375, 0.058803558349609375, -0.13187789916992188, -0.29985809326171875, -0.13731002807617188, 0.3776435852050781, 0.000476837158203125, 0.15627288818359375, -0.5504074096679688, -0.016448974609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000008.npy"} +{"epoch": 0.012093726379440665, "step": 9, "batch_size": 64, "mean": 0.02298620343208313, "std": 0.30197957158088684, "min": -0.63385009765625, "p10": -0.36639518737792964, "median": 0.02261066436767578, "p90": 0.39187774658203134, "max": 0.7379074096679688, "pos_frac": 0.546875, "sample": [-0.34169769287109375, 0.18222808837890625, 0.07757949829101562, 0.196319580078125, -0.2557506561279297, -0.11728286743164062, -0.06576919555664062, -0.3958091735839844, -0.2425079345703125, -0.0695953369140625, 0.045013427734375, -0.06219673156738281, 0.7379074096679688, -0.18547821044921875, 0.564788818359375, -0.6310615539550781, -0.17768096923828125, -0.137359619140625, -0.0497283935546875, 0.06577873229980469, 0.1320648193359375, 0.014225006103515625, 0.3216094970703125, 0.4574737548828125, -0.5373401641845703, 0.0106048583984375, 0.11235809326171875, 0.33521270751953125, 0.023271560668945312, -0.3322410583496094, 0.082977294921875, -0.3363456726074219, 0.47551727294921875, 0.7282867431640625, 0.134552001953125, -0.23248291015625, 0.061492919921875, -0.23020172119140625, -0.4615936279296875, 0.39891815185546875, 0.37545013427734375, -0.3822746276855469, 0.215484619140625, 0.5519866943359375, -0.009235382080078125, 0.277557373046875, 0.06859779357910156, 0.141815185546875, -0.0933074951171875, -0.3769798278808594, 0.25414276123046875, -0.01531219482421875, 0.02194976806640625, 0.2739410400390625, 0.08419036865234375, 0.2799224853515625, 0.08056640625, -0.053466796875, 0.28426361083984375, -0.07065391540527344, -0.24896240234375, -0.63385009765625, -0.14089584350585938, 0.290130615234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000009.npy"} +{"epoch": 0.013605442176870748, "step": 10, "batch_size": 64, "mean": 0.01253288984298706, "std": 0.3735847473144531, "min": -0.886688232421875, "p10": -0.37908458709716797, "median": -0.0025177001953125, "p90": 0.502562713623047, "max": 0.9697265625, "pos_frac": 0.484375, "sample": [-0.709442138671875, -0.08751678466796875, -0.0773162841796875, -0.1573772430419922, 0.20557403564453125, -0.2420501708984375, -0.3807239532470703, 0.22370147705078125, -0.886688232421875, -0.03076934814453125, 0.117401123046875, 0.3504600524902344, -0.2128143310546875, 0.26593017578125, 0.2321624755859375, -0.0008087158203125, 0.1490955352783203, -0.49810791015625, 0.9697265625, 0.10160064697265625, 0.060550689697265625, 0.30670166015625, 0.1109466552734375, 0.8077239990234375, 0.1370830535888672, -0.2616119384765625, -0.0042266845703125, 0.2336578369140625, -0.3464698791503906, -0.24466705322265625, 0.04512786865234375, 0.1611175537109375, -0.48210906982421875, -0.28047943115234375, 0.10541152954101562, 0.38990020751953125, -0.8483772277832031, 0.05847740173339844, -0.03951263427734375, 0.5450630187988281, -0.15266990661621094, -0.27276611328125, 0.04681396484375, -0.0853271484375, -0.2793159484863281, -0.1605682373046875, -0.4781532287597656, -0.2285308837890625, 0.75244140625, -0.008697509765625, -0.2059173583984375, 0.6060714721679688, 0.02350616455078125, 0.2845172882080078, -0.20197677612304688, -0.06262969970703125, 0.4522705078125, 0.10108757019042969, 0.19934844970703125, -0.04846954345703125, 0.5241165161132812, -0.3752593994140625, 0.9027328491210938, -0.316864013671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000010.npy"} +{"epoch": 0.015117157974300832, "step": 11, "batch_size": 64, "mean": 0.04408392310142517, "std": 0.29245850443840027, "min": -0.9148406982421875, "p10": -0.28933143615722656, "median": 0.05824089050292969, "p90": 0.4254898071289064, "max": 0.6855010986328125, "pos_frac": 0.578125, "sample": [-0.01363372802734375, 0.19062423706054688, 0.07764053344726562, 0.06755828857421875, -0.2871417999267578, 0.4451560974121094, 0.21708297729492188, 0.0100555419921875, 0.023193359375, -0.074127197265625, 0.19326019287109375, 0.35623931884765625, -0.04730987548828125, 0.19104766845703125, -0.2902698516845703, -0.5032463073730469, 0.6855010986328125, 0.1173095703125, -0.4266204833984375, 0.054931640625, 0.10660171508789062, 0.3958282470703125, 0.07830047607421875, -0.06064605712890625, -0.1613025665283203, -0.04669189453125, -0.04144287109375, 0.30437660217285156, 0.05863761901855469, 0.13625335693359375, -0.0943145751953125, 0.24970245361328125, -0.11304473876953125, 0.44078826904296875, -0.22788429260253906, 0.33277130126953125, -0.128204345703125, 0.15573883056640625, 0.5170135498046875, 0.23220062255859375, -0.9148406982421875, -0.03620147705078125, -0.5359039306640625, -0.35938262939453125, 0.047214508056640625, -0.6865081787109375, -0.01142120361328125, 0.05784416198730469, 0.1393585205078125, 0.6174697875976562, 0.37410736083984375, -0.12860107421875, -0.046291351318359375, 0.13743972778320312, 0.165191650390625, -0.015045166015625, -0.216339111328125, 0.08874893188476562, -0.1241607666015625, -0.0081634521484375, 0.13520050048828125, 0.07161712646484375, 0.5099029541015625, 0.438201904296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000011.npy"} +{"epoch": 0.016628873771730914, "step": 12, "batch_size": 64, "mean": -0.013815999031066895, "std": 0.2774692177772522, "min": -0.802093505859375, "p10": -0.36778793334960935, "median": 0.010743141174316406, "p90": 0.30760974884033204, "max": 0.6596755981445312, "pos_frac": 0.546875, "sample": [0.012514114379882812, -0.24437713623046875, 0.1469707489013672, 0.09990882873535156, -0.15584945678710938, -0.26636505126953125, 0.04360771179199219, 0.1885986328125, 0.052440643310546875, -0.049404144287109375, -0.35149383544921875, 0.0651092529296875, -0.549591064453125, 0.571685791015625, -0.061748504638671875, -0.38784027099609375, -0.3747711181640625, -0.49309539794921875, 0.6220550537109375, 0.2253265380859375, -0.14475250244140625, 0.129638671875, -0.802093505859375, 0.306884765625, 0.12219047546386719, -0.19588470458984375, -0.30391693115234375, -0.038196563720703125, -0.12111663818359375, 0.08619117736816406, -0.09258270263671875, 0.0026702880859375, 0.13272476196289062, 0.050262451171875, -0.15825271606445312, 0.031070709228515625, -0.29625701904296875, 0.33358001708984375, 0.131256103515625, -0.44377899169921875, 0.1356658935546875, -0.08117103576660156, -0.0894927978515625, 0.00897216796875, 0.156158447265625, 0.3079204559326172, 0.43560791015625, -0.002803802490234375, 0.39897918701171875, 0.00722503662109375, -0.07039260864257812, 0.20444869995117188, 0.24007606506347656, -0.54217529296875, -0.2541656494140625, 0.6596755981445312, -0.3264141082763672, 0.1051025390625, -0.007659912109375, 0.019868850708007812, 0.1158599853515625, 0.030246734619140625, 0.023061752319335938, -0.18213653564453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000012.npy"} +{"epoch": 0.018140589569160998, "step": 13, "batch_size": 64, "mean": -0.03572601079940796, "std": 0.25037863850593567, "min": -0.8836708068847656, "p10": -0.31398582458496094, "median": -0.006000518798828125, "p90": 0.21410217285156252, "max": 0.431793212890625, "pos_frac": 0.484375, "sample": [-0.009368896484375, 0.072235107421875, -0.051300048828125, 0.431793212890625, 0.155120849609375, -0.8836708068847656, -0.12087631225585938, -0.07729530334472656, 0.0009002685546875, -0.14844512939453125, -0.045169830322265625, -0.2256145477294922, 0.21506118774414062, 0.130096435546875, -0.14429855346679688, 0.28294944763183594, 0.135650634765625, -0.036945343017578125, 0.1205902099609375, 0.21186447143554688, 0.103271484375, -0.4331016540527344, -0.07257080078125, -0.104095458984375, -0.19224166870117188, -0.3149528503417969, -0.31172943115234375, -0.15598297119140625, -0.07147216796875, 0.1891632080078125, -0.09400558471679688, -0.24491119384765625, 0.3922386169433594, -0.5948600769042969, 0.088226318359375, 0.08018302917480469, -0.07498931884765625, -0.13672637939453125, 0.086883544921875, -0.44898223876953125, 0.018341064453125, -0.00263214111328125, -0.49552154541015625, -0.10474395751953125, -0.7656021118164062, 0.042209625244140625, 0.08184051513671875, -0.16139984130859375, 0.001277923583984375, 0.0571136474609375, 0.3382415771484375, 0.11862945556640625, -0.06416130065917969, 0.11132049560546875, 0.19580078125, -0.22467803955078125, -0.10650634765625, 0.13154983520507812, 0.327484130859375, 0.37368202209472656, 0.068267822265625, 0.094146728515625, -0.09744834899902344, 0.07370185852050781], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000013.npy"} +{"epoch": 0.019652305366591082, "step": 14, "batch_size": 64, "mean": 0.07501909136772156, "std": 0.2818731963634491, "min": -0.6002197265625, "p10": -0.20700645446777344, "median": 0.039112091064453125, "p90": 0.39643325805664076, "max": 1.094024658203125, "pos_frac": 0.578125, "sample": [0.047649383544921875, 0.0390472412109375, -0.16443634033203125, -0.14453125, -0.00975799560546875, 0.35968017578125, 0.2397308349609375, 0.501373291015625, -0.18218612670898438, 0.022291183471679688, 0.36275482177734375, 0.6243133544921875, -0.21429824829101562, -0.00357818603515625, 0.17449188232421875, -0.194580078125, 0.0033588409423828125, 0.21430206298828125, 0.1936798095703125, -0.18911361694335938, -0.20717239379882812, -0.485687255859375, 0.16423988342285156, 0.3524665832519531, 0.12744903564453125, -0.08736419677734375, -0.44403076171875, 0.37010955810546875, -0.06006622314453125, 0.47992706298828125, 0.18305587768554688, 1.094024658203125, 0.3200950622558594, 0.40771484375, 0.10571479797363281, 0.1184234619140625, 0.46825408935546875, -0.6002197265625, 0.21536827087402344, -0.2066192626953125, -0.231170654296875, 0.4329719543457031, 0.3670158386230469, -0.0517578125, 0.1988506317138672, -0.1412830352783203, -0.05083465576171875, 0.03917694091796875, 0.0013065338134765625, 0.2308063507080078, -0.10489273071289062, 0.27569580078125, 0.126190185546875, 0.29750823974609375, 0.012197494506835938, -0.3396148681640625, -0.0673065185546875, -0.15405654907226562, -0.13361167907714844, -0.053218841552734375, -0.140838623046875, -0.02774810791015625, 0.039813995361328125, 0.2801475524902344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000014.npy"} +{"epoch": 0.021164021164021163, "step": 15, "batch_size": 64, "mean": -0.025244086980819702, "std": 0.33543509244918823, "min": -1.102569580078125, "p10": -0.390802001953125, "median": -0.02589130401611328, "p90": 0.38189659118652347, "max": 0.7826309204101562, "pos_frac": 0.4375, "sample": [0.4103221893310547, 0.3875274658203125, -0.21797943115234375, -0.000835418701171875, -0.129150390625, -0.01384735107421875, -0.1754150390625, -0.5448722839355469, -0.210052490234375, -0.3795623779296875, 0.4204368591308594, -0.2730140686035156, -0.393402099609375, -0.017826080322265625, 0.3514862060546875, -0.03330039978027344, 0.28021240234375, 0.27516937255859375, 0.16625595092773438, -0.36675262451171875, 0.6758880615234375, -0.22868728637695312, -0.047168731689453125, -0.2994117736816406, 0.10391616821289062, 0.3662261962890625, 0.56158447265625, -0.46478271484375, 0.079498291015625, -0.293792724609375, 0.17881011962890625, -0.30639076232910156, -0.29798126220703125, -0.384735107421875, 0.3594551086425781, -0.06824493408203125, -0.17530059814453125, -0.459716796875, 0.03427314758300781, -0.12119293212890625, 0.28363037109375, 0.30413818359375, -0.245330810546875, -0.1381683349609375, 0.1236724853515625, -1.102569580078125, -0.0630645751953125, -0.17899703979492188, 0.2035808563232422, -0.3227386474609375, -0.69049072265625, 0.10723114013671875, 0.08616447448730469, 0.386322021484375, -0.07585716247558594, 0.3715705871582031, 0.066375732421875, 0.190399169921875, 0.12050437927246094, -0.08502769470214844, 0.7826309204101562, -0.4918212890625, -0.018482208251953125, 0.023059844970703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000015.npy"} +{"epoch": 0.022675736961451247, "step": 16, "batch_size": 64, "mean": 0.014522776007652283, "std": 0.27756667137145996, "min": -0.79071044921875, "p10": -0.35687904357910155, "median": 0.005279541015625, "p90": 0.34666080474853517, "max": 0.8043060302734375, "pos_frac": 0.515625, "sample": [0.3303413391113281, 0.2130451202392578, -0.4725341796875, 0.35089111328125, 0.4099884033203125, -0.09922218322753906, 0.34624290466308594, -0.4020538330078125, 0.8043060302734375, -0.08839607238769531, -0.16461563110351562, 0.3634796142578125, 0.11883544921875, -0.11966705322265625, 0.2437744140625, -0.08173370361328125, 0.34683990478515625, -0.11546897888183594, 0.010284423828125, -0.35284423828125, -0.0102386474609375, -0.21628189086914062, 0.235443115234375, 0.3019752502441406, 0.1097259521484375, -0.79071044921875, 0.3145942687988281, -0.1458740234375, 0.000274658203125, 0.3092193603515625, 0.19144439697265625, 0.16875457763671875, 0.057464599609375, -0.12314987182617188, 0.03813934326171875, -0.08213043212890625, 0.55377197265625, -0.3218994140625, -0.031345367431640625, -0.3865928649902344, 0.11354255676269531, 0.11085700988769531, -0.08838272094726562, 0.33267974853515625, 0.39502716064453125, 0.2778358459472656, 0.14057540893554688, -0.3419342041015625, -0.08796882629394531, -0.15464401245117188, 0.044979095458984375, -0.06342315673828125, -0.11080265045166016, -0.3586082458496094, -0.01596832275390625, -0.04419898986816406, -0.28658294677734375, -0.4285430908203125, -0.362762451171875, 0.01587677001953125, 0.06024742126464844, 0.0128936767578125, 0.0802459716796875, -0.12556076049804688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000016.npy"} +{"epoch": 0.02418745275888133, "step": 17, "batch_size": 64, "mean": -0.00017789006233215332, "std": 0.32830777764320374, "min": -1.0072174072265625, "p10": -0.39383773803710936, "median": 0.013564109802246094, "p90": 0.31275634765625, "max": 0.938934326171875, "pos_frac": 0.546875, "sample": [0.1767292022705078, -0.0293426513671875, 0.0023479461669921875, 0.2825603485107422, 0.022308349609375, 0.5171051025390625, -0.827880859375, 0.2003765106201172, 0.2349090576171875, -0.38954925537109375, -0.0939178466796875, -0.4197998046875, -0.3956756591796875, 0.004405975341796875, -0.21886062622070312, -0.00067901611328125, -0.18669891357421875, -0.21538162231445312, 0.03765106201171875, -0.582000732421875, 0.2113494873046875, 0.020475387573242188, -0.725860595703125, 0.07624053955078125, -0.5312042236328125, -0.10511589050292969, -0.25555419921875, -0.26694297790527344, 0.09749412536621094, -0.07489776611328125, -0.01761627197265625, -0.005523681640625, 0.555755615234375, 0.096710205078125, 0.47173309326171875, 0.938934326171875, 0.31223297119140625, 0.4319305419921875, -0.15093231201171875, 0.08245849609375, -0.2741241455078125, -0.10114288330078125, 0.2340373992919922, 0.31298065185546875, 0.06300735473632812, 0.0596923828125, -1.0072174072265625, 0.3083343505859375, 0.2374744415283203, 0.00665283203125, -0.1748046875, -0.0379791259765625, -0.07616424560546875, 0.24373817443847656, 0.2972869873046875, 0.044036865234375, 0.12048149108886719, -0.13370323181152344, 0.4615325927734375, 0.09483718872070312, 0.26900482177734375, 0.125518798828125, -0.24477005004882812, -0.12036895751953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000017.npy"} +{"epoch": 0.025699168556311415, "step": 18, "batch_size": 64, "mean": -0.029924869537353516, "std": 0.3162975013256073, "min": -0.9867095947265625, "p10": -0.49208278656005855, "median": -0.02612590789794922, "p90": 0.3776012420654298, "max": 0.7068710327148438, "pos_frac": 0.46875, "sample": [-0.10485458374023438, 0.06446456909179688, -0.1224212646484375, -0.0189208984375, 0.1858386993408203, -0.4470710754394531, -0.16263198852539062, 0.2872467041015625, 0.6223907470703125, 0.295989990234375, 0.2085247039794922, -0.560089111328125, -0.02423095703125, -0.19495010375976562, -0.5241012573242188, -0.0496978759765625, 0.2613792419433594, -0.24146270751953125, 0.40395355224609375, -0.9867095947265625, 0.027973175048828125, 0.7068710327148438, -0.27086639404296875, -0.05712127685546875, 0.1607379913330078, 0.07867622375488281, 0.0655059814453125, 0.11154556274414062, -0.2760887145996094, 0.1578807830810547, 0.03920936584472656, 0.28839874267578125, 0.09574127197265625, -0.5738372802734375, -0.20902061462402344, -0.0411834716796875, 0.1482982635498047, -0.10951042175292969, 0.0157318115234375, -0.0471038818359375, 0.10967254638671875, 0.35101318359375, -0.38359832763671875, -0.389312744140625, -0.603759765625, 0.4593944549560547, -0.09215927124023438, -0.05731773376464844, -0.3357887268066406, -0.587188720703125, -0.23272323608398438, -0.028020858764648438, -0.5113735198974609, -0.1864604949951172, -0.07073974609375, 0.05280303955078125, -0.07902717590332031, -0.09754753112792969, 0.42105865478515625, 0.18223190307617188, 0.01723480224609375, 0.08362579345703125, 0.3889961242675781, 0.4693107604980469], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000018.npy"} +{"epoch": 0.027210884353741496, "step": 19, "batch_size": 64, "mean": 0.03535632789134979, "std": 0.33726075291633606, "min": -0.7266845703125, "p10": -0.34807014465332026, "median": 0.03446006774902344, "p90": 0.4266769409179688, "max": 1.2894134521484375, "pos_frac": 0.578125, "sample": [0.322357177734375, -0.2752532958984375, -0.31630706787109375, 0.11676025390625, 0.02564239501953125, 0.6161098480224609, -0.06900787353515625, -0.14359283447265625, -0.4620819091796875, 0.14816665649414062, 0.07812118530273438, -0.2220458984375, 0.43167877197265625, 0.10226821899414062, -0.5655441284179688, 0.08852386474609375, 0.032054901123046875, -0.1696758270263672, 0.285675048828125, 0.2876853942871094, 0.110198974609375, -0.50250244140625, -0.14296722412109375, -0.7266845703125, 0.4337615966796875, 0.25789642333984375, -0.36395263671875, 0.0701141357421875, -0.04570770263671875, 0.036865234375, 1.2894134521484375, -0.040771484375, 0.0173187255859375, 0.5774002075195312, -0.72509765625, 0.071533203125, 0.08498382568359375, -0.3616828918457031, 0.401123046875, -0.07410812377929688, 0.2795295715332031, -0.2131938934326172, 0.225128173828125, 0.014659881591796875, 0.0044403076171875, 0.3906879425048828, -0.1732177734375, 0.676116943359375, -0.17196083068847656, 0.25096893310546875, 0.06786346435546875, -0.10268783569335938, 0.24322509765625, -0.09261035919189453, 0.41619873046875, 0.15937042236328125, -0.09806060791015625, -0.2959022521972656, -0.2205791473388672, 0.11444854736328125, -0.208465576171875, -0.1709747314453125, 0.4311676025390625, 0.0579833984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000019.npy"} +{"epoch": 0.02872260015117158, "step": 20, "batch_size": 64, "mean": 0.03790910542011261, "std": 0.2807377874851227, "min": -0.786865234375, "p10": -0.2829437255859375, "median": 0.0542912483215332, "p90": 0.39161529541015644, "max": 0.6380691528320312, "pos_frac": 0.625, "sample": [0.02797698974609375, 0.312530517578125, -0.15996551513671875, -0.26110076904296875, 0.43627166748046875, -0.05681610107421875, -0.07502365112304688, -0.786865234375, 0.41199493408203125, 0.09199905395507812, 0.12740707397460938, 0.059795379638671875, -0.135711669921875, 0.16592025756835938, -0.047943115234375, -0.18460464477539062, 0.0071868896484375, 0.123321533203125, 0.5790252685546875, -0.38300323486328125, 0.09811782836914062, -0.12986183166503906, -0.03929901123046875, 0.34169769287109375, 0.10203170776367188, 0.6380691528320312, 0.20867347717285156, 0.038089752197265625, 0.19045257568359375, 0.054131507873535156, -0.52044677734375, -0.33420562744140625, -0.16338348388671875, 0.04557228088378906, -0.2739715576171875, 0.17352294921875, 0.2019176483154297, 0.15384674072265625, -0.24535369873046875, 0.41458892822265625, -0.042232513427734375, 0.10280609130859375, -0.17659759521484375, -0.2867889404296875, 0.34406280517578125, 0.5059051513671875, 0.0052337646484375, 0.035736083984375, 0.2944450378417969, 0.1133270263671875, -0.6404953002929688, 0.1932525634765625, 0.24578857421875, 0.6337432861328125, 0.05445098876953125, 0.08791351318359375, 0.014133453369140625, 0.1380462646484375, 0.2318267822265625, 0.07785797119140625, -0.1937541961669922, -0.007785797119140625, -0.5003185272216797, -0.010959625244140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000020.npy"} +{"epoch": 0.030234315948601664, "step": 21, "batch_size": 64, "mean": 0.0060374438762664795, "std": 0.3424542546272278, "min": -0.6829986572265625, "p10": -0.36134624481201166, "median": -0.01897907257080078, "p90": 0.3568359375000001, "max": 1.0990066528320312, "pos_frac": 0.46875, "sample": [0.05404853820800781, 0.11016845703125, -0.0076084136962890625, -0.2637939453125, 0.95526123046875, 0.2511444091796875, 0.048465728759765625, -0.049602508544921875, 0.08031272888183594, -0.3273334503173828, 0.1503448486328125, -0.3076324462890625, -0.4298248291015625, 0.329193115234375, 0.20818328857421875, 0.400238037109375, 0.2432708740234375, 0.9128952026367188, -0.5747108459472656, -0.1670093536376953, -0.20159149169921875, 0.178314208984375, -0.021108627319335938, -0.42110443115234375, 0.0354156494140625, -0.59564208984375, -0.22836875915527344, -0.27462005615234375, 1.0990066528320312, 0.07854461669921875, 0.368682861328125, 0.22902488708496094, 0.218017578125, -0.20740509033203125, -0.6829986572265625, 0.7218761444091797, 0.4802093505859375, -0.06049346923828125, -0.19500732421875, -0.15860939025878906, -0.13332748413085938, -0.2991790771484375, 0.2603759765625, 0.1908130645751953, 0.2117767333984375, -0.2745475769042969, -0.47289276123046875, 0.055149078369140625, -0.37592315673828125, -0.08670806884765625, 0.15091323852539062, -0.01983642578125, -0.08846473693847656, 0.1182403564453125, -0.018121719360351562, -0.20904922485351562, -0.020198822021484375, -0.07177925109863281, -0.09491920471191406, 0.0289764404296875, -0.21279144287109375, 0.029399871826171875, 0.0452728271484375, -0.3049354553222656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000021.npy"} +{"epoch": 0.031746031746031744, "step": 22, "batch_size": 64, "mean": -0.008793413639068604, "std": 0.2363279163837433, "min": -0.718109130859375, "p10": -0.32538681030273436, "median": -0.006674766540527344, "p90": 0.2584787368774414, "max": 0.4165191650390625, "pos_frac": 0.484375, "sample": [-0.20245361328125, -0.38423919677734375, -0.27069854736328125, 0.211273193359375, -0.06422615051269531, -0.31415557861328125, 0.08753395080566406, -0.4756927490234375, -0.008037567138671875, -0.047710418701171875, 0.17957687377929688, -0.1256866455078125, -0.03354835510253906, 0.02873992919921875, 0.09412765502929688, 0.1564788818359375, -0.17899703979492188, -0.19907760620117188, 0.4028472900390625, -0.30939483642578125, 0.20600128173828125, 0.373260498046875, 0.21506500244140625, 0.08352279663085938, 0.014720916748046875, -0.718109130859375, -0.07494163513183594, -0.21507644653320312, -0.24144554138183594, -0.037227630615234375, -0.4150543212890625, -0.4220428466796875, -0.10223388671875, -0.1073150634765625, 0.146240234375, -0.014369964599609375, 0.10858154296875, 0.21813201904296875, -0.1676483154296875, -0.34461212158203125, 0.25848960876464844, -0.1790008544921875, -0.120452880859375, 0.235565185546875, 0.13612747192382812, 0.07733154296875, 0.195404052734375, -0.3302001953125, 0.17699432373046875, -0.030364990234375, 0.15191268920898438, 0.4165191650390625, 0.3849067687988281, -0.12276077270507812, 0.24915313720703125, 0.1447772979736328, -0.09171295166015625, -0.0053119659423828125, 0.28881072998046875, 0.2631072998046875, 0.03243255615234375, -0.08470916748046875, 0.07964324951171875, 0.258453369140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000022.npy"} +{"epoch": 0.03325774754346183, "step": 23, "batch_size": 64, "mean": -0.007578998804092407, "std": 0.3810504078865051, "min": -1.0094680786132812, "p10": -0.4279975891113281, "median": -0.037944793701171875, "p90": 0.44253120422363285, "max": 1.1433258056640625, "pos_frac": 0.453125, "sample": [-1.0094680786132812, -0.095855712890625, 0.40540313720703125, -0.6990966796875, 0.05535697937011719, -0.04856109619140625, -0.052825927734375, 0.155792236328125, -0.16271209716796875, 0.5859222412109375, 0.19598388671875, -0.4098930358886719, 0.4399223327636719, 0.07558059692382812, 0.2585735321044922, -0.210601806640625, -0.366424560546875, 0.20529556274414062, -0.57183837890625, 0.27426910400390625, -0.07674789428710938, 0.13660430908203125, 0.08191108703613281, -0.3533287048339844, -0.09498214721679688, 0.3920135498046875, -0.23528289794921875, -0.1036224365234375, 0.7171249389648438, -0.34635162353515625, -0.904998779296875, 0.22110748291015625, 0.540740966796875, 0.30313682556152344, 0.6433525085449219, -0.16967010498046875, -0.045013427734375, 1.1433258056640625, 0.000179290771484375, -0.45420074462890625, -0.15472412109375, -0.02857208251953125, 0.12291336059570312, -0.38118743896484375, -0.03087615966796875, 0.56805419921875, -0.27700042724609375, 0.18968582153320312, -0.18114471435546875, -0.7530975341796875, -0.32000732421875, -0.08430099487304688, -0.09166526794433594, 0.06213951110839844, -0.08910751342773438, -0.0027637481689453125, 0.20125579833984375, -0.0453643798828125, 0.15592193603515625, 0.0756378173828125, -0.1059417724609375, -0.4357566833496094, 0.2570762634277344, 0.4436492919921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000023.npy"} +{"epoch": 0.03476946334089191, "step": 24, "batch_size": 64, "mean": -0.02340218424797058, "std": 0.2508276104927063, "min": -0.5865478515625, "p10": -0.387322998046875, "median": -0.057159423828125, "p90": 0.3110240936279297, "max": 0.50372314453125, "pos_frac": 0.421875, "sample": [-0.15773773193359375, -0.24045562744140625, 0.0333709716796875, -0.00371551513671875, 0.4256591796875, 0.3203849792480469, 0.17229080200195312, 0.33750152587890625, 0.3024864196777344, -0.384246826171875, 0.13632583618164062, -0.15685653686523438, -0.1542644500732422, -0.088653564453125, 0.24542999267578125, 0.50372314453125, -0.20401763916015625, -0.3971748352050781, -0.07799148559570312, -0.06888389587402344, -0.388641357421875, 0.4253692626953125, -0.13848114013671875, -0.1619110107421875, 0.47579193115234375, -0.2851448059082031, -0.09174346923828125, -0.4899330139160156, -0.5865478515625, -0.3169708251953125, 0.004314422607421875, -0.09958457946777344, -0.45793914794921875, 0.071533203125, 0.03073883056640625, -0.1818389892578125, -0.056610107421875, 0.03726387023925781, -0.17636871337890625, 0.30426025390625, 0.09723663330078125, 0.09370040893554688, 0.19197845458984375, 0.3139228820800781, -0.11822509765625, 0.303802490234375, -0.0173492431640625, -0.0754241943359375, 0.0088958740234375, -0.097808837890625, -0.129547119140625, -0.057708740234375, -0.045562744140625, -0.23488998413085938, -0.1497344970703125, 0.2812690734863281, -0.013156890869140625, 0.121734619140625, 0.11800384521484375, -0.4216461181640625, 0.25887298583984375, -0.42929840087890625, 0.151123046875, -0.10865974426269531], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000024.npy"} +{"epoch": 0.036281179138321996, "step": 25, "batch_size": 64, "mean": 0.07055890560150146, "std": 0.2794402837753296, "min": -0.911468505859375, "p10": -0.20666961669921874, "median": 0.05002593994140625, "p90": 0.4726039886474611, "max": 0.788848876953125, "pos_frac": 0.5625, "sample": [0.2861289978027344, -0.28765869140625, -0.18532180786132812, 0.14563369750976562, -0.0103759765625, -0.20246124267578125, 0.03520965576171875, -0.047313690185546875, 0.5036163330078125, 0.21780776977539062, 0.137451171875, 0.024005889892578125, -0.20847320556640625, -0.1981658935546875, 0.615509033203125, -0.003192901611328125, -0.2328643798828125, 0.07083511352539062, 0.1252899169921875, 0.4326324462890625, -0.1515045166015625, -0.08826065063476562, -0.3448028564453125, 0.788848876953125, 0.09058761596679688, -0.08071517944335938, -0.054439544677734375, 0.4189453125, 0.13567733764648438, 0.033527374267578125, 0.23299598693847656, 0.2511310577392578, 0.5836257934570312, -0.08975601196289062, 0.5211257934570312, -0.3177165985107422, 0.084136962890625, -0.031314849853515625, -0.018209457397460938, -0.2303314208984375, -0.911468505859375, -0.16933822631835938, 0.23065185546875, 0.2015552520751953, 0.30599212646484375, -0.09110260009765625, 0.4897346496582031, 0.08960342407226562, -0.156341552734375, 0.15921783447265625, 0.0455780029296875, -0.08443069458007812, -0.116241455078125, 0.12225723266601562, 0.10991668701171875, 0.1416473388671875, 0.35623931884765625, 0.17415618896484375, 0.7208404541015625, -0.071533203125, -0.0563201904296875, 0.054473876953125, 0.08558082580566406, -0.066741943359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000025.npy"} +{"epoch": 0.03779289493575208, "step": 26, "batch_size": 64, "mean": 0.011123299598693848, "std": 0.35256409645080566, "min": -0.7085418701171875, "p10": -0.4167091369628906, "median": 0.01899433135986328, "p90": 0.39461708068847673, "max": 1.37481689453125, "pos_frac": 0.53125, "sample": [0.14672470092773438, -0.5027618408203125, -0.345001220703125, 0.09357452392578125, 0.5209846496582031, 0.2609825134277344, -0.5209808349609375, 0.02700042724609375, -0.15219497680664062, 0.4110679626464844, 0.11687088012695312, 1.37481689453125, 0.0335540771484375, -0.5354499816894531, -0.7039031982421875, 0.00971221923828125, 0.17821121215820312, 0.26252174377441406, 0.47435760498046875, -0.13439369201660156, -0.165008544921875, -0.17010116577148438, -0.01093292236328125, -0.44744110107421875, 0.09991455078125, -0.31299591064453125, 0.18484115600585938, -0.3156776428222656, 0.32874107360839844, -0.06982421875, 0.06306266784667969, 0.8013439178466797, 0.09323883056640625, 0.2699470520019531, -0.30049896240234375, -0.7085418701171875, 0.23991775512695312, 0.13100624084472656, 0.055572509765625, 0.356231689453125, -0.10249137878417969, -0.025585174560546875, -0.2704429626464844, -0.08318328857421875, 0.561981201171875, -0.33220672607421875, -0.2372283935546875, 0.22929763793945312, 0.56329345703125, 0.06351852416992188, 0.10527801513671875, -0.5511398315429688, 0.2969169616699219, 0.06877899169921875, -0.24410629272460938, -0.0010986328125, -0.1123504638671875, 0.010988235473632812, -0.11447906494140625, 0.10433197021484375, -0.25646209716796875, 0.16854095458984375, -0.19953346252441406, -0.06921577453613281], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000026.npy"} +{"epoch": 0.039304610733182165, "step": 27, "batch_size": 64, "mean": -0.0189303457736969, "std": 0.3279438316822052, "min": -1.0904998779296875, "p10": -0.3490425109863281, "median": 0.024160385131835938, "p90": 0.28136024475097654, "max": 1.0778656005859375, "pos_frac": 0.546875, "sample": [0.20100021362304688, 0.00054931640625, 0.12639236450195312, 0.06844139099121094, 0.2856731414794922, 0.1858367919921875, -0.17422103881835938, -0.6455612182617188, 0.21474456787109375, -0.7075653076171875, -0.1542816162109375, -1.0904998779296875, 0.09161567687988281, 0.18839263916015625, -0.3030986785888672, -0.34583282470703125, -0.12763404846191406, -0.3504180908203125, -0.9043350219726562, -0.18268585205078125, 0.089996337890625, 0.27618408203125, -0.0780487060546875, 0.222930908203125, 0.6041107177734375, -0.1974334716796875, 0.03481292724609375, 0.028835296630859375, 0.3725738525390625, -0.13118362426757812, -0.008831024169921875, 0.07445526123046875, 0.18490028381347656, 0.22866058349609375, 0.2815055847167969, -0.3631591796875, 0.14532089233398438, 1.0778656005859375, 0.0787506103515625, 0.12880325317382812, 0.3161773681640625, -0.32170867919921875, -0.22781753540039062, -0.15520858764648438, -0.18601608276367188, -0.0186309814453125, 0.2810211181640625, 0.3327178955078125, 0.21038436889648438, 0.034542083740234375, 0.15940093994140625, -0.08295440673828125, -0.17346763610839844, -0.3652496337890625, 0.24798011779785156, -0.18201255798339844, 0.0194854736328125, -0.20595359802246094, -0.0296630859375, -0.21929359436035156, 0.034145355224609375, -0.30686187744140625, 0.004680633544921875, 0.19519805908203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000027.npy"} +{"epoch": 0.04081632653061224, "step": 28, "batch_size": 64, "mean": 0.019986748695373535, "std": 0.3330937325954437, "min": -0.787139892578125, "p10": -0.3903282165527343, "median": 0.030664443969726562, "p90": 0.5282810211181641, "max": 0.756988525390625, "pos_frac": 0.53125, "sample": [0.4106903076171875, -0.25146484375, 0.671630859375, 0.5287857055664062, -0.0552825927734375, 0.08203506469726562, -0.439727783203125, 0.2113494873046875, 0.1721019744873047, 0.695098876953125, -0.2703056335449219, -0.08956146240234375, -0.24199676513671875, -0.4153289794921875, 0.042266845703125, -0.0599212646484375, -0.2966899871826172, -0.20920181274414062, -0.0040130615234375, 0.15459060668945312, -0.04862213134765625, 0.2000732421875, 0.03603363037109375, 0.00629425048828125, -0.11628150939941406, -0.014261245727539062, 0.14516830444335938, -0.17771148681640625, -0.7072601318359375, 0.10340118408203125, -0.28707122802734375, -0.17817306518554688, 0.5271034240722656, 0.6894378662109375, 0.0763397216796875, 0.5358543395996094, 0.031330108642578125, -0.2614479064941406, 0.2558708190917969, -0.787139892578125, 0.09545135498046875, -0.5573501586914062, -0.16859817504882812, 0.2255725860595703, 0.050975799560546875, 0.05502891540527344, 0.19031715393066406, -0.5108489990234375, 0.12095260620117188, -0.09172439575195312, 0.5674247741699219, 0.45294952392578125, 0.20831298828125, 0.28035736083984375, 0.756988525390625, -0.2075347900390625, -0.1527996063232422, 0.109405517578125, -0.4569244384765625, -0.106475830078125, 0.029998779296875, 0.16214561462402344, -0.10647392272949219, -0.33199310302734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000028.npy"} +{"epoch": 0.042328042328042326, "step": 29, "batch_size": 64, "mean": 0.053196460008621216, "std": 0.2655872106552124, "min": -0.662994384765625, "p10": -0.2833808898925781, "median": 0.022658348083496094, "p90": 0.445524024963379, "max": 0.68359375, "pos_frac": 0.546875, "sample": [0.17522048950195312, 0.1154022216796875, 0.45682334899902344, -0.394989013671875, 0.68359375, -0.22421646118164062, -0.09003448486328125, -0.08066940307617188, -0.662994384765625, -0.214935302734375, -0.04538917541503906, 0.051136016845703125, 0.216583251953125, -0.033649444580078125, 0.23685073852539062, 0.4868316650390625, -0.1591644287109375, -0.28676605224609375, -0.275482177734375, 0.242431640625, 0.016656875610351562, -0.3321990966796875, 0.1023406982421875, -0.29899024963378906, -0.11011505126953125, 0.26374053955078125, -0.3291168212890625, 0.09038162231445312, 0.355316162109375, -0.042972564697265625, -0.058574676513671875, -0.07354736328125, -0.2234649658203125, 0.10258293151855469, 0.419158935546875, 0.08634757995605469, -0.05442047119140625, 0.5138320922851562, 0.14080238342285156, 0.54486083984375, -0.21746444702148438, -0.13344955444335938, -0.133636474609375, 0.02817535400390625, -0.06694412231445312, 0.089385986328125, 0.15380859375, 0.122589111328125, 0.004974365234375, 0.5285015106201172, -0.10443115234375, 0.33863067626953125, 0.49042510986328125, 0.017141342163085938, -0.35153961181640625, 0.14884185791015625, 0.30841064453125, -0.07698822021484375, -0.001617431640625, 0.1385040283203125, 0.1108551025390625, -0.0041961669921875, 0.3445930480957031, 0.36080169677734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000029.npy"} +{"epoch": 0.04383975812547241, "step": 30, "batch_size": 64, "mean": -0.004268676042556763, "std": 0.3223721981048584, "min": -0.7320480346679688, "p10": -0.3191535949707031, "median": 0.005652427673339844, "p90": 0.2939197540283203, "max": 1.2720489501953125, "pos_frac": 0.515625, "sample": [-0.1594696044921875, -0.02584075927734375, -0.18072509765625, -0.27635955810546875, -0.06690216064453125, 0.10559463500976562, 0.10801124572753906, 0.12754058837890625, -0.27671051025390625, -0.6468353271484375, -0.31964111328125, 0.12728118896484375, -0.1500091552734375, 0.385406494140625, 0.1713409423828125, -0.241424560546875, -0.16931724548339844, 1.2720489501953125, -0.01813507080078125, 0.16292572021484375, 0.00960540771484375, -0.49410247802734375, -0.191925048828125, 0.04605674743652344, -0.15313720703125, -0.005279541015625, -0.12546539306640625, -0.5375518798828125, 0.1815338134765625, 0.376556396484375, 0.7281646728515625, 0.0016994476318359375, -0.14842987060546875, 0.2965850830078125, 0.062347412109375, 0.21184539794921875, -0.7320480346679688, 0.09018898010253906, 0.21001434326171875, -0.30910491943359375, 0.03467369079589844, 0.04027557373046875, 0.6177978515625, -0.17067718505859375, -0.4109153747558594, 0.15280914306640625, -0.31801605224609375, 0.050868988037109375, 0.166412353515625, -0.2542724609375, -0.107025146484375, 0.238037109375, -0.10661506652832031, 0.09665679931640625, 0.39752197265625, -0.020273208618164062, -0.1110382080078125, 0.037921905517578125, -0.680267333984375, 0.018247604370117188, 0.2877006530761719, 0.24697113037109375, -0.18297576904296875, 0.25665283203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000030.npy"} +{"epoch": 0.045351473922902494, "step": 31, "batch_size": 64, "mean": 0.03101155161857605, "std": 0.3591644763946533, "min": -1.1030426025390625, "p10": -0.31824340820312497, "median": -0.0027713775634765625, "p90": 0.47428913116455085, "max": 1.031707763671875, "pos_frac": 0.484375, "sample": [0.5080795288085938, 0.30690765380859375, -0.1868896484375, 0.19536399841308594, -0.2740764617919922, 0.0840911865234375, -0.1970062255859375, 0.5068435668945312, 0.17830657958984375, -0.30663299560546875, -0.08505439758300781, 0.09149169921875, 0.1884765625, 0.0089111328125, 0.3377532958984375, -0.28155517578125, 0.27374267578125, 0.0294342041015625, -0.0037994384765625, -0.20790863037109375, 0.975067138671875, 0.25625038146972656, -0.06630325317382812, -0.2348194122314453, 0.48343467712402344, 0.365753173828125, 0.7744293212890625, 0.20735931396484375, -0.28075408935546875, -0.342071533203125, -0.1074066162109375, -0.18613052368164062, -0.32321929931640625, -0.267791748046875, -0.127166748046875, 0.013736724853515625, -0.10543060302734375, -0.17757415771484375, -0.37268638610839844, -0.15192604064941406, -0.0142669677734375, -0.5265426635742188, -0.2958717346191406, 0.686309814453125, 1.031707763671875, 0.009616851806640625, -0.001743316650390625, -0.4695911407470703, 0.20093154907226562, -0.05979156494140625, -1.1030426025390625, 0.2871360778808594, 0.327606201171875, 0.11095237731933594, 0.14111328125, -0.03113555908203125, 0.31441497802734375, 0.117706298828125, -0.16331863403320312, -0.17276763916015625, 0.45294952392578125, 0.30153465270996094, -0.24962615966796875, -0.4087715148925781], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000031.npy"} +{"epoch": 0.04686318972033258, "step": 32, "batch_size": 64, "mean": 0.045270055532455444, "std": 0.29323917627334595, "min": -0.6567420959472656, "p10": -0.30681076049804684, "median": 0.04212760925292969, "p90": 0.42455368041992186, "max": 0.6558837890625, "pos_frac": 0.546875, "sample": [-0.3844146728515625, -0.5873794555664062, -0.06158447265625, -0.0286407470703125, 0.0850372314453125, 0.16766357421875, 0.424560546875, -0.5722503662109375, 0.6224365234375, -0.08213043212890625, 0.175445556640625, 0.6331596374511719, 0.23779678344726562, 0.11688232421875, 0.16582870483398438, -0.2548065185546875, 0.3901824951171875, 0.16004562377929688, -0.0195465087890625, -0.09986495971679688, -0.08317375183105469, 0.0027332305908203125, 0.3209724426269531, 0.492340087890625, 0.3695335388183594, -0.11112403869628906, 0.3724822998046875, 0.3950653076171875, 0.53564453125, 0.23091888427734375, 0.045345306396484375, 0.42453765869140625, -0.09797286987304688, 0.39694976806640625, 0.0746612548828125, -0.05020904541015625, -0.084564208984375, -0.4031105041503906, -0.1487274169921875, 0.43761444091796875, 0.05621147155761719, 0.08558273315429688, -0.10689544677734375, -0.2605743408203125, -0.3132171630859375, 0.2816619873046875, 0.05192756652832031, -0.6567420959472656, -0.29186248779296875, -0.03887939453125, -0.1827850341796875, 0.11912918090820312, -0.26760101318359375, -0.041172027587890625, -0.051578521728515625, -0.14762115478515625, -0.316986083984375, 0.6558837890625, 0.08549880981445312, 0.14728164672851562, 0.00389862060546875, -0.20841217041015625, 0.038909912109375, 0.0472869873046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000032.npy"} +{"epoch": 0.04837490551776266, "step": 33, "batch_size": 64, "mean": 0.042654380202293396, "std": 0.3292621076107025, "min": -0.785919189453125, "p10": -0.3036689758300781, "median": 0.02935504913330078, "p90": 0.5340984344482422, "max": 0.7345123291015625, "pos_frac": 0.546875, "sample": [-0.6283721923828125, -0.785919189453125, 0.21262359619140625, -0.6758804321289062, 0.1745777130126953, 0.11887359619140625, -0.30379486083984375, 0.04739570617675781, 0.6572036743164062, 0.6926040649414062, -0.2488250732421875, 0.44248390197753906, -0.03389549255371094, -0.22769927978515625, 0.63861083984375, -0.32619476318359375, -0.0782928466796875, 0.1417694091796875, -0.0254058837890625, 0.103179931640625, 0.01387786865234375, 0.5625152587890625, 0.12429428100585938, -0.0022792816162109375, -0.7022476196289062, 0.1621551513671875, -0.1196441650390625, 0.2663612365722656, 0.5261383056640625, 0.1842193603515625, 0.1226959228515625, -0.03225517272949219, 0.26294708251953125, -0.11348342895507812, -0.0706329345703125, 0.04483222961425781, 0.12730789184570312, -0.20397186279296875, 0.001079559326171875, -0.1387653350830078, -0.282684326171875, -0.19268798828125, 0.012104034423828125, -0.07453155517578125, -0.21944046020507812, -0.03995513916015625, 0.21671295166015625, 0.04788398742675781, 0.1774892807006836, -0.11783981323242188, -0.303375244140625, 0.3145408630371094, -0.00043487548828125, 0.37328338623046875, -0.06344985961914062, 0.1914215087890625, 0.14403915405273438, 0.23580169677734375, -0.475372314453125, 0.5948944091796875, -0.18851852416992188, 0.5375099182128906, 0.19779014587402344, 0.7345123291015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000033.npy"} +{"epoch": 0.049886621315192746, "step": 34, "batch_size": 64, "mean": 0.04391142725944519, "std": 0.28214555978775024, "min": -0.7831802368164062, "p10": -0.29800319671630854, "median": 0.04517555236816406, "p90": 0.3175662994384766, "max": 1.127288818359375, "pos_frac": 0.546875, "sample": [-0.03550148010253906, -0.02664947509765625, 0.40985107421875, 0.16603469848632812, 0.258026123046875, -0.16071319580078125, -0.08225059509277344, -0.22408676147460938, 0.042514801025390625, 0.30083465576171875, 0.2649421691894531, -0.053558349609375, -0.3200645446777344, 0.0854339599609375, -0.0106964111328125, -0.213134765625, 0.21700286865234375, 0.2915992736816406, 0.32828330993652344, 0.0755462646484375, 0.14997100830078125, 0.03984260559082031, -0.037567138671875, -0.43201446533203125, 0.16181182861328125, -0.0185394287109375, -0.03436279296875, 0.47592926025390625, 0.17923736572265625, -0.24652671813964844, 0.14391326904296875, 0.0587310791015625, -0.10215187072753906, 0.07406044006347656, 0.3177528381347656, 0.1324005126953125, 0.0478363037109375, 0.2282867431640625, 0.31713104248046875, -0.00449371337890625, -0.01615142822265625, 0.02497100830078125, -0.08492469787597656, 0.12439346313476562, 0.6196441650390625, 1.127288818359375, -0.07669830322265625, 0.12628555297851562, 0.1575469970703125, 0.23088645935058594, 0.13037490844726562, -0.01080322265625, 0.106201171875, -0.3786773681640625, -0.5720024108886719, -0.3805274963378906, 0.1629180908203125, -0.1864490509033203, -0.09815216064453125, -0.012083053588867188, 0.3360137939453125, -0.041778564453125, -0.7831802368164062, -0.4594268798828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000034.npy"} +{"epoch": 0.05139833711262283, "step": 35, "batch_size": 64, "mean": 0.0079115629196167, "std": 0.32617810368537903, "min": -1.011016845703125, "p10": -0.39235115051269526, "median": -0.004673004150390625, "p90": 0.3125791549682617, "max": 0.97381591796875, "pos_frac": 0.484375, "sample": [0.8196792602539062, -0.21397018432617188, 0.3138751983642578, -0.00124359130859375, -0.2596321105957031, 0.97381591796875, -0.46903228759765625, -0.1673450469970703, -0.07749176025390625, -0.22163009643554688, 0.25433349609375, 0.06894493103027344, 0.17850875854492188, 0.1549091339111328, -0.13181686401367188, 0.13674163818359375, 0.6362152099609375, -0.10385894775390625, -0.2869110107421875, 0.009975433349609375, 0.2466278076171875, 0.31836700439453125, 0.24438095092773438, -0.2667083740234375, 0.0302581787109375, -0.07451248168945312, -0.3535919189453125, -0.148590087890625, -0.05794525146484375, 0.22643280029296875, -1.011016845703125, 0.2846832275390625, 0.150970458984375, -0.11066055297851562, 0.08608245849609375, -0.4693145751953125, -0.02854156494140625, -0.628662109375, 0.3095550537109375, -0.11072158813476562, -0.4089622497558594, 0.2107105255126953, 0.052845001220703125, -0.48622894287109375, -0.06732177734375, 0.15756988525390625, -0.00922393798828125, -0.12377166748046875, -0.09910202026367188, -0.2217998504638672, 0.19182777404785156, -0.16991424560546875, 0.06461334228515625, 0.2472686767578125, -0.4999542236328125, -0.0081024169921875, 0.0411376953125, -0.04431915283203125, 0.4243621826171875, 0.5617218017578125, 0.014410018920898438, 0.2991943359375, -0.17527389526367188, 0.3034934997558594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000035.npy"} +{"epoch": 0.05291005291005291, "step": 36, "batch_size": 64, "mean": 0.013609647750854492, "std": 0.3320777416229248, "min": -0.6502151489257812, "p10": -0.35355281829833984, "median": -0.03901481628417969, "p90": 0.4506950378417971, "max": 1.120208740234375, "pos_frac": 0.484375, "sample": [0.2223358154296875, 1.120208740234375, 0.4010009765625, -0.2818489074707031, -0.23558807373046875, -0.4508171081542969, 0.1640625, -0.11031723022460938, 0.23735618591308594, 0.00234222412109375, 0.07215118408203125, 0.13521194458007812, 0.7884979248046875, 0.33880615234375, -0.11505317687988281, 0.041675567626953125, 0.119049072265625, -0.22620773315429688, -0.101043701171875, 0.5757331848144531, -0.06192207336425781, 0.11489105224609375, -0.215606689453125, -0.05235862731933594, -0.36223602294921875, -0.4137420654296875, -0.3009452819824219, 0.247589111328125, -0.2765388488769531, -0.5509033203125, 0.10771942138671875, -0.1493206024169922, -0.24964141845703125, 0.3019866943359375, -0.45919036865234375, -0.18902587890625, 0.1620941162109375, -0.0735626220703125, 0.030588150024414062, -0.0856475830078125, 0.038730621337890625, 0.695220947265625, 0.37487030029296875, -0.33329200744628906, 0.12439918518066406, -0.025671005249023438, 0.47199249267578125, 0.30908203125, -0.16493988037109375, 0.5107765197753906, 0.05127716064453125, -0.07865142822265625, -0.3102550506591797, -0.2934112548828125, -0.3625297546386719, -0.6502151489257812, 0.23867034912109375, -0.10710906982421875, -0.2280731201171875, 0.191680908203125, -0.10265922546386719, 0.48443603515625, -0.30518531799316406, 0.12009048461914062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000036.npy"} +{"epoch": 0.05442176870748299, "step": 37, "batch_size": 64, "mean": 0.10784146189689636, "std": 0.41081690788269043, "min": -0.5790557861328125, "p10": -0.26399154663085933, "median": 0.0012493133544921875, "p90": 0.5684219360351566, "max": 1.9254608154296875, "pos_frac": 0.5, "sample": [-0.3826141357421875, 0.6205215454101562, 0.5052108764648438, -0.1932830810546875, 0.23162841796875, 0.1279144287109375, 0.030391693115234375, 0.11949539184570312, 0.1622486114501953, 0.0263824462890625, 1.26531982421875, 0.15562820434570312, 0.5955123901367188, 0.33514404296875, -0.17650604248046875, -0.023345947265625, 0.06008148193359375, -0.16761016845703125, 0.42443084716796875, 0.7442474365234375, -0.1530170440673828, -0.1763591766357422, -0.5790557861328125, -0.1062164306640625, -0.23191070556640625, 0.328125, -0.19414520263671875, -0.03883934020996094, 0.23912811279296875, -0.19698333740234375, -0.3172454833984375, -0.28423309326171875, 0.1584320068359375, -0.1327362060546875, 0.963958740234375, 0.9833602905273438, -0.03763580322265625, -0.044910430908203125, 0.10853958129882812, -0.040927886962890625, 0.216217041015625, -0.172393798828125, -0.3229713439941406, -0.06435394287109375, -0.32888031005859375, -0.13489913940429688, 0.4410400390625, -0.2087249755859375, 0.282196044921875, -0.277740478515625, -0.11237144470214844, -0.13819122314453125, -0.001689910888671875, 0.030605316162109375, -0.0792388916015625, 0.29119873046875, -0.07501220703125, 0.26869964599609375, 0.085235595703125, 0.31162261962890625, 0.3847503662109375, -0.13101959228515625, 0.00418853759765625, 1.9254608154296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000037.npy"} +{"epoch": 0.055933484504913075, "step": 38, "batch_size": 64, "mean": 0.040194928646087646, "std": 0.38157227635383606, "min": -0.6832733154296875, "p10": -0.4043827056884765, "median": 0.04180908203125, "p90": 0.4426597595214844, "max": 1.31121826171875, "pos_frac": 0.578125, "sample": [0.13129425048828125, 0.5447731018066406, 0.1536102294921875, 1.0018386840820312, -0.32968902587890625, 0.19285202026367188, 0.03423118591308594, 0.3172798156738281, -0.25965118408203125, 0.5166473388671875, -0.10410499572753906, 0.1274871826171875, 0.2818870544433594, -0.00049591064453125, -0.022525787353515625, -0.3688812255859375, -0.1106414794921875, -0.2207660675048828, 0.08154296875, 0.17697906494140625, 0.27431488037109375, 0.170074462890625, -0.57818603515625, -0.5432891845703125, 0.1470184326171875, 0.270355224609375, 0.03509521484375, -0.6832733154296875, 0.0332489013671875, 0.04111480712890625, -0.45618629455566406, -0.3073883056640625, 0.12015724182128906, 0.2641792297363281, 1.0263938903808594, 0.2464599609375, -0.3563385009765625, -0.4195976257324219, 0.19742965698242188, -0.10600662231445312, -0.02703857421875, 0.624603271484375, 0.029081344604492188, -0.15394210815429688, 0.08795547485351562, -0.2533607482910156, -0.2735557556152344, -0.36080169677734375, 0.44597625732421875, 1.31121826171875, -0.4564208984375, 0.15560150146484375, 0.4349212646484375, -0.3487091064453125, -0.2522869110107422, 0.337799072265625, 0.159423828125, 0.04250335693359375, -0.6451492309570312, 0.19812774658203125, -0.24990081787109375, 0.12066650390625, 0.2906055450439453, -0.16408538818359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000038.npy"} +{"epoch": 0.05744520030234316, "step": 39, "batch_size": 64, "mean": 0.033740073442459106, "std": 0.3678494095802307, "min": -1.0661849975585938, "p10": -0.48603782653808586, "median": 0.101409912109375, "p90": 0.4213708877563477, "max": 0.7163238525390625, "pos_frac": 0.59375, "sample": [0.4502105712890625, -0.09227752685546875, -0.2813262939453125, 0.26554107666015625, -0.0572509765625, -0.79705810546875, 0.18527984619140625, 0.33379364013671875, 0.20496749877929688, 0.18276023864746094, -0.156158447265625, 0.005863189697265625, 0.42046165466308594, 0.18590736389160156, 0.4364013671875, 0.26770591735839844, -0.11978721618652344, 0.2622261047363281, 0.35739898681640625, 0.2832183837890625, -0.7088985443115234, -0.6408233642578125, -0.09575843811035156, 0.20330810546875, 0.1824951171875, -0.10564041137695312, -0.716766357421875, -0.06881332397460938, 0.3561859130859375, -0.3887672424316406, 0.09725570678710938, 0.6700286865234375, -0.210968017578125, 0.2544517517089844, 0.3120765686035156, 0.5445404052734375, -0.19391632080078125, -0.01605987548828125, 0.10556411743164062, -0.8825798034667969, 0.3154449462890625, -1.0661849975585938, 0.07616233825683594, 0.2601509094238281, -0.276824951171875, -0.2603645324707031, 0.223236083984375, 0.1761474609375, 0.42176055908203125, -0.11584281921386719, -0.18074798583984375, -0.5277252197265625, 0.05322265625, -0.02877044677734375, 0.09462738037109375, 0.365631103515625, 0.18509864807128906, 0.7163238525390625, 0.11545562744140625, -0.12515640258789062, 0.5488815307617188, 0.23386001586914062, -0.11017799377441406, 0.030364990234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000039.npy"} +{"epoch": 0.05895691609977324, "step": 40, "batch_size": 64, "mean": 0.00636136531829834, "std": 0.3532005846500397, "min": -1.0004119873046875, "p10": -0.4409809112548828, "median": 0.014364242553710938, "p90": 0.406638526916504, "max": 0.8570976257324219, "pos_frac": 0.5, "sample": [-0.138671875, 0.356658935546875, 0.48030853271484375, 0.09006500244140625, -0.25304412841796875, 0.31215667724609375, 0.8570976257324219, -0.0845489501953125, 0.2601165771484375, 0.21069717407226562, -0.015716552734375, 0.23508644104003906, 0.0788116455078125, -0.06882095336914062, 0.046642303466796875, 0.10502815246582031, 0.22381591796875, 0.496124267578125, 0.5719165802001953, -0.18375587463378906, 0.09848785400390625, -0.0326385498046875, -0.45319366455078125, 0.6933174133300781, 0.06592178344726562, -0.44446563720703125, -0.19143295288085938, 0.05484771728515625, 0.772003173828125, -0.06540489196777344, 0.31123924255371094, -0.822296142578125, 0.338165283203125, -0.22299575805664062, 0.1528453826904297, 0.1149444580078125, 0.08136749267578125, 0.12822914123535156, 0.2591094970703125, -0.4576873779296875, -0.2587718963623047, -0.017425537109375, 0.41683006286621094, -0.017303466796875, -0.5004653930664062, -0.06890869140625, -0.019382476806640625, -0.3058738708496094, -0.1743316650390625, -0.042510986328125, 0.3828582763671875, -1.0004119873046875, -0.2701606750488281, 0.21923828125, 0.21974945068359375, -0.4328498840332031, -0.725250244140625, -0.30240631103515625, -0.1740570068359375, -0.21167755126953125, 0.23321533203125, 0.044445037841796875, -0.1976470947265625, -0.35010528564453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000040.npy"} +{"epoch": 0.06046863189720333, "step": 41, "batch_size": 64, "mean": 0.03788486123085022, "std": 0.39017215371131897, "min": -0.8190155029296875, "p10": -0.3597698211669922, "median": 0.016625404357910156, "p90": 0.5941291809082032, "max": 0.9897994995117188, "pos_frac": 0.53125, "sample": [0.09593963623046875, -0.23546218872070312, -0.014127731323242188, -0.19117355346679688, 0.0201873779296875, -0.8190155029296875, -0.3023357391357422, 0.6005630493164062, -0.253509521484375, -0.2836112976074219, -0.3484344482421875, 0.1303863525390625, -0.08441162109375, 0.2993927001953125, -0.10675048828125, 0.9196014404296875, -0.5242290496826172, 0.7748565673828125, 0.1217193603515625, -0.13445663452148438, 0.32871246337890625, -0.573211669921875, -0.21480560302734375, 0.19431304931640625, 0.07728767395019531, -0.4599761962890625, 0.099609375, 0.16253662109375, 0.09574508666992188, 0.0855712890625, 0.15279769897460938, 0.84930419921875, 0.718414306640625, 0.3399810791015625, 0.013063430786132812, -0.19695281982421875, 0.4882354736328125, -0.05364227294921875, 0.1632232666015625, 0.011079788208007812, -0.034275054931640625, -0.30770111083984375, -0.3646278381347656, -0.045566558837890625, 0.9897994995117188, -0.42261505126953125, 0.038181304931640625, -0.29102325439453125, 0.782318115234375, 0.5791168212890625, -0.24908447265625, -0.32330322265625, -0.20909881591796875, -0.19521331787109375, 0.14230728149414062, -0.763214111328125, 0.3886070251464844, -0.10541534423828125, 0.5076103210449219, 0.03460693359375, 0.0869903564453125, 0.44351959228515625, 0.08653450012207031, -0.2902374267578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000041.npy"} +{"epoch": 0.06198034769463341, "step": 42, "batch_size": 64, "mean": 0.008484512567520142, "std": 0.3420381546020508, "min": -1.0212936401367188, "p10": -0.3977447509765625, "median": 0.019430160522460938, "p90": 0.40484275817871096, "max": 0.7050018310546875, "pos_frac": 0.546875, "sample": [0.334564208984375, 0.498291015625, -0.24184417724609375, -0.0534820556640625, 0.13132286071777344, -0.38067626953125, -0.007450103759765625, 0.000461578369140625, -0.447113037109375, -0.00513458251953125, -0.490325927734375, -0.1325836181640625, -0.15038681030273438, 0.174224853515625, 0.37075042724609375, 0.08171844482421875, 0.7050018310546875, 0.31683349609375, 0.025968551635742188, 0.2194366455078125, 0.04668426513671875, 0.348876953125, -0.049861907958984375, 0.00861358642578125, -0.19493865966796875, -0.28331756591796875, -0.014217376708984375, -0.2896881103515625, -0.14554214477539062, -0.2685375213623047, 0.4065284729003906, 0.2817096710205078, -0.030628204345703125, -0.0387420654296875, 0.400909423828125, 0.158050537109375, -0.02831268310546875, -0.11492919921875, -0.9143295288085938, 0.6872406005859375, 0.02562713623046875, -0.31174468994140625, -1.0212936401367188, -0.5875701904296875, -0.5637435913085938, 0.05889892578125, -0.35381317138671875, 0.4904022216796875, 0.2936210632324219, -0.31450653076171875, 0.0847320556640625, 0.17731094360351562, 0.26226043701171875, 0.20182037353515625, 0.4502525329589844, -0.405059814453125, -0.29959869384765625, 0.03591156005859375, 0.35721588134765625, 0.4084663391113281, 0.37145423889160156, 0.10905838012695312, 0.144927978515625, 0.013233184814453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000042.npy"} +{"epoch": 0.06349206349206349, "step": 43, "batch_size": 64, "mean": 0.0620463490486145, "std": 0.3307415544986725, "min": -0.7825469970703125, "p10": -0.31061325073242185, "median": 0.0854024887084961, "p90": 0.4856201171875002, "max": 1.0201568603515625, "pos_frac": 0.59375, "sample": [0.18719482421875, -0.29677581787109375, -0.63134765625, -0.21675872802734375, -0.21169281005859375, -0.2503795623779297, -0.15816116333007812, 0.03254508972167969, 0.1878204345703125, -0.49277496337890625, 0.2518138885498047, 0.1697845458984375, 0.055877685546875, -0.07283782958984375, 0.440460205078125, 0.25628662109375, 0.1544628143310547, -0.257415771484375, -0.10630416870117188, 0.3253326416015625, 0.32958221435546875, 0.35721588134765625, 0.0135650634765625, 0.1954326629638672, 0.3925933837890625, 0.04084968566894531, 0.24814796447753906, 0.13587188720703125, 0.09288787841796875, 0.08721923828125, 1.0201568603515625, -0.026885986328125, 0.5128555297851562, -0.0221405029296875, 0.09713172912597656, -0.230072021484375, 0.174774169921875, -0.36429405212402344, 0.6017684936523438, 0.504974365234375, 0.23828887939453125, -0.09833145141601562, 0.09365081787109375, -0.7825469970703125, -0.0473785400390625, 0.217742919921875, 0.06874847412109375, 0.39910316467285156, 0.08638572692871094, 0.2833728790283203, 0.1482982635498047, 0.5357284545898438, 0.08441925048828125, -0.3165435791015625, -0.0988311767578125, -0.14868927001953125, -0.2768096923828125, 0.65399169921875, -0.267578125, 0.77166748046875, -0.296600341796875, -0.34928131103515625, -0.0023365020751953125, -0.4542694091796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000043.npy"} +{"epoch": 0.06500377928949358, "step": 44, "batch_size": 64, "mean": -0.00314408540725708, "std": 0.39816951751708984, "min": -0.92034912109375, "p10": -0.4574440002441406, "median": 0.0017242431640625, "p90": 0.4423044204711915, "max": 1.263580322265625, "pos_frac": 0.5, "sample": [-0.24835205078125, -0.23196792602539062, -0.92034912109375, 0.054157257080078125, 0.152435302734375, 0.2017822265625, 0.0144500732421875, -0.27323150634765625, -0.607025146484375, -0.342254638671875, 0.2574958801269531, 0.1811065673828125, -0.417327880859375, 0.22702789306640625, 0.24450302124023438, -0.1073760986328125, 0.11659622192382812, 0.4960498809814453, 0.22357940673828125, 0.2110137939453125, -0.31183624267578125, 0.3831443786621094, 0.321075439453125, -0.57781982421875, -0.13342857360839844, 0.238922119140625, 0.14096832275390625, 0.09210968017578125, -0.300323486328125, 0.34574127197265625, -0.3252410888671875, -0.3342247009277344, -0.16909027099609375, 0.2830352783203125, 0.13069534301757812, -0.21133995056152344, 1.048095703125, 0.10089302062988281, -0.46701812744140625, -0.39989471435546875, -0.12372207641601562, -0.16186141967773438, -0.0280914306640625, -0.25030517578125, -0.0110015869140625, 0.5133895874023438, -0.2515106201171875, 0.8065032958984375, 0.16788482666015625, 0.029466629028320312, -0.4351043701171875, -0.12916946411132812, 0.414337158203125, 0.121002197265625, -0.7154388427734375, -0.21831512451171875, -0.141510009765625, 1.263580322265625, 0.45429039001464844, 0.5450668334960938, -0.0833740234375, 0.149810791015625, -0.6223831176757812, -0.58154296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000044.npy"} +{"epoch": 0.06651549508692366, "step": 45, "batch_size": 64, "mean": 0.1365204155445099, "std": 0.3398631811141968, "min": -0.8239288330078125, "p10": -0.2400030136108398, "median": 0.1341075897216797, "p90": 0.5825576782226566, "max": 0.9787139892578125, "pos_frac": 0.671875, "sample": [0.35033416748046875, -0.1510009765625, 0.48455047607421875, 0.30511474609375, 0.12841415405273438, 0.02448272705078125, -0.2113361358642578, 0.4703369140625, 0.3707733154296875, -0.8239288330078125, -0.046993255615234375, 0.4586334228515625, 0.3815803527832031, 0.30519866943359375, -0.2015399932861328, 0.17243576049804688, 0.11779212951660156, 0.00585174560546875, -0.02410888671875, 0.22963333129882812, -0.2101593017578125, 0.2112274169921875, 0.50848388671875, -0.18270492553710938, 0.4339866638183594, -0.0300445556640625, -0.1020355224609375, 0.2874183654785156, 0.6143035888671875, 0.2116851806640625, 0.4184989929199219, 0.39752197265625, -0.565826416015625, 0.139801025390625, 0.3434562683105469, 0.9787139892578125, -0.3735313415527344, 0.2226715087890625, -0.252288818359375, 0.16181182861328125, -0.6996536254882812, 0.62481689453125, -0.2684364318847656, 0.03719902038574219, 0.1246185302734375, -0.0049343109130859375, 0.104156494140625, 0.07703590393066406, 0.6168212890625, 0.32610321044921875, -0.17430877685546875, 0.191162109375, 0.6280593872070312, 0.3849029541015625, 0.03266716003417969, 0.0891571044921875, 0.64312744140625, 0.10061264038085938, 0.3069019317626953, -0.2110462188720703, -0.08778762817382812, 0.7135009765625, -0.00170135498046875, -0.3748817443847656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000045.npy"} +{"epoch": 0.06802721088435375, "step": 46, "batch_size": 64, "mean": 0.12037333846092224, "std": 0.37904706597328186, "min": -0.8275985717773438, "p10": -0.2687124252319336, "median": 0.08695602416992188, "p90": 0.5686946868896485, "max": 1.081512451171875, "pos_frac": 0.609375, "sample": [-0.1051177978515625, 0.07178115844726562, -0.8275985717773438, -0.0107269287109375, 0.3169097900390625, -0.2140045166015625, -0.09444236755371094, -0.08034133911132812, 0.27386474609375, -0.5693359375, 0.5571403503417969, 0.3356895446777344, 0.2681121826171875, 0.814056396484375, -0.7547378540039062, 0.013217926025390625, -0.403961181640625, 0.3192481994628906, 0.5156326293945312, 0.3128490447998047, 0.34716033935546875, 0.2906646728515625, 0.15177536010742188, 0.6290206909179688, 0.30216217041015625, -0.147918701171875, 0.48920440673828125, 1.081512451171875, -0.04047393798828125, -0.16742706298828125, -0.39049530029296875, 0.5255355834960938, 0.10254287719726562, 0.02982330322265625, -0.2266979217529297, 0.039215087890625, -0.07887649536132812, 0.12259101867675781, -0.2699775695800781, 0.7571144104003906, 0.0167083740234375, -0.12221527099609375, 0.797943115234375, 0.37731170654296875, 0.3465309143066406, 0.07436370849609375, -0.2351531982421875, -0.32634735107421875, -0.069610595703125, 0.2713508605957031, 0.14960098266601562, -0.13110923767089844, -0.10923385620117188, 0.5736465454101562, 0.09954833984375, 0.32171630859375, -0.1739044189453125, 0.291412353515625, -0.2516040802001953, 0.971160888671875, 0.01947021484375, 0.3379478454589844, -0.2657604217529297, 0.4554290771484375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000046.npy"} +{"epoch": 0.06953892668178382, "step": 47, "batch_size": 64, "mean": 0.1566331386566162, "std": 0.31434059143066406, "min": -0.7296066284179688, "p10": -0.20336990356445311, "median": 0.11134529113769531, "p90": 0.5734451293945313, "max": 0.8456649780273438, "pos_frac": 0.671875, "sample": [0.3552207946777344, -0.16855239868164062, -0.029254913330078125, 0.34770965576171875, 0.11035919189453125, 0.10430145263671875, -0.04018592834472656, 0.72430419921875, -0.04502296447753906, -0.09487152099609375, 0.11662101745605469, -0.03676414489746094, 0.5605316162109375, -0.7296066284179688, -0.2782135009765625, 0.03433990478515625, 0.2949104309082031, 6.103515625e-05, 0.1556243896484375, -0.009571075439453125, 0.11233139038085938, -0.23509979248046875, -0.20513916015625, -0.056507110595703125, 0.00562286376953125, 0.1532135009765625, 0.34449005126953125, 0.09543800354003906, -0.029613494873046875, 0.439727783203125, 0.09655380249023438, 0.5497798919677734, -0.19924163818359375, 0.8456649780273438, 0.07624053955078125, 0.3035087585449219, 0.6493186950683594, 0.357635498046875, 0.142181396484375, 0.5510940551757812, -0.2586212158203125, 0.687255859375, -0.16207122802734375, 0.21967315673828125, 0.5132713317871094, 0.0878143310546875, 0.7339324951171875, -0.08353424072265625, 0.4490547180175781, 0.46666717529296875, 0.39144134521484375, 0.18901824951171875, -0.22771453857421875, -0.0397796630859375, -0.5460720062255859, 0.7033958435058594, 0.11969757080078125, 0.312469482421875, 0.34661102294921875, 0.02550506591796875, 0.17897415161132812, 0.0936737060546875, 0.5789794921875, -0.12426185607910156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000047.npy"} +{"epoch": 0.0710506424792139, "step": 48, "batch_size": 64, "mean": 0.03382223844528198, "std": 0.39063599705696106, "min": -0.6717681884765625, "p10": -0.4418785095214844, "median": 0.01686573028564453, "p90": 0.45892276763916023, "max": 1.48370361328125, "pos_frac": 0.515625, "sample": [-0.024946212768554688, -0.42620849609375, 0.1025543212890625, 0.085968017578125, 0.2575836181640625, 0.8634605407714844, 0.13970947265625, 0.0214996337890625, -0.4832954406738281, 0.09589195251464844, 0.5656967163085938, 0.39768028259277344, -0.3623542785644531, -0.4483489990234375, 0.76397705078125, 0.141204833984375, 0.3676338195800781, 0.11377906799316406, -0.6717681884765625, 0.2987823486328125, 0.3951835632324219, 0.4818611145019531, -0.5550994873046875, -0.00112152099609375, 0.44272613525390625, -0.12701416015625, -0.0468597412109375, 0.19427108764648438, -0.19203948974609375, 0.43039703369140625, 0.097442626953125, 0.216888427734375, -0.07407569885253906, -0.2295379638671875, 0.06566238403320312, 0.39688873291015625, -0.22782516479492188, -0.0635833740234375, 0.4658641815185547, -0.335784912109375, -0.49651527404785156, -0.488983154296875, -0.343780517578125, 0.18520545959472656, -0.42678070068359375, -0.22650909423828125, 0.2726612091064453, 0.23372268676757812, -0.2672576904296875, -0.34145545959472656, 0.40486907958984375, -0.04358673095703125, -0.5612907409667969, 0.31481170654296875, -0.38262939453125, -0.04132270812988281, 1.48370361328125, -0.1255474090576172, -0.2936553955078125, -0.24838638305664062, 0.012231826782226562, 0.5168380737304688, 0.14066123962402344, -0.24512481689453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000048.npy"} +{"epoch": 0.07256235827664399, "step": 49, "batch_size": 64, "mean": 0.04481416940689087, "std": 0.37005603313446045, "min": -1.288330078125, "p10": -0.3584815979003906, "median": -0.016414642333984375, "p90": 0.44753913879394547, "max": 1.1644210815429688, "pos_frac": 0.46875, "sample": [0.6564712524414062, 0.4718894958496094, -0.04526329040527344, 0.49954986572265625, 0.0958099365234375, -0.2660045623779297, -0.31290435791015625, 0.13863182067871094, -0.04302024841308594, -0.6953659057617188, -0.33788299560546875, -0.053165435791015625, 0.23026275634765625, 0.19415283203125, -0.1359729766845703, -0.28930091857910156, -0.13271713256835938, -0.058258056640625, 0.2676811218261719, -0.4594879150390625, -0.03975868225097656, -0.06420707702636719, -0.036914825439453125, -0.20440673828125, 0.11304283142089844, -0.020050048828125, -0.0392913818359375, 0.3894805908203125, 0.3510303497314453, -0.4674224853515625, -0.08215522766113281, 0.3006477355957031, 0.07758331298828125, 0.6117000579833984, -0.038074493408203125, 0.3715057373046875, 0.22013092041015625, 0.103240966796875, 0.3209075927734375, 0.3827056884765625, -0.436737060546875, 0.08141708374023438, -0.10404014587402344, -0.0903778076171875, -0.2418975830078125, 0.46246337890625, 0.4127159118652344, 0.7136440277099609, -0.1574554443359375, -0.0076313018798828125, -0.03890228271484375, 0.39817047119140625, -1.288330078125, -0.2396392822265625, -0.2895317077636719, -0.3673095703125, -0.01277923583984375, 1.1644210815429688, 0.39904022216796875, 0.25441932678222656, 0.305328369140625, -0.40139007568359375, 0.27303314208984375, 0.10467529296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000049.npy"} +{"epoch": 0.07407407407407407, "step": 50, "batch_size": 64, "mean": 0.11278638243675232, "std": 0.3989274501800537, "min": -0.8755645751953125, "p10": -0.2955160140991211, "median": 0.0904083251953125, "p90": 0.5936304092407226, "max": 1.247833251953125, "pos_frac": 0.59375, "sample": [0.35764312744140625, 0.327423095703125, -0.8755645751953125, -0.057567596435546875, 0.4130821228027344, 0.02883148193359375, 0.744140625, -0.2348480224609375, 0.9079971313476562, 0.4029693603515625, 0.11110496520996094, 0.41064453125, -0.43445587158203125, 0.0703277587890625, -0.10874557495117188, -0.2683391571044922, 0.279449462890625, 0.7713088989257812, 0.44815826416015625, -0.11773681640625, -0.17962646484375, 0.358154296875, -0.4749183654785156, 0.19243621826171875, 0.22028350830078125, 1.247833251953125, 0.10801315307617188, 0.00232696533203125, -0.23145675659179688, -0.0696868896484375, 0.5121688842773438, -0.064453125, 0.23420333862304688, 0.3617095947265625, -0.23717498779296875, 0.21100616455078125, 0.09999847412109375, 1.0351409912109375, 0.12603759765625, -0.09468841552734375, -0.1916961669921875, 0.9516448974609375, 0.31819915771484375, -0.30719757080078125, -0.1844959259033203, 0.20721817016601562, -0.1858062744140625, -0.1439208984375, 0.1608428955078125, 0.5952377319335938, -0.1007232666015625, -0.06171607971191406, -0.10213661193847656, 0.0006351470947265625, 0.5898799896240234, -0.7037200927734375, -0.3071632385253906, -0.5642280578613281, 0.01404571533203125, 0.19791412353515625, 0.14864349365234375, 0.3447723388671875, -0.07184982299804688, 0.08081817626953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000050.npy"} +{"epoch": 0.07558578987150416, "step": 51, "batch_size": 64, "mean": 0.15886464715003967, "std": 0.4284476041793823, "min": -0.5874557495117188, "p10": -0.29571762084960934, "median": 0.07484054565429688, "p90": 0.5937408447265625, "max": 1.38397216796875, "pos_frac": 0.578125, "sample": [0.4603729248046875, 0.20541763305664062, -0.04695701599121094, 0.5188102722167969, -0.11937713623046875, -0.15631866455078125, 0.10612297058105469, 0.3863372802734375, -0.02338409423828125, -0.03423309326171875, -0.09483146667480469, 0.596771240234375, -0.4608650207519531, 0.5831527709960938, -0.15148162841796875, -0.146453857421875, 0.586669921875, 0.5055389404296875, 0.18254852294921875, 0.5692138671875, 0.359619140625, 0.2089099884033203, 0.4869098663330078, 0.2859039306640625, -0.12445068359375, -0.1970367431640625, -0.3788909912109375, -0.26051902770996094, 0.041576385498046875, 0.08936309814453125, -0.30796051025390625, 0.99237060546875, 0.0254058837890625, 1.38397216796875, -0.5241928100585938, 0.04248046875, 0.6462059020996094, -0.14247512817382812, -0.5874557495117188, 0.04782867431640625, -0.42221832275390625, 0.16269683837890625, 0.2708625793457031, 1.087890625, 0.0603179931640625, -0.029651641845703125, -0.2411346435546875, 1.118560791015625, 0.2210540771484375, 0.5142059326171875, -0.04158782958984375, 0.5843124389648438, 0.37885284423828125, -0.26715087890625, 0.3748626708984375, 0.12367439270019531, 0.19641494750976562, 1.2487602233886719, -0.07176399230957031, -0.006328582763671875, -0.20940399169921875, -0.2111053466796875, 0.3170299530029297, -0.5464324951171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000051.npy"} +{"epoch": 0.07709750566893424, "step": 52, "batch_size": 64, "mean": 0.10791890323162079, "std": 0.4844755232334137, "min": -0.895172119140625, "p10": -0.45303649902343746, "median": 0.0943145751953125, "p90": 0.5055122375488282, "max": 2.572052001953125, "pos_frac": 0.640625, "sample": [0.7197799682617188, -0.4715728759765625, -0.1629486083984375, 0.2982025146484375, 0.9138717651367188, 0.078765869140625, 0.2984657287597656, -0.27600860595703125, -0.21259689331054688, 0.9009857177734375, 0.512115478515625, 0.309295654296875, 0.01908111572265625, 0.49010467529296875, 0.6545944213867188, 0.11894989013671875, 0.5938796997070312, -0.34002685546875, 0.252685546875, -0.2573375701904297, -0.22699737548828125, 0.10115814208984375, 0.17432403564453125, 0.06921577453613281, 0.3730621337890625, 0.12218475341796875, 0.10760498046875, -0.006656646728515625, -0.0210418701171875, 0.07477569580078125, -0.05760002136230469, -0.657318115234375, 0.20790863037109375, -0.1656475067138672, 0.17704010009765625, 0.32501983642578125, 0.08747100830078125, 0.10821151733398438, 0.418060302734375, -0.895172119140625, 0.1471710205078125, 0.23083114624023438, 0.391937255859375, 0.3051433563232422, 0.12265586853027344, 0.4272575378417969, -0.015437126159667969, -0.415924072265625, 2.572052001953125, 0.0701141357421875, 0.38262939453125, -0.8673019409179688, -0.4962120056152344, 0.059825897216796875, 0.452392578125, -0.43310546875, 0.3959197998046875, -0.16955184936523438, -0.47406005859375, 0.061519622802734375, 0.03037261962890625, -0.13633346557617188, -0.029397964477539062, -0.461578369140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000052.npy"} +{"epoch": 0.07860922146636433, "step": 53, "batch_size": 64, "mean": 0.11565083265304565, "std": 0.32359933853149414, "min": -0.8312606811523438, "p10": -0.34054718017578123, "median": 0.1444530487060547, "p90": 0.5010215759277343, "max": 0.8616409301757812, "pos_frac": 0.6875, "sample": [0.3956871032714844, 0.17839813232421875, 0.5012893676757812, 0.3355560302734375, 0.45734596252441406, 0.1571025848388672, 0.4639434814453125, 0.3629150390625, 0.18246841430664062, -0.18316650390625, 0.60101318359375, -0.000244140625, 0.02364349365234375, 0.24837875366210938, -0.2165069580078125, 0.22239303588867188, 0.12375450134277344, -0.1916046142578125, -0.1962871551513672, -0.2240753173828125, 0.0406646728515625, 0.42406463623046875, -0.18929290771484375, 0.6178321838378906, -0.8312606811523438, 0.4079132080078125, -0.3209953308105469, 0.06676483154296875, 0.5943183898925781, 0.124114990234375, -0.11517524719238281, -0.3513336181640625, 0.08000564575195312, -0.167633056640625, 0.36228179931640625, 0.14141845703125, 0.2489604949951172, 0.22000885009765625, -0.3702812194824219, -0.4581928253173828, -0.2737102508544922, 0.1042938232421875, 0.1358661651611328, -0.1385650634765625, 0.204498291015625, 0.437347412109375, 0.59674072265625, 0.11876678466796875, 0.20332717895507812, 0.682891845703125, 0.18532562255859375, 0.19351577758789062, -0.3685302734375, 0.3609771728515625, -0.09342193603515625, -0.3489265441894531, 0.500396728515625, 0.24399185180664062, 0.2445068359375, 0.14748764038085938, -0.3847084045410156, 0.8616409301757812, 0.011705398559570312, 0.01004791259765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000053.npy"} +{"epoch": 0.0801209372637944, "step": 54, "batch_size": 64, "mean": 0.09295853972434998, "std": 0.39413711428642273, "min": -1.0519485473632812, "p10": -0.32390785217285156, "median": 0.09784603118896484, "p90": 0.5021442413330078, "max": 1.27728271484375, "pos_frac": 0.578125, "sample": [-0.4429588317871094, 1.0933990478515625, -0.1161041259765625, -0.022329330444335938, 0.13593292236328125, 0.05712890625, 0.45168304443359375, -0.07752227783203125, 0.40663909912109375, -0.6881103515625, -0.1923351287841797, -0.06041908264160156, 0.3756103515625, -0.7012176513671875, 0.22774505615234375, 0.21930503845214844, 0.52294921875, 0.08355140686035156, 0.188629150390625, -1.0519485473632812, 0.1890411376953125, 1.27728271484375, -0.019689559936523438, 0.5048255920410156, -0.3328857421875, -0.08260726928710938, 0.11240577697753906, 0.3598785400390625, 0.09781646728515625, 0.15819931030273438, -0.06391143798828125, -0.15271377563476562, 0.158905029296875, -0.12127113342285156, 0.5205459594726562, 0.35272216796875, 0.49588775634765625, -0.17217254638671875, 0.4477119445800781, 0.4117012023925781, 0.3672294616699219, -0.205810546875, 0.3778038024902344, 0.32636260986328125, -0.23709869384765625, 0.39673423767089844, -0.23239517211914062, -0.04301261901855469, 0.09787559509277344, 0.195587158203125, -0.41983795166015625, 0.13567352294921875, -0.3029594421386719, 0.19813919067382812, 0.67779541015625, -0.0500335693359375, 0.17851638793945312, 0.009368896484375, -0.026508331298828125, -0.2227630615234375, 0.03227996826171875, -0.03765869140625, 0.821044921875, -0.6382865905761719], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000054.npy"} +{"epoch": 0.08163265306122448, "step": 55, "batch_size": 64, "mean": 0.2320151925086975, "std": 0.4300665855407715, "min": -0.6689605712890625, "p10": -0.24718589782714842, "median": 0.19739341735839844, "p90": 0.6757518768310548, "max": 1.42108154296875, "pos_frac": 0.6875, "sample": [1.1859130859375, 0.388275146484375, -0.24024581909179688, 0.3235015869140625, -0.18706703186035156, -0.1702880859375, 0.2998504638671875, -0.30572509765625, -0.3324241638183594, 0.11968231201171875, 0.11701202392578125, 0.255889892578125, -0.193084716796875, -0.06574249267578125, 0.08860015869140625, 1.3429183959960938, 0.9163131713867188, 0.34181976318359375, -0.0397796630859375, -0.301605224609375, 0.43058013916015625, -0.01727294921875, 0.26602935791015625, 0.14998245239257812, 1.42108154296875, 0.19837570190429688, 0.020849227905273438, 0.1618194580078125, 0.6605606079101562, 0.034423828125, 0.47943878173828125, -0.6689605712890625, 0.6535491943359375, -0.39377593994140625, 0.5113906860351562, -0.10294342041015625, 0.21003341674804688, 0.5398502349853516, 0.6292343139648438, -0.5172080993652344, 0.07527923583984375, 0.39362335205078125, -0.22520065307617188, 0.1964111328125, 0.2517242431640625, -0.15387725830078125, 1.1879730224609375, -0.0532989501953125, -0.25016021728515625, -0.0278778076171875, 0.21311569213867188, 0.6822624206542969, 0.453765869140625, 0.13443756103515625, 0.14956283569335938, 0.4247245788574219, 0.416412353515625, 0.21341514587402344, 0.07286834716796875, 1.052459716796875, 0.3704986572265625, 0.5049667358398438, 0.6287879943847656, -0.07375335693359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000055.npy"} +{"epoch": 0.08314436885865457, "step": 56, "batch_size": 64, "mean": 0.1485109031200409, "std": 0.3642500936985016, "min": -1.4323577880859375, "p10": -0.23987693786621092, "median": 0.16664505004882812, "p90": 0.5058380126953126, "max": 1.0334930419921875, "pos_frac": 0.75, "sample": [0.19441986083984375, 0.3681907653808594, 0.1521739959716797, -0.07610130310058594, -0.13532257080078125, 0.5876312255859375, 0.705810546875, -0.0047760009765625, 0.3789100646972656, -0.24254798889160156, -0.3530464172363281, -0.0417633056640625, 0.4705047607421875, -0.26993560791015625, 0.36212158203125, 0.3370819091796875, 0.08744049072265625, -0.6714229583740234, 0.5209808349609375, 0.5649909973144531, -0.38729095458984375, 0.6115264892578125, 0.41641807556152344, -0.018978118896484375, 0.06517982482910156, 0.15776824951171875, 0.10302352905273438, 0.10196685791015625, -0.1532306671142578, 0.4513816833496094, -0.07049942016601562, 0.3404388427734375, 0.44986724853515625, 0.07125663757324219, 0.41594696044921875, 0.1755218505859375, -1.4323577880859375, -0.06441879272460938, 0.0029239654541015625, 0.3200569152832031, 0.11163520812988281, 0.3508453369140625, 1.0334930419921875, -0.5921287536621094, 0.13274765014648438, 0.19365692138671875, 0.17992401123046875, 0.01197052001953125, 0.2406597137451172, 0.33954429626464844, 0.18766403198242188, 0.09320449829101562, 0.2454681396484375, 0.20380020141601562, 0.444671630859375, -0.2336444854736328, 0.10332870483398438, 0.26798248291015625, 0.4401741027832031, 0.28159332275390625, 0.025907516479492188, 0.8231582641601562, 0.03689002990722656, 0.09030914306640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000056.npy"} +{"epoch": 0.08465608465608465, "step": 57, "batch_size": 64, "mean": 0.14022645354270935, "std": 0.45907482504844666, "min": -1.022369384765625, "p10": -0.3596908569335937, "median": 0.16501998901367188, "p90": 0.6786140441894531, "max": 1.86309814453125, "pos_frac": 0.640625, "sample": [-0.3745269775390625, -0.17000961303710938, 0.3582897186279297, 1.86309814453125, 0.21564292907714844, 0.3797149658203125, 0.7280120849609375, -0.73114013671875, 0.6809463500976562, 0.8093948364257812, 0.710693359375, 0.4431133270263672, -0.609039306640625, 0.33544158935546875, -0.07928466796875, -0.30209922790527344, 0.11452484130859375, 0.3408203125, 0.7234573364257812, 0.09126853942871094, 0.7349700927734375, -0.13980865478515625, 0.4118061065673828, -0.080108642578125, 0.21169662475585938, 0.0081787109375, 0.15549087524414062, 0.16208648681640625, -0.020235061645507812, -0.9545135498046875, 0.1679534912109375, 0.11510658264160156, 0.2620735168457031, 0.5579605102539062, -0.6563339233398438, 0.4366455078125, -1.022369384765625, 0.20209312438964844, 0.06380462646484375, -0.3250732421875, -0.088653564453125, -0.5738754272460938, -0.03603363037109375, 0.3168468475341797, 0.30049896240234375, -0.2240142822265625, 0.6731719970703125, 0.30768585205078125, -0.08936309814453125, 0.08011054992675781, 0.5568389892578125, -0.269500732421875, 0.19556427001953125, 0.4697723388671875, -0.25296592712402344, 0.51129150390625, 0.380950927734375, 0.5485076904296875, 0.2292957305908203, 0.20546722412109375, -0.09602737426757812, -0.07303619384765625, 0.11737060546875, -0.035152435302734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000057.npy"} +{"epoch": 0.08616780045351474, "step": 58, "batch_size": 64, "mean": 0.13939306139945984, "std": 0.516380786895752, "min": -0.9222373962402344, "p10": -0.4930816650390625, "median": 0.10617828369140625, "p90": 0.7946670532226563, "max": 1.61260986328125, "pos_frac": 0.59375, "sample": [-0.9222373962402344, 0.4239044189453125, 0.215118408203125, -0.17203903198242188, 0.02214813232421875, 0.3133697509765625, -0.5110855102539062, 0.78955078125, 0.3365325927734375, -0.66876220703125, 0.2548408508300781, 0.1774272918701172, 0.171234130859375, -0.918121337890625, -0.0964508056640625, 0.11919784545898438, 0.2329559326171875, 0.13951873779296875, 0.50164794921875, 0.7968597412109375, 0.24039268493652344, -0.0742034912109375, 0.35573577880859375, 0.7519378662109375, -0.2444000244140625, -0.19421005249023438, 0.8230667114257812, 0.1397552490234375, -0.5268325805664062, 0.4145088195800781, -0.379730224609375, -0.2540016174316406, -0.0482940673828125, -0.172454833984375, 1.45831298828125, -0.0597991943359375, 1.232879638671875, 1.61260986328125, -0.27193450927734375, 0.10269927978515625, -0.2141571044921875, -0.05066680908203125, -0.06755828857421875, -0.0117950439453125, -0.7377700805664062, 1.314971923828125, -0.030384063720703125, 0.10965728759765625, -0.1579132080078125, 0.24585723876953125, 0.7486495971679688, 0.18449783325195312, -0.4515533447265625, -0.10092544555664062, 0.2797126770019531, 1.1061553955078125, 0.05162620544433594, 0.02684783935546875, -0.5108795166015625, 0.285125732421875, 0.4556884765625, 0.07106781005859375, 0.171539306640625, 0.09171295166015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000058.npy"} +{"epoch": 0.08767951625094482, "step": 59, "batch_size": 64, "mean": 0.15986764430999756, "std": 0.3881513774394989, "min": -0.6501617431640625, "p10": -0.29738464355468747, "median": 0.15989398956298828, "p90": 0.563589096069336, "max": 1.81915283203125, "pos_frac": 0.65625, "sample": [0.4941139221191406, -0.27471923828125, -0.1949005126953125, -0.214508056640625, -0.2324657440185547, -0.32968902587890625, 0.3345069885253906, -0.10321807861328125, 0.6880607604980469, 0.9753875732421875, 0.531158447265625, 0.09247970581054688, 0.496307373046875, 0.5774879455566406, 0.24939346313476562, 0.2792701721191406, 0.30340576171875, -0.3782997131347656, 0.0778350830078125, 0.1490936279296875, 0.6345119476318359, 0.39641571044921875, 0.02655792236328125, 0.21537017822265625, -0.6501617431640625, 0.06021881103515625, 0.2966651916503906, 0.46337890625, 0.677398681640625, -0.01184844970703125, 0.2191162109375, -0.3445396423339844, 0.12390518188476562, -0.07086944580078125, 0.0770111083984375, -0.193084716796875, -0.0862274169921875, -0.09979057312011719, -0.5534782409667969, 0.16176414489746094, 1.81915283203125, 0.24935150146484375, -0.35992431640625, 0.3309803009033203, 0.4037208557128906, 0.8415889739990234, 0.15802383422851562, 0.2433757781982422, 0.24616622924804688, 0.527557373046875, -0.24427413940429688, 0.22670745849609375, -0.0467376708984375, -0.040374755859375, 0.3175201416015625, 0.0347442626953125, 0.189849853515625, 0.3044281005859375, 0.038730621337890625, -0.03293609619140625, 0.2323932647705078, -0.307098388671875, 0.3249244689941406, -0.08935546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000059.npy"} +{"epoch": 0.08919123204837491, "step": 60, "batch_size": 64, "mean": 0.07655695080757141, "std": 0.37988728284835815, "min": -0.7493438720703125, "p10": -0.3639503479003906, "median": 0.0032787322998046875, "p90": 0.5928871154785158, "max": 1.1510486602783203, "pos_frac": 0.5, "sample": [0.44829559326171875, -0.38082122802734375, 0.12752151489257812, 0.38128662109375, -0.13462066650390625, 0.5007858276367188, -0.1403350830078125, -0.28429412841796875, -0.1363677978515625, -0.1186065673828125, 0.3152046203613281, 0.679840087890625, -0.10567665100097656, 0.09988021850585938, -0.573974609375, 0.65814208984375, -0.11840438842773438, 0.18523597717285156, 1.1510486602783203, 0.18747711181640625, -0.571319580078125, -0.17589187622070312, 0.34424591064453125, -0.04156494140625, 0.4611949920654297, 0.2505035400390625, -0.10164642333984375, -0.06657981872558594, 0.23731231689453125, -0.032123565673828125, -0.07317543029785156, -0.3142242431640625, 0.553436279296875, 0.1713104248046875, -0.16861343383789062, 0.6097946166992188, 0.2889537811279297, -0.593841552734375, -0.055103302001953125, -0.11723709106445312, 0.03234100341796875, 0.6958999633789062, 0.4294891357421875, -0.13796234130859375, 0.3906593322753906, 0.7230377197265625, 0.3415260314941406, -0.3245849609375, 0.15361404418945312, -0.17624664306640625, -0.291229248046875, 0.3842010498046875, -0.15944290161132812, 0.1026611328125, 0.136993408203125, -0.025783538818359375, 0.17176437377929688, -0.4381103515625, 0.47548675537109375, -0.27385902404785156, -0.7493438720703125, -0.5576057434082031, 0.7088813781738281, -0.05978965759277344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000060.npy"} +{"epoch": 0.09070294784580499, "step": 61, "batch_size": 64, "mean": 0.09999510645866394, "std": 0.385189950466156, "min": -0.80474853515625, "p10": -0.44753017425537106, "median": 0.09030628204345703, "p90": 0.5337036132812502, "max": 0.9893798828125, "pos_frac": 0.625, "sample": [0.0770263671875, 0.1708221435546875, 0.3935966491699219, 0.56353759765625, -0.012691497802734375, 0.08737945556640625, 0.472442626953125, 0.094940185546875, 0.0476226806640625, -0.13930511474609375, 0.48139190673828125, -0.4018268585205078, 0.12125396728515625, 0.3896636962890625, 0.4960670471191406, -0.39933013916015625, 0.49108123779296875, -0.0914764404296875, 0.2213134765625, 0.4071083068847656, 0.145904541015625, 0.38188743591308594, -0.1751251220703125, -0.024637222290039062, 0.05200958251953125, -0.09036064147949219, -0.1325836181640625, -0.4671173095703125, 0.33518218994140625, -0.49817657470703125, 0.4242534637451172, 0.09323310852050781, -0.64166259765625, 0.1435089111328125, 0.0267333984375, -0.06204986572265625, -0.03522491455078125, -0.5959262847900391, 0.31401824951171875, 0.749908447265625, 0.9893798828125, -0.5034027099609375, 0.7274932861328125, -0.151397705078125, 0.055706024169921875, 0.5080795288085938, 0.07651901245117188, -0.1689586639404297, -0.3388099670410156, -0.2479114532470703, 0.4234771728515625, 0.1392059326171875, 0.21310806274414062, 0.06999969482421875, 0.37740325927734375, -0.80474853515625, -0.2472991943359375, 0.31732940673828125, -0.2446441650390625, 0.2228546142578125, 0.8452911376953125, 0.7594833374023438, -0.5775489807128906, 0.5446853637695312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000061.npy"} +{"epoch": 0.09221466364323508, "step": 62, "batch_size": 64, "mean": 0.13938573002815247, "std": 0.4895670413970947, "min": -2.4387664794921875, "p10": -0.2772224426269531, "median": 0.10404205322265625, "p90": 0.7265762329101564, "max": 1.0694732666015625, "pos_frac": 0.640625, "sample": [1.0083847045898438, -0.000614166259765625, -0.471588134765625, 0.08011627197265625, 0.4578704833984375, -0.04779052734375, 0.9559860229492188, -0.1795215606689453, 0.056865692138671875, 0.01673126220703125, 0.7414627075195312, -0.1674823760986328, 0.32001495361328125, -0.25146484375, -0.38933563232421875, 0.12796783447265625, -0.06542205810546875, -0.17920684814453125, -0.31468772888183594, 0.6379776000976562, 0.06256866455078125, -0.1016845703125, -0.5152587890625, 0.6723003387451172, 0.477935791015625, -0.2064361572265625, -0.05318450927734375, 0.1844940185546875, 0.44631195068359375, 0.1581573486328125, 0.19786834716796875, 0.29457664489746094, 0.15319252014160156, -0.3800506591796875, 0.7545013427734375, -2.4387664794921875, -0.037689208984375, 0.3551445007324219, 0.011014938354492188, 0.215087890625, 0.18046951293945312, 0.24143218994140625, 0.4801597595214844, 0.43733978271484375, 0.8252029418945312, -0.1071014404296875, 0.0515594482421875, -0.06590652465820312, 0.07898521423339844, 0.8880233764648438, -0.12554931640625, 0.5840797424316406, 0.0492095947265625, 0.20250701904296875, 0.03952789306640625, -0.04095458984375, 1.0694732666015625, 0.277099609375, -0.011203765869140625, -0.28826141357421875, 0.21033477783203125, 0.6918411254882812, 0.417022705078125, 0.24904823303222656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000062.npy"} +{"epoch": 0.09372637944066516, "step": 63, "batch_size": 64, "mean": 0.1857585310935974, "std": 0.378060519695282, "min": -0.7880210876464844, "p10": -0.2858402252197265, "median": 0.1984548568725586, "p90": 0.57288818359375, "max": 1.1843490600585938, "pos_frac": 0.703125, "sample": [0.573638916015625, 0.5609130859375, -0.42319488525390625, -0.0402069091796875, 0.20196533203125, -0.13251495361328125, -0.009552001953125, -0.304229736328125, 1.1843490600585938, 0.515289306640625, -0.5189208984375, -0.5161285400390625, -2.288818359375e-05, -0.6483917236328125, 0.8841400146484375, -0.16919708251953125, -0.00128173828125, 0.111541748046875, 0.1969738006591797, 0.1999359130859375, -0.24293136596679688, -0.013860702514648438, 0.02060699462890625, 0.36700439453125, 0.2479248046875, 0.04993629455566406, -0.0774993896484375, 0.1878204345703125, 0.28794288635253906, 0.466522216796875, 0.09032440185546875, 0.5531044006347656, 0.3076896667480469, 0.3942756652832031, 0.28340911865234375, -0.39418792724609375, 0.3175697326660156, 0.45409393310546875, 0.5141124725341797, 0.022863388061523438, 0.20415687561035156, 0.79730224609375, 0.3278026580810547, 0.6099987030029297, 0.4602031707763672, 0.1926860809326172, 0.303314208984375, 0.5172042846679688, 0.05327606201171875, 0.308319091796875, 0.18474960327148438, 0.3093681335449219, 0.8885498046875, -0.7880210876464844, -0.19720458984375, -0.0130767822265625, 0.1918792724609375, 0.2457733154296875, -0.1607799530029297, 0.04936981201171875, 0.40727996826171875, 0.9067840576171875, 0.0166473388671875, 0.571136474609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000063.npy"} +{"epoch": 0.09523809523809523, "step": 64, "batch_size": 64, "mean": 0.08198148012161255, "std": 0.40862542390823364, "min": -0.9658050537109375, "p10": -0.33078727722167967, "median": 0.0352325439453125, "p90": 0.5775205612182619, "max": 0.9760169982910156, "pos_frac": 0.5625, "sample": [-0.1194000244140625, 0.9568328857421875, -0.210205078125, 0.4531593322753906, 0.1129608154296875, -0.084503173828125, 0.4694499969482422, -0.3025684356689453, 0.2983379364013672, -0.1991119384765625, -0.24535751342773438, -0.0041351318359375, 0.0959930419921875, -0.21820831298828125, 0.3617820739746094, 0.2863616943359375, -0.659576416015625, -0.0193634033203125, 0.7134895324707031, 0.9760169982910156, 0.7778472900390625, 0.36840057373046875, 0.03282928466796875, 0.004589080810546875, -0.34207916259765625, 0.3582611083984375, 0.4324378967285156, -0.86090087890625, 0.048130035400390625, 0.5330028533935547, 0.20151901245117188, -0.230560302734375, 0.19255828857421875, -0.3044395446777344, -0.1753387451171875, 0.15480804443359375, 0.3014564514160156, -0.9658050537109375, 0.206298828125, 0.11714935302734375, 0.5067520141601562, 0.0788421630859375, 0.03763580322265625, -0.069854736328125, 0.51483154296875, -0.204193115234375, -0.13907814025878906, 0.0028839111328125, -0.35711669921875, 0.8344268798828125, 0.5155258178710938, 0.116729736328125, 0.9546012878417969, -0.5233688354492188, -0.23107528686523438, 0.30788612365722656, 0.022693634033203125, -0.21148300170898438, -0.24158477783203125, -0.06485748291015625, -0.1442108154296875, -0.36127471923828125, -0.20661544799804688, 0.5965995788574219], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000064.npy"} +{"epoch": 0.09674981103552532, "step": 65, "batch_size": 64, "mean": 0.29260820150375366, "std": 0.4603598713874817, "min": -0.7862129211425781, "p10": -0.3207916259765625, "median": 0.27780914306640625, "p90": 0.9269060134887697, "max": 1.469207763671875, "pos_frac": 0.75, "sample": [0.4821949005126953, 0.2921142578125, -0.11352920532226562, 0.57403564453125, 0.06573486328125, 0.2635040283203125, -0.32135009765625, 1.0428924560546875, 0.39478302001953125, 0.5507659912109375, -0.426849365234375, 0.4161224365234375, 0.7552509307861328, 0.48664093017578125, 1.0367698669433594, 0.2473888397216797, 0.626312255859375, 0.6426124572753906, 0.0052642822265625, 1.469207763671875, 0.23846435546875, -0.2243194580078125, 0.42840576171875, 1.049041748046875, 0.8162574768066406, -0.000732421875, -0.7862129211425781, -0.025875091552734375, -0.39037322998046875, 0.5055999755859375, 0.016511917114257812, 0.3860931396484375, -0.319488525390625, -0.07724761962890625, 0.7344131469726562, -0.59375, 0.1037750244140625, -0.4345855712890625, 0.21787261962890625, 0.5058059692382812, 0.8058948516845703, 0.044921875, 0.47170257568359375, 0.11470413208007812, 0.3052253723144531, 0.15572357177734375, -0.0346221923828125, 0.20438385009765625, -0.5966110229492188, 0.15174102783203125, 0.9601783752441406, 0.12379646301269531, 0.1589202880859375, 0.9463787078857422, -0.2271575927734375, 0.528656005859375, 0.6508407592773438, 0.00150299072265625, 1.0818939208984375, 0.4823455810546875, 0.3388671875, 0.569854736328125, 0.8814697265625, -0.03320884704589844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000065.npy"} +{"epoch": 0.0982615268329554, "step": 66, "batch_size": 64, "mean": 0.08048596978187561, "std": 0.4345366656780243, "min": -1.2196884155273438, "p10": -0.4282363891601562, "median": 0.07460975646972656, "p90": 0.5066528320312501, "max": 2.0427627563476562, "pos_frac": 0.625, "sample": [0.07677459716796875, -0.4755859375, 0.14404678344726562, 0.18622779846191406, 0.06788063049316406, -0.171417236328125, 0.3407459259033203, -0.06964111328125, 0.04821205139160156, 0.5305118560791016, 0.19904327392578125, 0.9827117919921875, 0.2211456298828125, 0.17169570922851562, -0.2342529296875, -0.01015472412109375, -0.21883392333984375, 0.3072242736816406, 0.46649169921875, 0.38106536865234375, 0.0390777587890625, 0.20140457153320312, -0.521209716796875, -0.19510650634765625, 0.14987945556640625, 0.1351032257080078, 0.6053600311279297, 0.31507110595703125, 0.15383148193359375, 0.337158203125, 0.04404449462890625, -0.30072021484375, -0.078857421875, 0.3979301452636719, 0.21155357360839844, -0.13135147094726562, 0.003940582275390625, 0.01708221435546875, 0.4601783752441406, -0.18109130859375, -0.394012451171875, -1.2196884155273438, 0.52386474609375, 0.2413787841796875, -0.47888946533203125, -0.39910888671875, -0.0151824951171875, -0.22397613525390625, -0.0003204345703125, 0.10671043395996094, 0.04450225830078125, -0.22367095947265625, 0.16693115234375, 0.769744873046875, 2.0427627563476562, -0.5410079956054688, -0.087982177734375, -0.5423049926757812, 0.415069580078125, 0.5648040771484375, 0.07244491577148438, 0.0820465087890625, 0.080535888671875, -0.4407196044921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000066.npy"} +{"epoch": 0.09977324263038549, "step": 67, "batch_size": 64, "mean": 0.1473003625869751, "std": 0.4404861032962799, "min": -0.7859649658203125, "p10": -0.31556243896484376, "median": 0.14464378356933594, "p90": 0.560498046875, "max": 2.056427001953125, "pos_frac": 0.625, "sample": [0.074462890625, -0.3059844970703125, -0.1491546630859375, -0.10071563720703125, -0.145599365234375, 0.26894569396972656, 0.6202774047851562, 0.3865375518798828, 0.49420928955078125, -0.30834197998046875, -0.2998199462890625, -0.1894683837890625, 0.325042724609375, -0.148223876953125, -0.34293365478515625, 0.3983592987060547, -0.22559547424316406, 0.5640983581542969, 0.0500640869140625, 0.3314361572265625, -0.13224029541015625, 0.14177703857421875, 0.081268310546875, 0.6797409057617188, -0.19495773315429688, -0.7859649658203125, 0.3557548522949219, 0.19335174560546875, 0.8470458984375, -0.4327201843261719, 0.27286529541015625, 0.5520973205566406, 0.00933074951171875, -0.14923095703125, 0.2388153076171875, 0.36514854431152344, -0.47881317138671875, 0.3634815216064453, -0.18773651123046875, 0.2645416259765625, 1.4376983642578125, -0.33899688720703125, 0.2830352783203125, 0.12641143798828125, 0.122467041015625, -0.1321258544921875, 0.395416259765625, 0.17655372619628906, -0.01679229736328125, -0.0758819580078125, 0.24417877197265625, 2.056427001953125, -0.023693084716796875, 0.21163558959960938, 0.4764595031738281, -0.6021575927734375, 0.16260528564453125, 0.18778228759765625, 0.10624885559082031, 0.371856689453125, 0.14751052856445312, 0.6314468383789062, 0.49664306640625, -0.31865692138671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000067.npy"} +{"epoch": 0.10128495842781557, "step": 68, "batch_size": 64, "mean": 0.21536031365394592, "std": 0.46667250990867615, "min": -0.9891815185546875, "p10": -0.27817153930664057, "median": 0.19977951049804688, "p90": 0.8359207153320315, "max": 1.2284698486328125, "pos_frac": 0.671875, "sample": [1.2284698486328125, -0.08493804931640625, 0.358428955078125, 0.783721923828125, -0.0072174072265625, 1.1037750244140625, 0.5755233764648438, 0.5369911193847656, 0.07349967956542969, -0.1851348876953125, 0.1571807861328125, -0.717559814453125, 0.585784912109375, 0.7114791870117188, -0.4178466796875, 0.9034233093261719, -0.02054595947265625, 0.30340576171875, -0.9891815185546875, -0.06951141357421875, 0.19757080078125, -0.007503509521484375, 0.4752197265625, 0.4377098083496094, -0.18270111083984375, 0.3199138641357422, 0.26171875, -0.21905899047851562, 0.16522216796875, 0.2802276611328125, -0.22550010681152344, 0.17870330810546875, 0.3425006866455078, 0.5749359130859375, -0.47483062744140625, 0.5579071044921875, 0.438690185546875, 0.19802093505859375, 0.15841293334960938, 0.2528114318847656, 0.725067138671875, -0.11273574829101562, 0.8582916259765625, 0.1278228759765625, -0.23477935791015625, -0.0444793701171875, 0.24059295654296875, -0.032016754150390625, -0.2967681884765625, 0.3449859619140625, 1.0133056640625, 0.23116302490234375, -0.6738357543945312, 0.6716079711914062, 0.0587158203125, 0.2015380859375, 0.0171661376953125, 1.0653305053710938, 0.65667724609375, 0.9019565582275391, 0.4870872497558594, -0.2059326171875, -0.89166259765625, 0.1142425537109375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000068.npy"} +{"epoch": 0.10279667422524566, "step": 69, "batch_size": 64, "mean": 0.16213801503181458, "std": 0.46769818663597107, "min": -1.0941238403320312, "p10": -0.3553169250488281, "median": 0.205352783203125, "p90": 0.6907272338867189, "max": 1.227996826171875, "pos_frac": 0.671875, "sample": [0.08370208740234375, 0.38739776611328125, 0.8159942626953125, -0.3127899169921875, 0.31801605224609375, 0.21736526489257812, -0.3459491729736328, 0.1732025146484375, 0.2894134521484375, -0.4028472900390625, -0.0304107666015625, 0.21963119506835938, 0.5218467712402344, 0.49371337890625, 0.7098312377929688, 0.20137786865234375, 0.03396415710449219, 0.8640975952148438, 0.1255340576171875, -0.04131317138671875, 1.227996826171875, -0.8028564453125, 0.7026519775390625, 0.3093109130859375, 0.57781982421875, 0.288482666015625, 0.2299041748046875, 0.6167526245117188, -0.1718597412109375, -0.6079025268554688, 0.17090606689453125, 1.163848876953125, 0.40691375732421875, 0.03229522705078125, 0.2997550964355469, -1.006805419921875, 0.48378753662109375, -0.2700538635253906, 0.2537384033203125, -0.20827484130859375, 0.66290283203125, 0.4434852600097656, 0.0297088623046875, -0.12137603759765625, 0.10309600830078125, -0.34937286376953125, 0.47930908203125, 0.4869651794433594, -0.34833526611328125, -1.0941238403320312, -0.5751495361328125, 0.4174842834472656, -3.4332275390625e-05, -0.11823654174804688, 0.20932769775390625, -0.24109649658203125, -0.3578643798828125, 0.45990753173828125, 0.4861106872558594, 0.1644420623779297, 0.4713134765625, -0.08795356750488281, 1.1150970458984375, 0.12303733825683594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000069.npy"} +{"epoch": 0.10430839002267574, "step": 70, "batch_size": 64, "mean": 0.1359853446483612, "std": 0.42658212780952454, "min": -0.8558197021484375, "p10": -0.41484203338623044, "median": 0.17448997497558594, "p90": 0.7203418731689453, "max": 0.9934921264648438, "pos_frac": 0.609375, "sample": [0.78802490234375, -0.2518653869628906, 0.7091064453125, 0.372528076171875, 0.3137645721435547, 0.171722412109375, -0.2308349609375, 0.2351837158203125, 0.40164947509765625, -0.4261665344238281, -0.00040435791015625, -0.4531402587890625, -0.8558197021484375, -0.03610992431640625, 0.3710975646972656, -0.38841819763183594, -0.5096206665039062, -0.022308349609375, -0.09088134765625, 0.08252334594726562, 0.39569854736328125, 0.4983386993408203, 0.5499057769775391, 0.24468231201171875, -0.1683197021484375, 0.02048492431640625, 0.12546539306640625, 0.49239349365234375, -0.36217498779296875, 0.10516357421875, 0.09872817993164062, 0.21275711059570312, -0.20409393310546875, 0.17725753784179688, 0.32132720947265625, 0.8741531372070312, 0.9934921264648438, 0.3028564453125, 0.4780921936035156, 0.26847076416015625, 0.19719314575195312, -0.3390483856201172, -0.12656402587890625, 0.4477386474609375, -0.015338897705078125, 0.7206840515136719, -0.662933349609375, 0.71954345703125, 0.25470733642578125, 0.7454757690429688, 0.70526123046875, 0.2587738037109375, -0.17681121826171875, 0.8128204345703125, -0.1466045379638672, 0.31231689453125, -0.5578155517578125, -0.6467514038085938, 0.5091400146484375, 0.047008514404296875, -0.3509979248046875, -0.3712615966796875, -0.10119056701660156, 0.863006591796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000070.npy"} +{"epoch": 0.10582010582010581, "step": 71, "batch_size": 64, "mean": 0.08478212356567383, "std": 0.4544980525970459, "min": -1.1713714599609375, "p10": -0.5148855209350585, "median": 0.09717464447021484, "p90": 0.6769357681274416, "max": 1.0070571899414062, "pos_frac": 0.640625, "sample": [0.13634872436523438, 0.701202392578125, 0.11913108825683594, 0.6203136444091797, -0.27446746826171875, 0.35558319091796875, 0.49962425231933594, -0.21730804443359375, 0.6070404052734375, 0.58953857421875, 0.24829483032226562, -0.2745208740234375, -0.0231781005859375, -0.5546493530273438, -0.29537200927734375, 0.05243492126464844, 0.21605682373046875, 0.7931060791015625, -0.2975807189941406, 0.07521820068359375, -0.975433349609375, 0.837310791015625, 0.03664398193359375, 0.18032455444335938, 0.7283763885498047, 0.49932861328125, -0.2813873291015625, 0.1327228546142578, 0.8492622375488281, 0.4516448974609375, -0.63763427734375, 0.189483642578125, -0.2875556945800781, -0.5535373687744141, 0.23064041137695312, 0.4178199768066406, 0.16170120239257812, 0.410247802734375, 0.47290611267089844, -0.5787315368652344, 0.012420654296875, -1.1713714599609375, 0.1875152587890625, 0.4342193603515625, -0.24975967407226562, 0.012990951538085938, -0.4246978759765625, 0.9821243286132812, 0.06513214111328125, 0.05127716064453125, 0.014963150024414062, 0.15656661987304688, 1.0070571899414062, -0.086273193359375, -0.5800704956054688, -0.17206573486328125, 0.2833137512207031, 0.44760894775390625, -0.28293609619140625, -0.3570537567138672, -0.16928863525390625, 0.26578712463378906, 0.0122833251953125, -0.3746376037597656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000071.npy"} +{"epoch": 0.1073318216175359, "step": 72, "batch_size": 64, "mean": 0.29211464524269104, "std": 0.4857678711414337, "min": -0.6638755798339844, "p10": -0.22960033416748046, "median": 0.19662857055664062, "p90": 0.9990856170654301, "max": 1.70458984375, "pos_frac": 0.734375, "sample": [-0.34255218505859375, 0.35124969482421875, -0.017486572265625, 0.16622543334960938, 0.180694580078125, 0.4251136779785156, -0.11691665649414062, 0.8208484649658203, -0.6638755798339844, 0.131317138671875, 1.0424232482910156, 1.0907363891601562, 0.31446075439453125, 0.6760368347167969, 0.2938880920410156, -0.13543701171875, -0.5128860473632812, 0.4798603057861328, -0.4015960693359375, -0.00672149658203125, 1.1510467529296875, 0.24150466918945312, 0.570281982421875, 0.2903900146484375, 0.4077301025390625, 0.0888519287109375, -0.01201629638671875, -0.2308483123779297, 0.1162872314453125, -0.179351806640625, 1.70458984375, 0.8058528900146484, 0.6099472045898438, 0.5766143798828125, -0.336456298828125, -0.1932220458984375, 0.5518627166748047, 0.08277511596679688, 0.6154747009277344, 0.1037139892578125, 0.016002655029296875, 0.14412307739257812, 0.6935806274414062, 0.1515483856201172, 0.5082931518554688, 0.0409393310546875, 0.8979644775390625, -0.1181793212890625, 0.12070083618164062, 0.02942657470703125, 0.7331390380859375, 1.3127899169921875, 1.288787841796875, -0.17903709411621094, 0.2286834716796875, 0.08141517639160156, 0.8309974670410156, -0.22668838500976562, 0.2463226318359375, 0.08260917663574219, -0.5787277221679688, 1.1128005981445312, 0.3248710632324219, 0.21256256103515625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000072.npy"} +{"epoch": 0.10884353741496598, "step": 73, "batch_size": 64, "mean": 0.17031550407409668, "std": 0.4986055791378021, "min": -0.7948722839355469, "p10": -0.3911108016967773, "median": 0.17851638793945312, "p90": 0.9126270294189457, "max": 1.3222694396972656, "pos_frac": 0.625, "sample": [0.30408287048339844, -0.3593101501464844, -0.2485504150390625, -0.2521858215332031, -0.6019744873046875, 0.1315765380859375, -0.33936309814453125, -0.575439453125, -0.0678863525390625, 0.4919757843017578, -0.10356903076171875, 0.7096633911132812, 0.28165245056152344, -0.29911041259765625, -0.3620452880859375, -0.07786750793457031, 0.0106048583984375, 0.5375328063964844, 0.28401947021484375, -0.43143463134765625, -0.740753173828125, -0.149139404296875, -0.39624977111816406, 0.19123077392578125, 0.399078369140625, 0.7785110473632812, -0.2266387939453125, 0.03219413757324219, -0.3148345947265625, 0.2513160705566406, 0.49848175048828125, -0.09772109985351562, 0.27715110778808594, 0.22708892822265625, -0.6280059814453125, 0.23582839965820312, 0.2645740509033203, 1.0939483642578125, -0.7948722839355469, -0.267364501953125, -0.379119873046875, 0.3856658935546875, 0.1103057861328125, 0.06929779052734375, 0.165802001953125, 1.1129074096679688, 0.06110382080078125, 0.2999687194824219, 0.448638916015625, 1.1901321411132812, -0.2730712890625, 0.8130760192871094, 1.0510005950927734, 0.13254165649414062, 1.1149215698242188, -0.06146240234375, 0.5961647033691406, 0.5917510986328125, 0.566497802734375, 0.2521514892578125, 0.47081565856933594, 1.3222694396972656, 0.955291748046875, 0.23734664916992188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000073.npy"} +{"epoch": 0.11035525321239607, "step": 74, "batch_size": 64, "mean": 0.22797417640686035, "std": 0.4824661314487457, "min": -0.7061843872070312, "p10": -0.3023824691772461, "median": 0.14406585693359375, "p90": 0.8648254394531255, "max": 1.74456787109375, "pos_frac": 0.65625, "sample": [0.132904052734375, -0.35506439208984375, -0.3372039794921875, -0.11914825439453125, 0.3384246826171875, 0.43891143798828125, -0.10778045654296875, 0.596893310546875, 0.7153587341308594, 1.2473812103271484, 0.6507453918457031, 1.2250442504882812, 0.2305755615234375, 0.15480804443359375, 0.0155792236328125, 0.4343528747558594, 0.73028564453125, -0.2426300048828125, -0.2215290069580078, 0.0258636474609375, 0.662322998046875, 0.113037109375, 0.1659393310546875, 0.45652008056640625, 0.4603729248046875, -0.12141036987304688, -0.6551017761230469, 0.08469390869140625, 1.1509590148925781, -0.390167236328125, -0.10231399536132812, 0.5160312652587891, 0.9224853515625, -0.2115325927734375, -0.17362594604492188, 0.2823829650878906, -0.311431884765625, 0.0307159423828125, -0.7061843872070312, -0.17102432250976562, 0.5423583984375, -0.47505950927734375, -0.00919342041015625, 0.29734039306640625, 0.378662109375, 0.49755859375, 1.74456787109375, -0.2812671661376953, 0.46497344970703125, 1.100555419921875, 0.4581451416015625, 0.22867584228515625, 0.10219955444335938, 0.13332366943359375, -0.158172607421875, 0.029987335205078125, 0.3778839111328125, 0.0770263671875, 0.46681976318359375, 0.9801406860351562, 0.5967330932617188, -0.14145278930664062, -0.1657867431640625, -0.2121124267578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000074.npy"} +{"epoch": 0.11186696900982615, "step": 75, "batch_size": 64, "mean": 0.24743930995464325, "std": 0.47516876459121704, "min": -0.8313026428222656, "p10": -0.32327575683593746, "median": 0.20537376403808594, "p90": 0.9473527908325199, "max": 1.50384521484375, "pos_frac": 0.6875, "sample": [-0.1288890838623047, 0.1505279541015625, 0.08621597290039062, 0.3446998596191406, 1.4144363403320312, -0.1383037567138672, -0.0004119873046875, 0.3131065368652344, 1.2724761962890625, 0.4568614959716797, 0.181976318359375, 0.30466461181640625, -0.35074615478515625, 0.39264678955078125, -0.07256889343261719, 0.8657608032226562, -0.3928070068359375, -0.23668861389160156, 0.24663829803466797, 0.08366966247558594, 0.695526123046875, 0.6706390380859375, 1.50384521484375, 0.4548187255859375, 0.10963249206542969, 0.2546348571777344, 0.44484710693359375, -0.04632568359375, 0.24642181396484375, 0.20871353149414062, 0.057430267333984375, 0.9823207855224609, 0.3861083984375, 0.6482582092285156, 1.0646591186523438, 0.2609291076660156, 0.591888427734375, 0.1623992919921875, 1.2011566162109375, 0.5262184143066406, -0.43500518798828125, 0.4124336242675781, -0.5269737243652344, 0.18139076232910156, -0.00650787353515625, -0.8313026428222656, 0.3256492614746094, -0.086669921875, -0.12600135803222656, -0.12170028686523438, 0.3954925537109375, 0.43547821044921875, 0.02657318115234375, 0.29286956787109375, -0.33408355712890625, -0.29805755615234375, 0.115997314453125, -0.12162208557128906, -0.5796585083007812, 0.14014053344726562, 0.3740100860595703, -0.03981781005859375, 1.22406005859375, 0.20203399658203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000075.npy"} +{"epoch": 0.11337868480725624, "step": 76, "batch_size": 64, "mean": 0.16627269983291626, "std": 0.46595603227615356, "min": -0.8415985107421875, "p10": -0.3117366790771484, "median": 0.13995361328125, "p90": 0.8095512390136721, "max": 1.3387298583984375, "pos_frac": 0.65625, "sample": [-0.2917633056640625, 0.7244148254394531, -0.0871734619140625, -0.12744140625, -0.22121429443359375, -0.4765491485595703, 0.147705078125, 0.2699432373046875, 0.044040679931640625, 1.2608184814453125, 0.744049072265625, 1.3387298583984375, -0.14003944396972656, -0.271270751953125, 0.2791290283203125, -0.304107666015625, 0.42681121826171875, 0.4715385437011719, 0.8606719970703125, 0.034656524658203125, 0.5589447021484375, 0.44899749755859375, -0.5323944091796875, 0.8376235961914062, 0.8440399169921875, 0.0706024169921875, -0.13076019287109375, 1.3085556030273438, -0.6278228759765625, 0.1426544189453125, 0.23198890686035156, -0.19565391540527344, -0.8415985107421875, 0.07916259765625, -0.04900550842285156, 0.25756072998046875, 0.1885223388671875, -0.291351318359375, 0.00537872314453125, 0.1250457763671875, 0.2611122131347656, -0.8104095458984375, -0.13695144653320312, 0.4025306701660156, 0.0500030517578125, 0.4631633758544922, 0.1378021240234375, -0.3150062561035156, 0.2777595520019531, 1.083892822265625, -0.07659149169921875, -0.35659027099609375, 0.11758041381835938, 0.2649078369140625, 0.6713104248046875, 0.42395782470703125, -0.23831558227539062, -0.10988616943359375, 0.2488861083984375, 0.1421051025390625, 0.6053924560546875, 0.15324020385742188, 0.2427520751953125, 0.02536773681640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000076.npy"} +{"epoch": 0.11489040060468632, "step": 77, "batch_size": 64, "mean": 0.1326504349708557, "std": 0.4568830132484436, "min": -1.0106735229492188, "p10": -0.40048065185546866, "median": 0.10662841796875, "p90": 0.5945640563964846, "max": 1.6961212158203125, "pos_frac": 0.625, "sample": [0.020666122436523438, 0.12227249145507812, 0.45166015625, -0.055141448974609375, 0.29178810119628906, 1.3723678588867188, 0.02759552001953125, -0.09446334838867188, -0.8574752807617188, 0.3647308349609375, 0.640625, 0.4347496032714844, 0.19159317016601562, -0.1631317138671875, 0.020679473876953125, 1.0805206298828125, -0.166534423828125, 1.0339202880859375, -0.0141754150390625, 0.16173553466796875, -0.3069477081298828, 0.17948532104492188, 0.9674301147460938, -0.038059234619140625, -0.065582275390625, 0.07152175903320312, -0.23007583618164062, 0.53009033203125, 0.21340179443359375, 0.32019805908203125, -0.34128570556640625, -0.4407005310058594, -0.19041061401367188, 0.11885833740234375, 0.2936515808105469, 0.542327880859375, -0.14862823486328125, 0.1883544921875, 0.09160804748535156, 0.6169509887695312, -0.45577239990234375, -0.15118408203125, 0.09439849853515625, -0.11811637878417969, 0.35892486572265625, 1.6961212158203125, 0.08463287353515625, 0.038372039794921875, 0.22689056396484375, 0.14586257934570312, -0.02022552490234375, -0.027923583984375, 0.1263580322265625, 0.5236377716064453, 0.44188690185546875, 0.17377853393554688, -0.42584991455078125, -0.5185546875, -1.0106735229492188, -0.43264007568359375, 0.1563262939453125, 0.13039398193359375, 0.37164306640625, -0.1548309326171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000077.npy"} +{"epoch": 0.1164021164021164, "step": 78, "batch_size": 64, "mean": 0.09206125140190125, "std": 0.5869220495223999, "min": -1.7900848388671875, "p10": -0.7459083557128906, "median": 0.17238426208496094, "p90": 0.6135883331298829, "max": 1.4659652709960938, "pos_frac": 0.640625, "sample": [0.36920738220214844, -1.7900848388671875, -0.04207420349121094, -0.026325225830078125, 0.30109405517578125, 1.0777873992919922, 0.40456390380859375, -0.26378631591796875, 0.23754310607910156, 0.17517852783203125, -0.29717254638671875, -1.066131591796875, -0.21657752990722656, 0.57330322265625, -0.3125, 0.5229339599609375, -0.8484306335449219, 1.4659652709960938, 0.15480804443359375, 0.5576019287109375, 0.1988983154296875, -0.928192138671875, 0.7280426025390625, 0.4526634216308594, -0.44110107421875, 0.41054534912109375, 0.5766181945800781, -0.7568740844726562, 0.36989593505859375, -1.2531890869140625, 0.4721527099609375, 0.44054412841796875, 0.35985565185546875, 1.2546844482421875, 0.091400146484375, -0.7203216552734375, -0.965789794921875, -0.21270751953125, 0.017978668212890625, 0.4165382385253906, 0.3359794616699219, 0.2576026916503906, 1.2497406005859375, 0.29583740234375, -0.39321136474609375, 0.5146026611328125, -0.06873703002929688, 0.16958999633789062, 0.07815170288085938, 0.18242645263671875, -0.1826763153076172, 0.6294326782226562, -0.16352081298828125, 0.12706756591796875, -0.1720123291015625, -0.0147705078125, 0.020872116088867188, 0.10901641845703125, 0.08848381042480469, -0.41204071044921875, 0.9166412353515625, 0.2922096252441406, 0.2624092102050781, 0.28027915954589844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000078.npy"} +{"epoch": 0.11791383219954649, "step": 79, "batch_size": 64, "mean": 0.13618725538253784, "std": 0.440145343542099, "min": -1.2302474975585938, "p10": -0.31804199218749996, "median": 0.1778268814086914, "p90": 0.7105598449707033, "max": 1.1443367004394531, "pos_frac": 0.625, "sample": [0.3048419952392578, 0.32033538818359375, -0.48184967041015625, 0.5377597808837891, -0.33072662353515625, 0.230438232421875, -1.2302474975585938, 0.15765762329101562, 0.6698379516601562, 0.13253021240234375, -0.2406482696533203, -0.12453460693359375, 0.6235466003417969, 1.1443367004394531, -0.39038848876953125, -0.18883514404296875, 0.4613761901855469, 0.3249835968017578, -0.07541465759277344, 0.00759124755859375, -0.032711029052734375, 0.4303741455078125, -0.9321441650390625, -0.090576171875, 0.20085525512695312, -0.1307849884033203, 0.8628883361816406, -0.026092529296875, 0.295562744140625, -0.0285797119140625, -0.4475555419921875, -0.15441131591796875, 0.7280120849609375, 0.28916168212890625, 0.39952659606933594, 0.0026569366455078125, 0.2579841613769531, 0.3220100402832031, -0.28844451904296875, -0.226898193359375, 0.09299087524414062, 0.280364990234375, -0.21024322509765625, 0.06766128540039062, -0.27705955505371094, 0.1979961395263672, 0.05544281005859375, 0.2326812744140625, 0.22746658325195312, 0.24066925048828125, -0.0171356201171875, 0.5387649536132812, 0.927337646484375, -0.2738189697265625, -0.6595954895019531, 0.433990478515625, 0.24611663818359375, -0.23340606689453125, 0.510833740234375, 1.1055793762207031, 0.7784194946289062, 0.2974891662597656, 0.7681617736816406, 0.1018524169921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000079.npy"} +{"epoch": 0.11942554799697656, "step": 80, "batch_size": 64, "mean": 0.1793329417705536, "std": 0.48223090171813965, "min": -1.3770294189453125, "p10": -0.3214813232421874, "median": 0.10610389709472656, "p90": 0.789250946044922, "max": 1.1499404907226562, "pos_frac": 0.625, "sample": [-0.1230316162109375, 0.4960289001464844, 0.045501708984375, 0.412994384765625, 0.30342674255371094, -0.0624542236328125, 0.6910552978515625, 0.06725692749023438, -0.1203460693359375, -0.3511199951171875, 1.0327816009521484, 0.34384918212890625, 0.0844268798828125, -0.34452056884765625, 0.12162399291992188, 1.1499404907226562, 0.23500823974609375, 0.4550628662109375, 0.5757827758789062, -0.08905792236328125, 0.07965469360351562, -0.0088348388671875, -0.18699264526367188, 0.7988204956054688, 0.96319580078125, 0.7302780151367188, 0.43944549560546875, -0.0793609619140625, 1.0515289306640625, 0.7669219970703125, 0.45761871337890625, 0.11510467529296875, 0.0180511474609375, 0.32489967346191406, -0.26772308349609375, 0.529052734375, -0.2046031951904297, 0.09626007080078125, 0.3427619934082031, -1.3770294189453125, -0.636566162109375, -0.08106231689453125, -0.9620513916015625, 0.4167823791503906, -0.05306243896484375, -0.1873626708984375, 0.56072998046875, 0.24236297607421875, -0.24761199951171875, -0.37600135803222656, 1.090179443359375, 0.3199729919433594, -0.20428466796875, -0.6741943359375, 0.09710311889648438, -0.07588577270507812, 0.517181396484375, -0.017679214477539062, 0.4831867218017578, 0.8410797119140625, 0.06383514404296875, 0.188720703125, -0.101226806640625, 0.7599029541015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000080.npy"} +{"epoch": 0.12093726379440665, "step": 81, "batch_size": 64, "mean": 0.22928106784820557, "std": 0.5118841528892517, "min": -0.7968521118164062, "p10": -0.4108924865722655, "median": 0.17179584503173828, "p90": 0.9219669342041018, "max": 1.7434234619140625, "pos_frac": 0.734375, "sample": [0.3300323486328125, 0.17438697814941406, 0.9421348571777344, -0.16555023193359375, 1.1803092956542969, -0.7498016357421875, 0.260711669921875, -0.08675765991210938, 0.007961273193359375, -0.0316619873046875, 0.1892242431640625, 0.5929489135742188, -0.15602874755859375, 0.5709228515625, -0.1286334991455078, 0.0225372314453125, 0.0062847137451171875, 0.05411529541015625, -0.5742721557617188, 0.6186008453369141, 0.1600208282470703, 0.1692047119140625, 1.130096435546875, -0.284027099609375, 0.341888427734375, 0.4430999755859375, 0.24729156494140625, 0.41468048095703125, 0.072967529296875, 0.112548828125, 0.22461700439453125, 0.9865188598632812, 0.247802734375, -0.0822906494140625, 0.00043487548828125, 0.263519287109375, -0.4843292236328125, 1.48333740234375, 1.62701416015625, 0.4561614990234375, 0.5014724731445312, 0.38391876220703125, 0.19725799560546875, -0.46526336669921875, 0.5240325927734375, 0.04288482666015625, 0.5953311920166016, -0.11212921142578125, 0.16706085205078125, 0.874908447265625, 1.7434234619140625, 0.19219970703125, 0.11664199829101562, -0.7968521118164062, -0.14385223388671875, -0.6592025756835938, -0.05211448669433594, 0.13285064697265625, 0.14501190185546875, 0.17699432373046875, 0.2862224578857422, 0.6717720031738281, 0.0794525146484375, -0.5160560607910156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000081.npy"} +{"epoch": 0.12244897959183673, "step": 82, "batch_size": 64, "mean": 0.2993359863758087, "std": 0.420189768075943, "min": -1.26025390625, "p10": -0.24331321716308582, "median": 0.3276805877685547, "p90": 0.8068868637084962, "max": 1.021392822265625, "pos_frac": 0.84375, "sample": [0.0782623291015625, 0.43647193908691406, 0.2923736572265625, 0.13141632080078125, 0.8477706909179688, 0.2773418426513672, 0.6454563140869141, 0.6468887329101562, 0.81353759765625, 0.33495330810546875, 0.7913684844970703, 0.6052455902099609, -0.4942626953125, 0.8813629150390625, 0.20565414428710938, 0.21595382690429688, 0.34798431396484375, 0.32602691650390625, 0.4249114990234375, 0.7876358032226562, 0.20395660400390625, 0.3482818603515625, 0.6594047546386719, 0.08986282348632812, 0.3349761962890625, 0.787384033203125, 0.28023719787597656, -0.1077423095703125, 0.9004688262939453, 0.48468780517578125, 0.25556182861328125, 0.06019783020019531, 0.45961761474609375, -0.12461090087890625, -1.26025390625, 1.021392822265625, 0.8201828002929688, -0.3895263671875, 0.31929779052734375, 0.7450828552246094, 0.37127685546875, -0.451416015625, 0.359649658203125, -0.1188507080078125, 0.7087574005126953, 0.042186737060546875, 0.5120925903320312, 0.3293342590332031, 0.37537384033203125, 0.6733856201171875, -0.5795135498046875, -0.3641529083251953, 0.1646575927734375, 0.1010589599609375, 0.02677154541015625, 0.13805198669433594, -0.2941856384277344, 0.07129287719726562, 0.7295856475830078, 0.17261314392089844, 0.07723236083984375, 0.52593994140625, 0.16745376586914062, 0.9340934753417969], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000082.npy"} +{"epoch": 0.12396069538926682, "step": 83, "batch_size": 64, "mean": 0.23940622806549072, "std": 0.4937407374382019, "min": -1.1756362915039062, "p10": -0.3768644332885742, "median": 0.24355220794677734, "p90": 0.9229225158691406, "max": 1.4973678588867188, "pos_frac": 0.71875, "sample": [0.3820648193359375, 0.398956298828125, 0.30628204345703125, 0.1715087890625, 0.2264690399169922, -0.05821990966796875, -0.4057464599609375, -0.5013809204101562, 1.1496963500976562, 0.917694091796875, -0.29644775390625, 0.020044326782226562, 0.11259841918945312, 0.3012504577636719, 0.5829448699951172, 0.12618637084960938, -0.3898773193359375, 0.21949005126953125, 0.10416412353515625, 0.052154541015625, 0.29683685302734375, 0.46785736083984375, 1.01025390625, -0.35518646240234375, -1.1756362915039062, 0.1169281005859375, -0.3824920654296875, 0.3508453369140625, 0.012311935424804688, -0.06755638122558594, 0.5431671142578125, 0.13943862915039062, 0.9251632690429688, -0.6263847351074219, -0.36373329162597656, 0.3286476135253906, 0.42461585998535156, -0.6203155517578125, 0.36638450622558594, -0.0332489013671875, -0.137847900390625, 0.7155075073242188, 0.4310569763183594, 0.45873069763183594, -0.09146690368652344, 0.344573974609375, 0.20156097412109375, 0.9604949951171875, -0.18054962158203125, 0.5285263061523438, 1.4973678588867188, 0.17852020263671875, 0.2922821044921875, -0.2794227600097656, 0.6029891967773438, 0.5747909545898438, 0.5787906646728516, 0.7964820861816406, 1.0326690673828125, 1.40423583984375, 0.2606353759765625, -0.2516632080078125, 0.2059040069580078, 0.4201011657714844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000083.npy"} +{"epoch": 0.1254724111866969, "step": 84, "batch_size": 64, "mean": 0.1330859661102295, "std": 0.55341637134552, "min": -1.8518447875976562, "p10": -0.5053142547607421, "median": 0.07991600036621094, "p90": 0.8066659927368165, "max": 1.3836212158203125, "pos_frac": 0.625, "sample": [-0.15245819091796875, 0.4766731262207031, 0.47898101806640625, 0.5402450561523438, 0.0318756103515625, 0.45772361755371094, 0.7924880981445312, -0.06553268432617188, 0.5349445343017578, -0.44615936279296875, -1.8518447875976562, 0.9730796813964844, 0.6399955749511719, 0.510589599609375, 0.45922088623046875, -0.12334442138671875, -0.4563331604003906, 0.6089935302734375, 0.5312423706054688, -0.7996635437011719, -0.34329986572265625, -0.6941604614257812, 0.35526275634765625, -0.40093231201171875, 0.05908775329589844, 0.90484619140625, -0.21928024291992188, -0.2023468017578125, 0.07553482055664062, -0.6641616821289062, 0.5404472351074219, 0.0356903076171875, -0.2743110656738281, 0.039154052734375, -0.12862396240234375, 0.3671607971191406, 1.0749149322509766, 0.8127422332763672, 0.04402923583984375, -0.3337249755859375, -0.52630615234375, 1.080535888671875, 0.3756980895996094, -0.0031986236572265625, 0.58587646484375, 0.03391075134277344, 0.13330078125, 0.5288848876953125, 0.3895912170410156, 0.8183746337890625, 0.08429718017578125, -0.28853607177734375, 0.5462436676025391, 0.0055389404296875, 0.15976333618164062, -0.1034698486328125, 0.2064056396484375, 0.20664596557617188, -0.6272125244140625, -0.860321044921875, 0.6436347961425781, -0.0712738037109375, -0.3732490539550781, 1.3836212158203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000084.npy"} +{"epoch": 0.12698412698412698, "step": 85, "batch_size": 64, "mean": 0.30089178681373596, "std": 0.569708526134491, "min": -1.1716156005859375, "p10": -0.367154312133789, "median": 0.21941757202148438, "p90": 1.0752288818359377, "max": 1.7601318359375, "pos_frac": 0.71875, "sample": [1.0307846069335938, -0.2051849365234375, 0.5452308654785156, 0.11507034301757812, 0.41552734375, 0.23233795166015625, 0.21201324462890625, 0.19002723693847656, -0.3808174133300781, 1.1243553161621094, 1.0942764282226562, -1.1716156005859375, 0.7675495147705078, 1.7601318359375, 0.21169090270996094, 0.44854736328125, 1.6169013977050781, 0.0532073974609375, 0.180023193359375, 0.5940704345703125, 0.3025169372558594, 0.29149818420410156, 0.279693603515625, 0.9961166381835938, -0.029092788696289062, -0.011962890625, -0.049560546875, 0.3404121398925781, 0.9323310852050781, -0.00817108154296875, -0.17309188842773438, 0.6539421081542969, -0.2461090087890625, -0.947845458984375, -0.4399547576904297, 0.11140823364257812, 1.324676513671875, 0.5947513580322266, 0.0617218017578125, 0.3393077850341797, -0.5062179565429688, 0.5476608276367188, 1.3213119506835938, 0.6537895202636719, 1.3976669311523438, 0.7112751007080078, -0.1095123291015625, 0.7869758605957031, 0.2882080078125, 0.371826171875, -0.21895217895507812, -0.5044784545898438, 0.08326339721679688, 0.9706878662109375, 0.16217041015625, -0.087677001953125, 0.162872314453125, -0.44927978515625, 0.2627716064453125, -0.33527374267578125, 0.2268218994140625, 0.2010345458984375, 0.10388946533203125, 0.0595245361328125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000085.npy"} +{"epoch": 0.12849584278155707, "step": 86, "batch_size": 64, "mean": 0.2184884250164032, "std": 0.5682001709938049, "min": -1.05242919921875, "p10": -0.4728202819824218, "median": 0.1817455291748047, "p90": 0.8597312927246095, "max": 2.2962112426757812, "pos_frac": 0.6875, "sample": [-0.043914794921875, 0.025873184204101562, -0.222381591796875, 0.028356552124023438, 1.0605106353759766, 0.4006004333496094, -0.4298553466796875, 0.5552749633789062, 0.25336456298828125, 0.3005962371826172, -0.35173797607421875, -0.2935523986816406, 0.1983642578125, 0.7068252563476562, 0.47228240966796875, 0.30861663818359375, 0.22553062438964844, -0.37570762634277344, 0.143707275390625, -0.1296539306640625, -0.60443115234375, -0.7431182861328125, 0.0647125244140625, 0.09378623962402344, 0.2934226989746094, 0.03806304931640625, 0.6695709228515625, 0.290191650390625, 1.0281524658203125, 0.19453811645507812, 1.1218986511230469, -1.05242919921875, 0.63519287109375, 0.801361083984375, 0.0748748779296875, 0.25746917724609375, -0.6770095825195312, -0.05104827880859375, 0.8021240234375, 0.41934967041015625, -0.33492279052734375, -0.017187118530273438, 0.09137725830078125, -0.3906879425048828, 0.6514205932617188, 0.8653182983398438, 0.60406494140625, 0.8466949462890625, 0.6526412963867188, -0.2724800109863281, 1.337249755859375, 0.14746856689453125, -0.49123382568359375, 0.05613899230957031, 0.431427001953125, 2.2962112426757812, 0.2141094207763672, -0.5284271240234375, 0.16895294189453125, -0.5307044982910156, 0.16532516479492188, -0.14261245727539062, 1.2844963073730469, 0.38884735107421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000086.npy"} +{"epoch": 0.13000755857898716, "step": 87, "batch_size": 64, "mean": 0.29163050651550293, "std": 0.5492010116577148, "min": -1.0430450439453125, "p10": -0.42462844848632814, "median": 0.3182640075683594, "p90": 1.0290538787841799, "max": 1.68145751953125, "pos_frac": 0.65625, "sample": [0.20572662353515625, 1.26995849609375, 0.52337646484375, 0.09286880493164062, 0.8668403625488281, 0.8507232666015625, -1.0430450439453125, -0.426910400390625, 0.6231002807617188, 0.7595615386962891, -0.3141593933105469, -0.12960529327392578, 0.5186443328857422, -0.10629081726074219, -0.5125732421875, 0.7919692993164062, 0.2140350341796875, 1.0423851013183594, 0.9979476928710938, 0.47129058837890625, -0.220001220703125, 1.4178466796875, 0.677978515625, -0.02313995361328125, 0.6156997680664062, -0.5836410522460938, -0.2586669921875, -0.2361125946044922, 1.119384765625, -0.41930389404296875, 0.32366180419921875, -0.31261444091796875, 0.8619880676269531, -0.32012176513671875, -0.5193328857421875, -0.23003387451171875, 0.44210052490234375, 0.5120010375976562, 0.1600475311279297, 0.5178146362304688, 0.4110565185546875, 0.42536163330078125, 0.23638916015625, -0.5368003845214844, 0.38297271728515625, -0.09070968627929688, 0.41793251037597656, 1.1195297241210938, 0.5645713806152344, 0.3128662109375, 0.30307769775390625, -0.0229339599609375, 0.624359130859375, -0.18531036376953125, 0.2801399230957031, -0.0265655517578125, 0.11883544921875, -0.42917633056640625, 0.738800048828125, 0.15621185302734375, 1.68145751953125, 0.4117012023925781, 0.389892578125, 1.1592941284179688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000087.npy"} +{"epoch": 0.13151927437641722, "step": 88, "batch_size": 64, "mean": 0.31872305274009705, "std": 0.5750210285186768, "min": -0.8848419189453125, "p10": -0.3975269317626952, "median": 0.2517881393432617, "p90": 1.0322013854980472, "max": 2.1329803466796875, "pos_frac": 0.703125, "sample": [-0.17302703857421875, 0.2819328308105469, -0.022439956665039062, 0.6148147583007812, 1.0711822509765625, 0.033477783203125, 1.6559219360351562, -0.5101776123046875, 0.7362518310546875, -0.23919296264648438, 0.5596046447753906, 0.8212356567382812, 0.5174789428710938, 0.2175312042236328, -0.47048187255859375, 0.12453460693359375, 0.30548095703125, -0.21509552001953125, -0.8848419189453125, 1.2282028198242188, 0.8668460845947266, 0.1430644989013672, 0.25557899475097656, -0.2250804901123047, -0.5874366760253906, 0.44614410400390625, 0.9412460327148438, 2.1329803466796875, 0.5463294982910156, 0.30358314514160156, -0.4369049072265625, 0.08199310302734375, 0.8242034912109375, 0.7820587158203125, 0.5733451843261719, 0.0579071044921875, 0.657440185546875, 0.5380153656005859, 0.5609321594238281, 0.13990020751953125, 0.24799728393554688, 0.1170654296875, 0.6266403198242188, 0.11011123657226562, 0.85870361328125, -0.1898345947265625, -0.3056449890136719, 0.030618667602539062, 0.7654876708984375, 0.3799324035644531, 0.8246307373046875, -0.0349884033203125, 1.2544403076171875, 0.7803802490234375, -0.6747894287109375, 0.034946441650390625, 0.0025634765625, -0.10010147094726562, 1.1189117431640625, 1.1551399230957031, -0.06279182434082031, -0.1141357421875, -0.14649200439453125, -0.5350551605224609], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000088.npy"} +{"epoch": 0.1330309901738473, "step": 89, "batch_size": 64, "mean": 0.24268493056297302, "std": 0.5940409898757935, "min": -1.528076171875, "p10": -0.4379852294921875, "median": 0.28017616271972656, "p90": 0.9145782470703125, "max": 1.6429443359375, "pos_frac": 0.625, "sample": [0.3085899353027344, -0.11359024047851562, 0.2643108367919922, 0.20880126953125, 0.4471893310546875, 1.1047649383544922, 0.26507568359375, 0.34564781188964844, -0.185150146484375, 0.7336349487304688, 0.96551513671875, 0.8106613159179688, -0.037506103515625, 0.9008636474609375, -0.17343902587890625, 0.3236217498779297, 0.5803909301757812, 0.61669921875, 0.446807861328125, -0.7620792388916016, -0.370513916015625, -0.004550933837890625, 0.8469390869140625, 0.48047637939453125, 0.22566795349121094, 1.6429443359375, 0.1250457763671875, -0.11818695068359375, -0.3788871765136719, 0.60968017578125, -0.2605133056640625, 0.804412841796875, 0.10123825073242188, -0.6415786743164062, 0.9928131103515625, -0.017396926879882812, 1.1109390258789062, 0.5680618286132812, -1.528076171875, -0.44573974609375, 0.9204559326171875, 0.49741363525390625, 1.6425018310546875, 0.8506107330322266, -0.22045135498046875, 0.2040576934814453, 0.8610649108886719, 0.8805694580078125, 0.4464836120605469, -0.413970947265625, -0.419891357421875, -0.13617706298828125, 0.5253524780273438, 0.14446258544921875, 0.5844535827636719, -0.6464004516601562, 0.3151588439941406, 0.4587249755859375, -0.2789745330810547, -0.17896270751953125, -0.8804168701171875, -0.4977684020996094, 0.2952766418457031, -0.21532249450683594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000089.npy"} +{"epoch": 0.1345427059712774, "step": 90, "batch_size": 64, "mean": 0.26826387643814087, "std": 0.6517350077629089, "min": -1.36236572265625, "p10": -0.3967267990112305, "median": 0.28893280029296875, "p90": 0.901327514648438, "max": 3.485748291015625, "pos_frac": 0.75, "sample": [-0.39576148986816406, 1.0146827697753906, 0.049312591552734375, 0.3917503356933594, 0.4252052307128906, 0.16829490661621094, 0.6141357421875, 1.0400466918945312, 0.4062957763671875, -1.36236572265625, 0.6520462036132812, 0.5075340270996094, 0.3237018585205078, 0.4454803466796875, 0.1995849609375, 0.11461257934570312, -0.44873046875, 0.9547119140625, -0.062335968017578125, 0.3454399108886719, 1.074981689453125, 0.17197418212890625, 0.1367645263671875, 1.3597335815429688, 0.776763916015625, 0.1723003387451172, -0.33176422119140625, 3.485748291015625, 0.5711212158203125, 1.0589752197265625, 0.4524879455566406, 0.28443145751953125, -0.22257041931152344, 0.369354248046875, 0.24536895751953125, -0.25919342041015625, -0.6902999877929688, -0.193450927734375, 0.32756805419921875, 0.38847923278808594, 0.22351837158203125, -0.3971405029296875, 0.7250213623046875, 0.6458587646484375, 0.15105438232421875, 0.14037322998046875, 0.43657684326171875, 0.5443458557128906, 0.24968719482421875, -0.14136123657226562, 0.3292579650878906, 0.5524215698242188, 0.480926513671875, 0.00254058837890625, -0.016033172607421875, 0.15319252014160156, 0.0191192626953125, -0.6004314422607422, 0.46971893310546875, 0.29343414306640625, -0.7638778686523438, -1.283721923828125, -0.19918251037597656, 0.5911731719970703], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000090.npy"} +{"epoch": 0.1360544217687075, "step": 91, "batch_size": 64, "mean": 0.29064705967903137, "std": 0.6231523156166077, "min": -1.4196243286132812, "p10": -0.40000667572021475, "median": 0.353607177734375, "p90": 1.0541114807128908, "max": 1.9981842041015625, "pos_frac": 0.671875, "sample": [0.344024658203125, -0.31079864501953125, 0.7096977233886719, -0.33219337463378906, 0.4270172119140625, 1.021453857421875, 0.5524616241455078, -0.206787109375, 1.1732101440429688, 1.9981842041015625, -0.2049560546875, -0.433258056640625, 0.3988456726074219, 0.3113117218017578, -0.04225730895996094, 0.17669296264648438, 1.7012176513671875, 1.0681076049804688, 0.3836402893066406, -0.42906951904296875, 0.6170597076416016, 0.7539596557617188, -0.74493408203125, 0.3977508544921875, 0.7258377075195312, 0.818145751953125, 0.28986549377441406, -0.07157135009765625, 0.13663482666015625, 0.3901214599609375, 0.1627044677734375, 0.0354766845703125, 0.4073333740234375, 0.1396465301513672, 0.5347499847412109, 0.385711669921875, -0.0205078125, 0.363189697265625, 0.07506752014160156, -0.9461593627929688, -0.6383438110351562, -0.9644546508789062, -0.0277252197265625, -1.4196243286132812, 0.37345314025878906, -0.11856460571289062, -0.02497100830078125, 1.6992111206054688, 0.4816627502441406, 0.5460662841796875, 0.1510009765625, -0.3003959655761719, -0.016984939575195312, 0.043609619140625, 0.8899421691894531, 1.1828975677490234, -0.0888671875, -0.02991485595703125, 1.4503326416015625, 0.6240921020507812, 0.41481781005859375, 0.47622108459472656, 0.474212646484375, 0.6671104431152344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000091.npy"} +{"epoch": 0.13756613756613756, "step": 92, "batch_size": 64, "mean": 0.18867191672325134, "std": 0.523184597492218, "min": -0.79730224609375, "p10": -0.47845821380615233, "median": 0.13352203369140625, "p90": 0.9219514846801758, "max": 1.2150802612304688, "pos_frac": 0.609375, "sample": [0.12148284912109375, 0.0897674560546875, 0.011322021484375, -0.027574539184570312, 0.13162994384765625, 0.09564208984375, 0.02846527099609375, 0.4556999206542969, -0.01953887939453125, -0.26069068908691406, -0.48932647705078125, 0.16197776794433594, 0.6159858703613281, -0.34795379638671875, 1.1921539306640625, 0.89208984375, 0.13541412353515625, -0.490020751953125, 0.9186229705810547, -0.7018051147460938, 0.9294891357421875, -0.48175811767578125, 0.75311279296875, 0.161285400390625, 1.0588836669921875, -0.198089599609375, -0.30577850341796875, 0.46808624267578125, 0.29291534423828125, -0.79730224609375, -0.4634742736816406, 0.67767333984375, 0.5821762084960938, -0.05985260009765625, -0.13533782958984375, -0.17559051513671875, -0.1346282958984375, 0.8601531982421875, -0.429656982421875, -0.1212310791015625, 0.40131378173828125, -0.3469886779785156, 1.043212890625, 0.1931304931640625, 0.4855461120605469, -0.5961074829101562, 0.19298553466796875, 0.7374343872070312, 1.2150802612304688, -0.6222000122070312, -0.24345016479492188, 1.1638031005859375, 0.2012786865234375, -0.020147323608398438, -0.259185791015625, -0.47075843811035156, 0.1447601318359375, 0.03682708740234375, 0.46429443359375, 0.83734130859375, 0.5129623413085938, 0.2657146453857422, 0.9233779907226562, 0.8203582763671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000092.npy"} +{"epoch": 0.13907785336356765, "step": 93, "batch_size": 64, "mean": 0.185196191072464, "std": 0.5241662859916687, "min": -0.9311485290527344, "p10": -0.43286895751953125, "median": 0.23793411254882812, "p90": 0.7346820831298829, "max": 1.66925048828125, "pos_frac": 0.65625, "sample": [0.450042724609375, 0.7151756286621094, 0.166259765625, -0.19172286987304688, -0.9311485290527344, -0.15694808959960938, 0.5760040283203125, 0.07379913330078125, -0.17383193969726562, 0.7430419921875, 1.230316162109375, 0.8969039916992188, 1.66925048828125, -0.6640739440917969, 0.23487091064453125, 0.41483116149902344, 0.34607696533203125, 0.3460121154785156, -0.381591796875, -0.087677001953125, 0.01409149169921875, 0.240997314453125, -0.2039642333984375, 0.2231597900390625, 0.39282798767089844, -0.1250019073486328, 0.56903076171875, 0.11610794067382812, -0.45876502990722656, -0.32818603515625, -0.17193603515625, -0.246673583984375, 0.5071945190429688, 1.1934967041015625, -0.8719940185546875, 0.027740478515625, -0.7293319702148438, 0.2624855041503906, 0.4703521728515625, 0.6798858642578125, 0.6469154357910156, 0.039794921875, 0.4492321014404297, 0.3848304748535156, 0.45911407470703125, 0.4097137451171875, 0.29198265075683594, 0.1300220489501953, 0.97296142578125, -0.05449485778808594, 1.048309326171875, 0.09684562683105469, 0.5109786987304688, 0.6669502258300781, -0.4343452453613281, -0.3879852294921875, 0.3974342346191406, -0.3550262451171875, -0.3596153259277344, 0.24318695068359375, 0.6247787475585938, -0.790252685546875, -0.4294242858886719, 0.4535408020019531], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000093.npy"} +{"epoch": 0.14058956916099774, "step": 94, "batch_size": 64, "mean": 0.3565560281276703, "std": 0.6317989230155945, "min": -1.961679458618164, "p10": -0.2765907287597656, "median": 0.3861122131347656, "p90": 1.0313125610351563, "max": 2.452972412109375, "pos_frac": 0.765625, "sample": [0.908355712890625, 0.6300048828125, 0.3804168701171875, 1.1090412139892578, 0.30096435546875, -0.5773963928222656, -0.4008636474609375, 0.2411518096923828, 0.0717620849609375, 1.1391410827636719, 0.5248222351074219, 1.4920234680175781, 0.955902099609375, -1.961679458618164, 0.018442153930664062, 0.4523963928222656, -0.0327606201171875, 0.3382835388183594, 0.969940185546875, 0.19562530517578125, 0.9553050994873047, 0.8667678833007812, 0.398193359375, 0.027923583984375, 0.3116607666015625, 0.1959552764892578, 0.3013134002685547, -0.03971099853515625, -0.14896011352539062, 1.0223617553710938, 0.5460224151611328, -0.20482635498046875, -0.1502666473388672, 0.5457000732421875, 0.426727294921875, 0.3718986511230469, 0.57470703125, 1.3776092529296875, 0.5322837829589844, 2.452972412109375, 0.7325916290283203, 1.0351486206054688, -0.24380874633789062, 0.0357666015625, 1.0123634338378906, 0.5293350219726562, -0.9715042114257812, 1.13958740234375, 0.594512939453125, 0.4669361114501953, 0.8361053466796875, 0.4179973602294922, -0.24524688720703125, 0.1880054473876953, 0.4210624694824219, 0.041961669921875, -0.1749114990234375, 0.42621803283691406, -0.5893707275390625, 0.2615089416503906, 0.39180755615234375, -0.2900238037109375, -0.37950897216796875, 0.06384086608886719], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000094.npy"} +{"epoch": 0.1421012849584278, "step": 95, "batch_size": 64, "mean": 0.4166697561740875, "std": 0.5159606337547302, "min": -0.9583358764648438, "p10": -0.16557865142822265, "median": 0.4026908874511719, "p90": 1.0293701171875, "max": 1.901092529296875, "pos_frac": 0.8125, "sample": [1.901092529296875, -0.1497344970703125, 0.9337806701660156, 0.08836746215820312, 0.19255828857421875, 0.42120933532714844, 0.0724945068359375, 0.5363807678222656, 1.0443572998046875, 0.7159423828125, 0.21907806396484375, 0.58367919921875, 1.233001708984375, 0.4212379455566406, -0.19171905517578125, 0.7527694702148438, 0.9944000244140625, 0.5534133911132812, 0.22114181518554688, 0.7755546569824219, 0.501007080078125, 0.7099075317382812, -0.06698989868164062, 0.8602142333984375, -0.120574951171875, -0.45145416259765625, 1.883758544921875, 0.1912841796875, 0.5126266479492188, -0.9583358764648438, 0.04332733154296875, 0.457794189453125, -0.30094146728515625, 0.82269287109375, -0.33220863342285156, 0.36175537109375, 1.06488037109375, 0.5429153442382812, 0.3677825927734375, 0.01544189453125, 0.0460205078125, 0.23151016235351562, 0.03183746337890625, 0.5693206787109375, 0.12420654296875, -0.13375473022460938, 0.72283935546875, 0.11704826354980469, 0.3841724395751953, 1.2257843017578125, 0.02915191650390625, 0.664031982421875, -0.10486984252929688, 0.32199859619140625, 0.529327392578125, 0.9171524047851562, 0.48047828674316406, -0.19380950927734375, 0.8973045349121094, 1.1380462646484375, 0.2801952362060547, 0.8974838256835938, 0.2398681640625, -0.17236900329589844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000095.npy"} +{"epoch": 0.1436130007558579, "step": 96, "batch_size": 64, "mean": 0.3844701647758484, "std": 0.5640097260475159, "min": -1.1205825805664062, "p10": -0.2465892791748047, "median": 0.38473987579345703, "p90": 1.0529918670654297, "max": 1.5511474609375, "pos_frac": 0.71875, "sample": [0.40050697326660156, -1.1061477661132812, 0.368133544921875, -0.1539154052734375, 0.88494873046875, -0.2486572265625, 0.5221710205078125, -0.22208786010742188, 0.46451568603515625, 0.0359954833984375, 0.6922531127929688, 0.1266765594482422, 0.8116912841796875, 1.313690185546875, 0.7653579711914062, 0.885040283203125, -0.32231903076171875, 0.0261993408203125, 0.5755786895751953, -0.2514915466308594, 0.7229690551757812, 1.00152587890625, 0.943389892578125, -1.1205825805664062, -0.068328857421875, 0.09603309631347656, 0.9489288330078125, 0.52508544921875, 1.3708419799804688, 0.745513916015625, 1.0311088562011719, 1.10821533203125, 1.5511474609375, 0.26427459716796875, -0.04534149169921875, 0.6483230590820312, -0.232513427734375, 0.0271759033203125, 0.02120208740234375, 0.7927207946777344, 1.009765625, -0.03876304626464844, 0.6401481628417969, -0.24176406860351562, 0.3689727783203125, 0.3569793701171875, 1.0623703002929688, -0.016510009765625, 0.7983932495117188, -0.21088409423828125, 0.5943374633789062, 0.07702255249023438, 0.7663516998291016, 0.6792449951171875, 1.2863540649414062, 0.32920074462890625, -0.105499267578125, -0.16235733032226562, 0.33248329162597656, 1.1275482177734375, 0.7163772583007812, -0.45343971252441406, 0.09394073486328125, -0.3040122985839844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000096.npy"} +{"epoch": 0.14512471655328799, "step": 97, "batch_size": 64, "mean": 0.2673855423927307, "std": 0.6191110014915466, "min": -1.104238510131836, "p10": -0.37266769409179684, "median": 0.16699600219726562, "p90": 1.1671556472778322, "max": 1.8663482666015625, "pos_frac": 0.59375, "sample": [0.0908050537109375, 0.19673919677734375, -1.104238510131836, 1.5625247955322266, 0.3432483673095703, 1.38397216796875, -0.07722091674804688, -0.116668701171875, -0.33037567138671875, -0.943115234375, -0.3027324676513672, -0.09987640380859375, 0.1372528076171875, 0.5429439544677734, -0.07025527954101562, -0.8258514404296875, -0.4312763214111328, 0.5675449371337891, 0.4414253234863281, 0.10710906982421875, 0.4457550048828125, 0.11000823974609375, 0.30217933654785156, 0.01827239990234375, -0.0964813232421875, -0.20135498046875, 0.37359619140625, 0.5061569213867188, 0.5332298278808594, -0.027130126953125, -0.10186767578125, -0.3907928466796875, 0.8294296264648438, 0.34845733642578125, 0.7031288146972656, -0.119476318359375, 1.3797054290771484, -0.05944061279296875, -0.26291656494140625, 1.137420654296875, 0.6278839111328125, 0.7283821105957031, 1.6435699462890625, 0.5140800476074219, -0.6967391967773438, -0.31987571716308594, 0.45453643798828125, 0.6851043701171875, 1.52764892578125, 0.5594558715820312, -0.03309440612792969, 0.10163116455078125, 1.8663482666015625, -0.08443069458007812, 0.7593307495117188, 0.7169570922851562, 1.1798992156982422, 0.6107711791992188, 0.3329734802246094, -0.00525665283203125, 0.3700828552246094, -0.4920234680175781, -0.2618141174316406, -0.17258071899414062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000097.npy"} +{"epoch": 0.14663643235071808, "step": 98, "batch_size": 64, "mean": 0.23879370093345642, "std": 0.4962601363658905, "min": -0.6085758209228516, "p10": -0.37640571594238276, "median": 0.16951465606689453, "p90": 1.0076118469238282, "max": 1.4658203125, "pos_frac": 0.71875, "sample": [0.003986358642578125, -0.027692794799804688, 0.6929874420166016, 0.28873443603515625, 0.02032470703125, 0.25542449951171875, 0.31778717041015625, -0.1233978271484375, 0.3266105651855469, 0.8242874145507812, -0.0747833251953125, 0.16724014282226562, 1.4658203125, 0.08049392700195312, 1.0257797241210938, 1.1512985229492188, 0.2026214599609375, 0.9920120239257812, 0.17178916931152344, -0.27544403076171875, 0.02069091796875, 0.9855537414550781, -0.34356689453125, 0.114410400390625, 0.0910797119140625, 0.019517898559570312, 0.23410415649414062, 0.2551460266113281, -0.47203826904296875, -0.4695777893066406, 0.3350982666015625, 0.6142425537109375, 0.27639007568359375, 0.742950439453125, -0.04383087158203125, 0.131256103515625, -0.26903724670410156, 1.3111343383789062, 0.24850845336914062, 0.06647109985351562, 0.2568511962890625, 0.15760040283203125, -0.34603118896484375, 0.5775127410888672, -0.5932769775390625, -0.4447898864746094, 0.4783210754394531, 0.005298614501953125, -0.416778564453125, 1.2080192565917969, 0.5207176208496094, 1.2438507080078125, -0.32166290283203125, -0.6085758209228516, 0.68768310546875, 0.00612640380859375, 0.55596923828125, -0.01239776611328125, 0.014842987060546875, 1.0142974853515625, -0.2976341247558594, 0.3398590087890625, 0.31203460693359375, -0.3894233703613281], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000098.npy"} +{"epoch": 0.14814814814814814, "step": 99, "batch_size": 64, "mean": 0.37138599157333374, "std": 0.5743963122367859, "min": -0.8661651611328125, "p10": -0.27252502441406246, "median": 0.2810192108154297, "p90": 1.0736236572265625, "max": 1.864114761352539, "pos_frac": 0.734375, "sample": [1.2470169067382812, 1.864114761352539, -0.6762466430664062, 1.7616958618164062, 0.12012481689453125, 0.2214813232421875, 0.015148162841796875, -0.02392578125, 0.6950836181640625, 1.4345817565917969, 0.418792724609375, 1.0254440307617188, 0.8907394409179688, 1.0142364501953125, 0.36273956298828125, 0.25168609619140625, 0.14287185668945312, 0.2444934844970703, -0.30017852783203125, 0.6900711059570312, 0.9464492797851562, 0.2985076904296875, -0.03217315673828125, -0.0547332763671875, 0.0166473388671875, -0.0532684326171875, 0.533599853515625, -0.8661651611328125, -0.0856170654296875, 1.4199790954589844, -0.06573486328125, 0.579681396484375, -0.354644775390625, -0.13692474365234375, 0.2676277160644531, 0.9926071166992188, 0.4864044189453125, -0.5788478851318359, 0.10285758972167969, 0.6612892150878906, 0.1685028076171875, 0.49700927734375, 0.6192207336425781, 1.63470458984375, -0.13794708251953125, 0.5384597778320312, 1.0672950744628906, -0.15928268432617188, 0.09063720703125, 0.22305679321289062, 1.0763359069824219, 0.5185070037841797, 0.39218711853027344, 0.4163970947265625, 0.1826629638671875, 0.29441070556640625, -0.3372764587402344, 0.6323776245117188, 0.12320709228515625, -0.20800018310546875, 0.6599884033203125, 0.3556976318359375, -0.5716781616210938, 0.21471786499023438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000099.npy"} +{"epoch": 0.14965986394557823, "step": 100, "batch_size": 64, "mean": 0.264363557100296, "std": 0.7038221955299377, "min": -1.6939544677734375, "p10": -0.5303340911865234, "median": 0.24935054779052734, "p90": 1.17571029663086, "max": 2.1834640502929688, "pos_frac": 0.671875, "sample": [1.3087158203125, 0.35959625244140625, -0.17803955078125, 0.24713134765625, 0.59063720703125, -0.3240623474121094, 0.2515697479248047, 0.8871707916259766, 2.1834640502929688, 0.4656219482421875, 0.2575531005859375, -0.23516082763671875, 0.6838607788085938, 0.7693328857421875, 0.12485885620117188, 1.4974746704101562, -0.561126708984375, -0.4584846496582031, 0.0844268798828125, 0.33685302734375, 0.7529411315917969, -0.1909027099609375, -0.5685043334960938, -0.24265670776367188, -0.23439788818359375, 0.00433349609375, 0.05335044860839844, 0.7414627075195312, 0.38880157470703125, 0.256011962890625, -0.10590744018554688, -0.6335296630859375, -0.5837631225585938, -0.2920951843261719, -0.13066864013671875, 0.1868133544921875, -0.20140838623046875, -0.069976806640625, -1.6279296875, 1.2411270141601562, -0.3292999267578125, 1.0230712890625, 0.29019927978515625, 0.022045135498046875, 0.6989326477050781, 1.4373359680175781, 0.7398033142089844, 0.2867393493652344, 0.15839004516601562, -0.6033363342285156, 1.8554840087890625, 0.5146942138671875, 0.3052978515625, 0.10689735412597656, 0.103973388671875, 0.6422672271728516, -0.17798614501953125, 0.8491058349609375, 0.1395721435546875, 0.5037460327148438, 1.70281982421875, 0.314361572265625, -1.6939544677734375, 0.9946136474609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000100.npy"} +{"epoch": 0.15117157974300832, "step": 101, "batch_size": 64, "mean": 0.40265610814094543, "std": 0.5663474202156067, "min": -0.783660888671875, "p10": -0.34227218627929684, "median": 0.3647747039794922, "p90": 1.0848411560058595, "max": 2.0021705627441406, "pos_frac": 0.84375, "sample": [0.2596893310546875, -0.35405731201171875, 0.41786956787109375, -0.3147735595703125, 1.326080322265625, 0.8009109497070312, 1.1129608154296875, 0.11785888671875, 0.2489471435546875, 0.0089263916015625, 0.07519340515136719, 0.7620697021484375, -0.604461669921875, 0.4139251708984375, 1.0616989135742188, 0.07463455200195312, 0.6924514770507812, 0.5420875549316406, 0.04326057434082031, -0.7251567840576172, 0.9138031005859375, -0.021289825439453125, 0.7881622314453125, 0.8610801696777344, 0.23025131225585938, 1.0732574462890625, 1.0898056030273438, 1.0206031799316406, 0.4231147766113281, 0.8164882659912109, 0.993408203125, 1.398284912109375, 0.7037982940673828, 0.024005889892578125, -0.783660888671875, -0.6797695159912109, 0.29010009765625, 0.43598175048828125, 0.20519256591796875, -0.25753021240234375, -0.61309814453125, 0.5430831909179688, 0.25599098205566406, 0.3188629150390625, 1.4651565551757812, 0.1455841064453125, 0.139801025390625, 0.364288330078125, 0.08894920349121094, 0.07911109924316406, 1.2632217407226562, 0.3652610778808594, 0.9044780731201172, 0.657806396484375, 0.01898193359375, 0.1206817626953125, 0.5699615478515625, 0.5403289794921875, -0.5288162231445312, 0.23532485961914062, 0.260101318359375, 0.5925121307373047, 2.0021705627441406, 0.4950447082519531], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000101.npy"} +{"epoch": 0.15268329554043839, "step": 102, "batch_size": 64, "mean": 0.26210689544677734, "std": 0.6144367456436157, "min": -1.384246826171875, "p10": -0.4033546447753906, "median": 0.2535247802734375, "p90": 1.0729953765869145, "max": 1.625, "pos_frac": 0.671875, "sample": [0.9069442749023438, -0.3661041259765625, 0.0035228729248046875, 0.8213310241699219, -0.8504867553710938, 0.08269309997558594, -0.23687744140625, 0.267974853515625, 0.0328826904296875, 0.06212615966796875, 0.6584930419921875, 0.502288818359375, 0.7794342041015625, 0.13702392578125, 0.4845733642578125, 0.3122978210449219, -0.4372711181640625, 1.625, -1.0617599487304688, 0.5314788818359375, -0.3387565612792969, -0.1728496551513672, 0.23907470703125, 0.8781356811523438, 0.9526329040527344, 0.6554412841796875, 0.1795654296875, 0.3419990539550781, -0.82110595703125, -0.0545654296875, 1.1108322143554688, 0.9868278503417969, -0.23680877685546875, 0.5548133850097656, 0.3525390625, 0.0123443603515625, 0.5715370178222656, 1.5341339111328125, 1.4475250244140625, -0.2491912841796875, -0.3935413360595703, 0.9513397216796875, -0.43120574951171875, 0.2828521728515625, -0.3260612487792969, 0.6820297241210938, 0.1487274169921875, 0.07320404052734375, 0.4259815216064453, 0.18205642700195312, 0.4108695983886719, 1.1553497314453125, -0.3211479187011719, -0.4075603485107422, -0.09106063842773438, -0.05852508544921875, -0.23412704467773438, 1.10992431640625, -0.003025054931640625, -1.384246826171875, 0.42093658447265625, 0.7256622314453125, 1.2190361022949219, 0.43768310546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000102.npy"} +{"epoch": 0.15419501133786848, "step": 103, "batch_size": 64, "mean": 0.2588259279727936, "std": 0.5441632866859436, "min": -1.0681381225585938, "p10": -0.45554351806640625, "median": 0.22318267822265625, "p90": 0.9325847625732423, "max": 1.8104095458984375, "pos_frac": 0.703125, "sample": [-0.32352447509765625, 0.23856353759765625, 0.507965087890625, -0.8046913146972656, 0.7169303894042969, 0.0973663330078125, 0.04376983642578125, 0.05688667297363281, 0.24726104736328125, 0.53570556640625, -0.17583847045898438, 0.590789794921875, -0.37784576416015625, 0.07810211181640625, -0.43060302734375, 0.34259796142578125, 0.6193981170654297, 0.6416244506835938, 0.1351318359375, 0.8144798278808594, -0.0606536865234375, -0.5696372985839844, -0.0033473968505859375, -0.19871139526367188, 0.6765518188476562, -0.4662322998046875, -0.4920921325683594, 1.1352767944335938, -0.12679290771484375, 0.24682998657226562, 0.16275787353515625, 1.008392333984375, 0.94097900390625, 0.26758575439453125, 0.3216400146484375, 0.4400482177734375, -0.5889434814453125, 0.278289794921875, -0.5372848510742188, 0.18274688720703125, 0.7198638916015625, 0.1006011962890625, 0.1701374053955078, 0.590728759765625, 0.9124622344970703, 0.391326904296875, 0.9129981994628906, -0.010150909423828125, 0.9604549407958984, 0.1903839111328125, 1.8104095458984375, 0.27901458740234375, -1.0681381225585938, 0.08473968505859375, 0.1921234130859375, -0.00148773193359375, 0.8354644775390625, -0.14847946166992188, -0.08828163146972656, 0.24116134643554688, 0.357208251953125, 0.9762649536132812, 1.7767791748046875, 0.20780181884765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000103.npy"} +{"epoch": 0.15570672713529857, "step": 104, "batch_size": 64, "mean": 0.26366138458251953, "std": 0.6473783254623413, "min": -1.7056045532226562, "p10": -0.476591682434082, "median": 0.304901123046875, "p90": 1.0148283004760743, "max": 1.7640380859375, "pos_frac": 0.640625, "sample": [1.489013671875, 0.71337890625, -0.09521102905273438, 0.5985755920410156, -1.7056045532226562, 0.31829833984375, 0.30495452880859375, 0.45839691162109375, 0.30484771728515625, -0.56988525390625, 0.17727279663085938, -0.43718719482421875, 1.7640380859375, 1.1599884033203125, 0.25579071044921875, -0.5357246398925781, 0.655029296875, 0.3822784423828125, -0.15761184692382812, -0.6537094116210938, 0.4311981201171875, -0.61041259765625, 1.17620849609375, 0.9340667724609375, -0.450958251953125, -0.0011844635009765625, 0.45700836181640625, -0.3851318359375, 1.004068374633789, -0.087005615234375, 0.22612380981445312, -0.3616657257080078, 0.0050811767578125, -0.009466171264648438, 0.4992713928222656, 0.5434246063232422, 0.0608367919921875, -0.4875774383544922, 0.9462509155273438, -0.12699127197265625, 0.3476676940917969, 0.3384876251220703, 1.019439697265625, -0.01541900634765625, 0.1852569580078125, -0.406158447265625, 0.7611465454101562, 0.6322841644287109, -0.3970794677734375, -0.22454833984375, 0.6267356872558594, -0.03468132019042969, 0.9991569519042969, 0.7333145141601562, 0.5180702209472656, 0.425262451171875, 0.15641403198242188, 0.607208251953125, 0.16665077209472656, -1.1566925048828125, -0.4250602722167969, 1.393096923828125, 1.5590362548828125, 0.874664306640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000104.npy"} +{"epoch": 0.15721844293272866, "step": 105, "batch_size": 64, "mean": 0.3147069811820984, "std": 0.6679058074951172, "min": -2.356292724609375, "p10": -0.27264556884765623, "median": 0.33069610595703125, "p90": 1.1787170410156256, "max": 1.952117919921875, "pos_frac": 0.765625, "sample": [0.6908531188964844, 0.4242095947265625, 0.647918701171875, 0.41181182861328125, 0.3067626953125, 0.50177001953125, -0.88299560546875, 1.952117919921875, -0.21692657470703125, 0.4398174285888672, -0.1086578369140625, 0.5054492950439453, 0.6924057006835938, 0.8756332397460938, 0.13624954223632812, 1.243743896484375, 0.1447124481201172, -1.052459716796875, 0.10468673706054688, 1.6569137573242188, 0.42075347900390625, -0.19832611083984375, 0.034088134765625, -0.5074081420898438, 0.00428009033203125, 0.3546295166015625, 0.9730110168457031, 0.825592041015625, 0.41472625732421875, 0.27276611328125, 0.0193634033203125, 0.4143333435058594, -0.25888824462890625, 0.109893798828125, 0.5316925048828125, 0.3795013427734375, 0.7964801788330078, 0.6108245849609375, -0.27854156494140625, 1.24029541015625, 0.22248458862304688, -0.15612411499023438, 0.289886474609375, 0.7772693634033203, 1.6132965087890625, 1.0350341796875, -2.356292724609375, 0.049663543701171875, 0.371734619140625, 0.0092010498046875, 0.2145233154296875, 0.2764701843261719, 0.4324817657470703, 1.2672271728515625, -0.4225311279296875, 0.1064605712890625, 1.6282958984375, 0.4526214599609375, -0.093658447265625, 0.4074440002441406, -0.6232452392578125, 0.20867156982421875, -0.1609954833984375, -0.04175567626953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000105.npy"} +{"epoch": 0.15873015873015872, "step": 106, "batch_size": 64, "mean": 0.2501852214336395, "std": 0.5450973510742188, "min": -1.209381103515625, "p10": -0.36330242156982423, "median": 0.2396221160888672, "p90": 0.9814239501953126, "max": 1.474761962890625, "pos_frac": 0.65625, "sample": [0.0224609375, 0.5283222198486328, 0.34832000732421875, 0.5850143432617188, 0.21704864501953125, -0.46038055419921875, 1.2769622802734375, 0.4331016540527344, -0.17864990234375, 0.526214599609375, 0.00926971435546875, 0.4068641662597656, 1.474761962890625, -0.1600189208984375, 0.04541015625, -0.07795333862304688, 0.4825897216796875, -0.16910934448242188, 0.04920196533203125, -0.3574371337890625, -0.04082489013671875, 0.0050811767578125, -0.308502197265625, -0.0742340087890625, -0.13353347778320312, -0.6934127807617188, -0.3658161163330078, -0.8269805908203125, 0.5682868957519531, 1.20391845703125, -0.3761787414550781, 0.4505119323730469, -0.08386993408203125, 0.8524017333984375, 0.18262481689453125, 1.1129341125488281, 0.7758827209472656, -0.04383659362792969, 0.9960403442382812, 0.0986480712890625, 0.13622283935546875, 0.4694366455078125, 1.1548290252685547, 0.9473190307617188, -0.34650421142578125, 0.470428466796875, -0.04647064208984375, 0.17955398559570312, -0.8608245849609375, 0.2728691101074219, 0.330596923828125, 0.48589324951171875, 0.7711639404296875, 1.2455482482910156, 0.32909393310546875, -0.09154510498046875, 0.2621955871582031, 0.7729721069335938, -0.051792144775390625, 0.7139739990234375, -1.209381103515625, 0.5499954223632812, 0.5217132568359375, 0.7034320831298828], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000106.npy"} +{"epoch": 0.1602418745275888, "step": 107, "batch_size": 64, "mean": 0.15993207693099976, "std": 0.686407744884491, "min": -1.2318878173828125, "p10": -0.6326126098632812, "median": 0.15979766845703125, "p90": 0.9662761688232424, "max": 2.521484375, "pos_frac": 0.609375, "sample": [-0.1633892059326172, -1.2318878173828125, 0.5429840087890625, 1.8877525329589844, 0.20470428466796875, 0.14580535888671875, -0.5137939453125, 0.5402069091796875, -0.5540847778320312, 0.4822883605957031, -1.0636749267578125, 0.4517669677734375, 0.3005809783935547, 0.01300048828125, 0.23984146118164062, -0.4902496337890625, 0.6083221435546875, -0.140838623046875, 0.1712360382080078, -0.75408935546875, -0.3357391357421875, 2.521484375, 1.7406082153320312, 0.015201568603515625, 0.9148063659667969, -0.9709701538085938, 0.9094467163085938, -0.2996711730957031, -0.1442108154296875, 0.3165740966796875, 0.22616958618164062, -0.5790786743164062, -0.05374908447265625, 0.4333000183105469, 0.15155029296875, -0.980926513671875, 0.9883346557617188, 0.35317230224609375, 0.30226898193359375, 0.5200653076171875, 1.269073486328125, 1.1824684143066406, 0.1680450439453125, -0.5481796264648438, -0.0298309326171875, -0.04062652587890625, 0.3650321960449219, 0.362884521484375, -0.26053428649902344, -0.4155426025390625, 1.2482070922851562, 0.20871734619140625, 0.2954139709472656, -0.24553680419921875, -0.16383743286132812, 0.0867462158203125, -0.6903839111328125, 0.31494140625, 0.08475494384765625, -0.6555557250976562, 0.08588409423828125, 0.422576904296875, -0.0950164794921875, 0.5808334350585938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000107.npy"} +{"epoch": 0.1617535903250189, "step": 108, "batch_size": 64, "mean": 0.35200008749961853, "std": 0.6747092604637146, "min": -1.3272781372070312, "p10": -0.4294536590576171, "median": 0.23779678344726562, "p90": 1.1403251647949222, "max": 2.71685791015625, "pos_frac": 0.71875, "sample": [0.4989013671875, 0.7361221313476562, 0.8980712890625, 0.34903717041015625, -0.05223274230957031, 1.03839111328125, 0.47088623046875, 0.4100933074951172, 1.8507537841796875, -0.6653976440429688, -0.6006622314453125, 1.2490882873535156, 0.394287109375, -0.15855979919433594, 0.547149658203125, 0.1846923828125, -0.39153289794921875, 0.10684394836425781, 0.09206390380859375, 0.22535324096679688, 1.0421600341796875, 0.4847259521484375, -0.0233306884765625, 1.175323486328125, 0.7078094482421875, -0.4517822265625, 0.7720832824707031, -0.252410888671875, 2.71685791015625, 0.4883575439453125, -0.32562255859375, 0.11786651611328125, 0.7227554321289062, 0.08372879028320312, 0.02275848388671875, 2.0809707641601562, -0.25856590270996094, 0.23674774169921875, -1.3272781372070312, 1.0586624145507812, -0.6765975952148438, 0.8240127563476562, 0.3148059844970703, -0.021734237670898438, 0.218109130859375, 0.20212936401367188, -0.12259864807128906, 1.2234611511230469, -0.6070480346679688, 0.05809783935546875, -0.4457054138183594, 0.0633544921875, -0.072906494140625, 0.7177886962890625, 0.21071624755859375, 0.69927978515625, 0.4209861755371094, 0.2388458251953125, 0.333892822265625, 1.2463226318359375, 0.023092269897460938, 0.8517036437988281, -0.1718597412109375, 0.74468994140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000108.npy"} +{"epoch": 0.16326530612244897, "step": 109, "batch_size": 64, "mean": 0.26250216364860535, "std": 0.6096794605255127, "min": -0.775665283203125, "p10": -0.45445480346679684, "median": 0.2516651153564453, "p90": 1.0523738861083987, "max": 2.474029541015625, "pos_frac": 0.640625, "sample": [0.6274871826171875, 0.9830398559570312, 1.93121337890625, -0.3044853210449219, 0.09803390502929688, 0.3289928436279297, 1.1077995300292969, 0.6588363647460938, 0.37624168395996094, 0.5045204162597656, 0.2588958740234375, -0.273468017578125, 0.8784866333007812, 1.0820884704589844, -0.294708251953125, -0.6151123046875, 0.6278476715087891, 0.3618049621582031, 0.07272720336914062, 0.6327743530273438, 0.33909034729003906, 0.7634830474853516, -0.0505218505859375, -0.03260231018066406, 0.6225738525390625, 1.224365234375, -0.475860595703125, 0.3276214599609375, -0.6969795227050781, 2.474029541015625, -0.1703968048095703, 0.49761199951171875, -0.2504615783691406, 0.11106109619140625, 0.28725433349609375, 0.02709197998046875, -0.3612098693847656, 0.1782684326171875, -0.443511962890625, 0.4031219482421875, -0.06554412841796875, -0.09207534790039062, -0.3599510192871094, -0.775665283203125, -0.12515640258789062, -0.5402488708496094, 0.362762451171875, 0.24443435668945312, 0.43503570556640625, -0.45914459228515625, 0.28837013244628906, -0.3316802978515625, -0.16016387939453125, 1.1913375854492188, 0.4461669921875, 0.5699844360351562, 1.3570022583007812, 0.76611328125, -0.5300445556640625, 0.23445510864257812, 0.17646026611328125, -0.19829750061035156, 0.0671844482421875, 0.48175811767578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000109.npy"} +{"epoch": 0.16477702191987906, "step": 110, "batch_size": 64, "mean": 0.16184476017951965, "std": 0.5808418989181519, "min": -1.34503173828125, "p10": -0.5749813079833984, "median": 0.1404552459716797, "p90": 0.941234588623047, "max": 1.67242431640625, "pos_frac": 0.640625, "sample": [0.37967872619628906, -0.076995849609375, -1.34503173828125, 0.05440521240234375, 0.23171615600585938, -0.5895919799804688, -0.04059600830078125, 0.9027557373046875, 0.483978271484375, 0.9577255249023438, 1.0207443237304688, 0.06386375427246094, -0.095458984375, 0.9895858764648438, 0.031993865966796875, -0.7144088745117188, -0.3543739318847656, 1.104095458984375, 0.09798240661621094, -0.7746429443359375, 0.1672496795654297, 0.381622314453125, 0.300445556640625, -0.23773956298828125, 0.021907806396484375, 0.7795791625976562, 0.23347091674804688, -0.14706039428710938, -0.49265289306640625, -0.783660888671875, 0.9011993408203125, 0.3600921630859375, 0.6196746826171875, -0.5408897399902344, -0.144012451171875, 0.2744789123535156, 0.09832763671875, 1.4189605712890625, 0.13903045654296875, 0.46761131286621094, 0.47341156005859375, 0.23028564453125, -0.35596466064453125, -0.319671630859375, 0.22269821166992188, 0.5907363891601562, 0.43064117431640625, -0.06118011474609375, -0.271697998046875, -0.8069076538085938, -0.2108917236328125, 0.23677825927734375, 0.40406036376953125, -0.37313079833984375, 0.12540435791015625, 1.67242431640625, 0.14188003540039062, 0.6310844421386719, 0.1301288604736328, 1.5564727783203125, -0.2486095428466797, 0.4012298583984375, 0.25966644287109375, -0.645843505859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000110.npy"} +{"epoch": 0.16628873771730915, "step": 111, "batch_size": 64, "mean": 0.26701000332832336, "std": 0.664932131767273, "min": -1.3631019592285156, "p10": -0.517420768737793, "median": 0.28256988525390625, "p90": 1.0532060623168946, "max": 1.6679840087890625, "pos_frac": 0.6875, "sample": [0.2607269287109375, -0.505157470703125, -0.35849761962890625, 1.4580230712890625, 0.6714744567871094, -0.0803680419921875, -1.3631019592285156, 0.40755462646484375, 0.9775428771972656, 0.35466575622558594, 0.301422119140625, 0.6040191650390625, -0.6802902221679688, -0.2165679931640625, 0.2830963134765625, 0.8534717559814453, 0.0228271484375, 0.19849395751953125, -0.29486083984375, 1.0040416717529297, -0.17671966552734375, -0.744598388671875, -0.024524688720703125, 0.7984199523925781, 0.3990478515625, 0.6713104248046875, 0.11041259765625, 1.0619659423828125, 0.1568431854248047, 1.4278945922851562, -0.10536575317382812, -0.5226764678955078, -0.9652099609375, -0.27661895751953125, 0.67266845703125, 0.3808441162109375, 1.032766342163086, 0.548919677734375, 0.14268112182617188, 0.9071426391601562, 1.1332550048828125, 0.8250350952148438, 1.0041275024414062, -0.43388938903808594, 0.39730072021484375, 0.1005859375, 0.2994537353515625, 1.6679840087890625, 0.28204345703125, 0.49863433837890625, 1.4752655029296875, 1.3942756652832031, 0.43581199645996094, -0.24632835388183594, -1.1542510986328125, 0.232147216796875, -0.48431396484375, 0.368682861328125, 0.004154205322265625, 0.8771896362304688, -1.08367919921875, 0.17304229736328125, -0.17441177368164062, 0.10280609130859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000111.npy"} +{"epoch": 0.16780045351473924, "step": 112, "batch_size": 64, "mean": 0.34459996223449707, "std": 0.6616691946983337, "min": -1.4546737670898438, "p10": -0.43136138916015626, "median": 0.33625221252441406, "p90": 1.167636871337891, "max": 1.8743438720703125, "pos_frac": 0.6875, "sample": [0.97381591796875, 1.8743438720703125, 0.7315521240234375, 0.9353981018066406, -0.09234428405761719, 0.7721176147460938, 1.7053680419921875, 0.12894439697265625, 1.5376968383789062, 1.086273193359375, -0.6758270263671875, -0.434844970703125, -0.10739898681640625, 0.5165061950683594, -0.0001926422119140625, -0.1377410888671875, 0.697662353515625, 0.3614501953125, -1.4546737670898438, -0.03338623046875, 0.6165771484375, -0.42803955078125, 0.8363151550292969, 0.6737518310546875, 0.0693511962890625, 0.513671875, 0.04669189453125, 0.4443626403808594, 1.382537841796875, 0.45532989501953125, 0.572784423828125, 0.01796722412109375, -0.18434715270996094, 0.25017738342285156, 0.37766265869140625, 0.61468505859375, 0.0061969757080078125, 0.970733642578125, 1.2025070190429688, 0.08013153076171875, 0.16121292114257812, -0.05328369140625, 0.4028816223144531, -0.11517906188964844, -0.7707672119140625, 1.4824371337890625, 0.5402679443359375, 0.3110542297363281, -0.03922843933105469, 0.000850677490234375, -0.9906082153320312, 1.5637016296386719, -0.199462890625, 0.9761962890625, 0.5212249755859375, 0.26503753662109375, -0.3234748840332031, -0.2428264617919922, 1.0309066772460938, 1.028564453125, 0.5797996520996094, -0.4327850341796875, 0.0791015625, -0.6249923706054688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000112.npy"} +{"epoch": 0.1693121693121693, "step": 113, "batch_size": 64, "mean": 0.22868850827217102, "std": 0.6177945733070374, "min": -1.767608642578125, "p10": -0.4344215393066406, "median": 0.2346343994140625, "p90": 0.9779777526855472, "max": 1.450958251953125, "pos_frac": 0.640625, "sample": [-0.1498565673828125, -0.3462066650390625, -0.04346466064453125, -0.13509178161621094, -0.24672317504882812, 1.3088912963867188, -0.0570220947265625, 0.5793304443359375, 1.403594970703125, 1.149942398071289, -0.14722824096679688, 0.2523078918457031, -0.05706024169921875, 0.8117141723632812, -0.46622467041015625, 1.0086135864257812, 0.26023292541503906, 0.8846664428710938, -0.5924949645996094, -0.7642803192138672, -0.40950775146484375, 0.19621658325195312, 0.8826828002929688, -1.1185150146484375, -0.40299034118652344, 0.098724365234375, 0.46051025390625, 0.3770122528076172, -0.3926658630371094, 0.3097419738769531, 0.7894744873046875, 0.078369140625, 0.05848503112792969, -0.27759552001953125, 0.6826515197753906, 1.1419391632080078, -1.767608642578125, 0.6237068176269531, 0.23822021484375, 0.1923675537109375, 0.10061073303222656, 1.2695999145507812, 0.3295135498046875, 0.8773040771484375, 0.08968353271484375, -0.445098876953125, 0.4801177978515625, -0.001617431640625, -0.8693389892578125, 0.6131496429443359, -0.20428466796875, 0.40252685546875, 0.2351837158203125, 0.7838287353515625, 1.450958251953125, -0.2379016876220703, 0.7283096313476562, -0.090972900390625, 0.4091949462890625, 0.2340850830078125, 0.7345848083496094, 0.31697845458984375, 0.10829544067382812, 0.906494140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000113.npy"} +{"epoch": 0.1708238851095994, "step": 114, "batch_size": 64, "mean": 0.43679025769233704, "std": 0.6293262243270874, "min": -1.0942840576171875, "p10": -0.3113128662109375, "median": 0.4007911682128906, "p90": 1.2831310272216798, "max": 2.292877197265625, "pos_frac": 0.703125, "sample": [-0.2425079345703125, 0.3649444580078125, 0.401092529296875, -0.08547210693359375, 0.3902130126953125, 0.208343505859375, 0.3699188232421875, 0.0783233642578125, 0.5158004760742188, -0.27259254455566406, 0.30054473876953125, 0.8571128845214844, 0.5898704528808594, 0.389801025390625, 0.46039581298828125, -1.0942840576171875, 0.7516136169433594, 0.5379791259765625, -0.14345359802246094, 0.18749618530273438, 0.5417976379394531, 0.7585525512695312, 1.2959518432617188, 0.42540740966796875, 0.08632469177246094, 0.8805770874023438, -0.09200477600097656, 1.118124008178711, -0.39333343505859375, 0.74078369140625, 0.40048980712890625, 0.42568016052246094, 1.0540618896484375, 1.35992431640625, 0.5082626342773438, 0.16629791259765625, 0.7755622863769531, 1.1735687255859375, 1.1504230499267578, -0.0181121826171875, 0.950103759765625, 1.2532157897949219, 0.769622802734375, -0.0594482421875, -0.26114654541015625, -0.3170166015625, 1.529998779296875, 0.5655250549316406, -0.298004150390625, -0.14869308471679688, -0.49794769287109375, 2.292877197265625, 0.31272125244140625, 1.361541748046875, 1.2033500671386719, 1.525909423828125, 0.2892570495605469, -0.10058212280273438, -0.52203369140625, -0.4349040985107422, 1.5314559936523438, -0.3802337646484375, -0.004505157470703125, 0.4700336456298828], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000114.npy"} +{"epoch": 0.17233560090702948, "step": 115, "batch_size": 64, "mean": 0.29103943705558777, "std": 0.6296499371528625, "min": -0.8417205810546875, "p10": -0.3971939086914062, "median": 0.2617197036743164, "p90": 1.2045272827148439, "max": 2.263671875, "pos_frac": 0.59375, "sample": [0.285736083984375, 0.5594940185546875, 0.5456924438476562, -0.8417205810546875, 1.182769775390625, 0.3768329620361328, -0.299285888671875, 0.2302703857421875, 0.2506103515625, 2.263671875, 0.2728290557861328, -0.33368682861328125, 1.8307418823242188, -0.18361282348632812, 0.4250946044921875, -0.12451171875, -0.0118408203125, -0.492095947265625, 0.417388916015625, -0.1420440673828125, 0.7843971252441406, 0.3455657958984375, -0.83502197265625, -0.1938934326171875, -0.4648323059082031, 0.8552894592285156, -0.25131988525390625, 0.42609405517578125, -0.03997802734375, 0.9221839904785156, 0.426971435546875, 0.29236793518066406, 0.22503662109375, 0.3934478759765625, 0.07127761840820312, 1.2138519287109375, -0.069427490234375, 0.3865547180175781, 0.42813873291015625, 1.2478103637695312, 0.8291091918945312, -0.6882286071777344, 1.6173095703125, -0.4207000732421875, 0.37518310546875, 0.4208335876464844, 1.2436141967773438, 0.7210044860839844, -0.4421119689941406, -0.34234619140625, -0.0005359649658203125, 0.18179702758789062, 1.5416545867919922, -0.09059333801269531, -0.3312225341796875, -0.33128929138183594, -0.12537384033203125, -0.06768417358398438, -0.03235626220703125, 0.446624755859375, 0.4631786346435547, 0.22603797912597656, -0.031940460205078125, 1.0877113342285156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000115.npy"} +{"epoch": 0.17384731670445955, "step": 116, "batch_size": 64, "mean": 0.29369184374809265, "std": 0.5575571656227112, "min": -0.9128189086914062, "p10": -0.3979875564575195, "median": 0.19498443603515625, "p90": 1.13076171875, "max": 1.4922599792480469, "pos_frac": 0.6875, "sample": [0.18309783935546875, 0.2078094482421875, 0.12191009521484375, 0.7630844116210938, 0.1757659912109375, -0.9128189086914062, -0.4521026611328125, -0.021518707275390625, 0.5611553192138672, 0.57781982421875, 1.2657852172851562, 0.3278350830078125, 0.0197296142578125, 0.669281005859375, 0.10933685302734375, 0.10673141479492188, -0.12410736083984375, 0.00577545166015625, 0.8659629821777344, -0.2186126708984375, -0.6841278076171875, 0.5583381652832031, 0.14270782470703125, 0.09729385375976562, -0.020145416259765625, 0.7120132446289062, 0.14960479736328125, 1.4922599792480469, 0.32391357421875, 0.4646949768066406, -0.17147064208984375, 0.16930770874023438, -0.17215919494628906, -0.23615264892578125, 1.3039703369140625, 0.20687103271484375, 1.0893936157226562, 1.4616317749023438, 0.14471435546875, -0.05176544189453125, -0.38134765625, 0.3957633972167969, 0.6435127258300781, 0.50372314453125, 0.8567962646484375, 1.1484909057617188, 1.170806884765625, 0.40167236328125, 0.22861480712890625, 0.9263458251953125, -0.2144298553466797, 0.8426895141601562, -0.1537322998046875, -0.7736892700195312, 0.6995162963867188, -0.591583251953125, -0.035297393798828125, 0.38775634765625, -0.4164600372314453, 0.2364501953125, 1.2306365966796875, -0.4051189422607422, 1.0646610260009766, -0.1823139190673828], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000116.npy"} +{"epoch": 0.17535903250188964, "step": 117, "batch_size": 64, "mean": 0.27185723185539246, "std": 0.6163628101348877, "min": -1.7480316162109375, "p10": -0.48230304718017575, "median": 0.29012298583984375, "p90": 1.0326934814453126, "max": 1.6963424682617188, "pos_frac": 0.703125, "sample": [0.48706817626953125, 0.6196269989013672, -0.48494720458984375, -0.2233123779296875, 0.6705474853515625, 0.3253593444824219, 0.28559112548828125, 0.5203189849853516, 0.52978515625, 0.2233600616455078, 0.10369873046875, -0.3010826110839844, 1.13232421875, 0.16507720947265625, 0.117095947265625, 1.0462570190429688, 0.0904693603515625, 0.7226791381835938, 0.569427490234375, 1.0010452270507812, -0.9219284057617188, -0.002666473388671875, 0.633575439453125, 1.1701889038085938, -0.38274383544921875, 0.09212493896484375, -0.6710052490234375, 0.6372833251953125, -0.5363197326660156, 1.65142822265625, 0.38762664794921875, 1.6963424682617188, 0.9664039611816406, -0.2536144256591797, 0.54949951171875, 0.5790023803710938, 0.29465484619140625, 1.1148948669433594, 0.6942062377929688, 0.817230224609375, 0.28011322021484375, 0.397430419921875, -0.1973419189453125, 0.48415374755859375, -0.0443878173828125, -1.7480316162109375, 0.4789848327636719, -0.24779129028320312, 0.6490859985351562, -0.06227874755859375, -0.3457489013671875, 0.6723480224609375, 0.21924972534179688, -0.35961151123046875, 0.1947479248046875, -0.7868995666503906, -0.4761333465576172, 0.3919792175292969, -0.5626068115234375, 0.1890106201171875, 1.3382034301757812, 0.715087890625, 0.00688934326171875, 0.09583663940429688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000117.npy"} +{"epoch": 0.17687074829931973, "step": 118, "batch_size": 64, "mean": 0.32874205708503723, "std": 0.5993523001670837, "min": -0.8839931488037109, "p10": -0.3976573944091797, "median": 0.27801513671875, "p90": 1.171946716308594, "max": 1.5378875732421875, "pos_frac": 0.734375, "sample": [1.4777984619140625, 0.46530914306640625, 1.338623046875, 0.0116424560546875, -0.8839931488037109, 0.9955215454101562, 0.4934539794921875, 0.2137451171875, 0.5952320098876953, -0.3400764465332031, 0.039947509765625, 0.1567230224609375, -0.23785018920898438, 1.0470619201660156, 0.2606925964355469, 0.080535888671875, -0.74151611328125, 0.35950469970703125, -0.3283271789550781, 0.7423667907714844, 0.01134490966796875, 0.51556396484375, 0.3708362579345703, 1.5378875732421875, 0.1802520751953125, 0.52825927734375, -0.3130035400390625, -0.43000030517578125, 0.2976970672607422, -0.17597198486328125, 1.249114990234375, 0.48233795166015625, 1.1840591430664062, 0.024511337280273438, 0.5391311645507812, -0.347137451171875, 0.04024505615234375, 1.1436843872070312, 0.97198486328125, -0.5923919677734375, -0.4057655334472656, 0.9840850830078125, -0.725921630859375, 0.7064132690429688, 0.21587371826171875, -0.3787384033203125, 0.097991943359375, 0.10291290283203125, 1.3442268371582031, -0.2699127197265625, 0.127655029296875, 0.6314315795898438, 0.397796630859375, 1.2686119079589844, 1.1077651977539062, 0.2953376770019531, -0.282318115234375, 0.7510395050048828, 1.1056327819824219, 0.8566226959228516, 0.4715232849121094, -0.06080436706542969, 0.2467365264892578, -0.5135040283203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000118.npy"} +{"epoch": 0.17838246409674982, "step": 119, "batch_size": 64, "mean": 0.298042356967926, "std": 0.5990038514137268, "min": -1.1134872436523438, "p10": -0.42961444854736325, "median": 0.3050956726074219, "p90": 1.1141204833984377, "max": 1.7352447509765625, "pos_frac": 0.71875, "sample": [0.7489166259765625, 0.8718185424804688, -0.47696876525878906, 0.7876625061035156, 0.159515380859375, 0.21327590942382812, 0.5765018463134766, 0.327178955078125, -0.29298973083496094, 0.3224449157714844, -0.4162883758544922, 0.11229705810546875, 1.2614212036132812, -0.43532562255859375, 0.5518589019775391, 0.0863189697265625, 0.11522674560546875, -1.02099609375, 0.13581085205078125, 0.310333251953125, -0.3129444122314453, 1.2389507293701172, 0.5554046630859375, 0.5606803894042969, 1.033477783203125, -0.33182525634765625, 0.13516998291015625, 1.2756156921386719, 0.8333892822265625, 1.7352447509765625, -0.158233642578125, -0.6799163818359375, 0.4171867370605469, 0.6056289672851562, 0.4975128173828125, 0.8721923828125, 0.4211597442626953, 0.1741180419921875, 0.3259735107421875, 0.6697006225585938, 0.7458877563476562, 0.48665618896484375, -0.38064002990722656, 0.52484130859375, -0.5782928466796875, 0.0436248779296875, 0.7996063232421875, 0.29985809326171875, 1.213348388671875, 0.192291259765625, 1.148681640625, -0.6998443603515625, -1.1134872436523438, -0.2959403991699219, 0.08729934692382812, 0.2083282470703125, 0.8139991760253906, 1.3545379638671875, 1.01287841796875, -0.17046356201171875, 0.2156848907470703, -0.1763763427734375, -0.10386276245117188, -0.36040496826171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000119.npy"} +{"epoch": 0.17989417989417988, "step": 120, "batch_size": 64, "mean": 0.3802502751350403, "std": 0.6218160390853882, "min": -0.8102302551269531, "p10": -0.3646976470947265, "median": 0.3935565948486328, "p90": 1.0239952087402344, "max": 2.036651611328125, "pos_frac": 0.71875, "sample": [-0.2270050048828125, 0.6998176574707031, -0.8102302551269531, 1.036041259765625, 0.1064605712890625, -0.322509765625, 0.675567626953125, 0.32703399658203125, 0.41381072998046875, -0.00420379638671875, 0.428009033203125, -0.11439132690429688, 0.18010520935058594, 0.25055694580078125, 0.3166351318359375, 1.781005859375, 0.5913276672363281, -0.1612262725830078, 0.00135040283203125, 0.9042587280273438, 1.75469970703125, 0.6997146606445312, 0.25008392333984375, 0.40673255920410156, 0.196868896484375, 0.5638847351074219, 0.190521240234375, 1.0735549926757812, 0.7532272338867188, -0.1945953369140625, 0.9013824462890625, -0.7921676635742188, 0.9958877563476562, 0.3443412780761719, 0.5847854614257812, 0.6445503234863281, 0.7689704895019531, -0.28639984130859375, 0.85479736328125, 0.42407989501953125, -0.6058940887451172, -0.1740283966064453, -0.7145233154296875, 2.036651611328125, -0.15666770935058594, 0.5391693115234375, -0.3827781677246094, 0.980499267578125, 0.7965850830078125, 0.393585205078125, 0.7236461639404297, -0.3958301544189453, 0.8405380249023438, 0.5502777099609375, 1.1402454376220703, -0.560546875, 0.053741455078125, 2.0217666625976562, -0.14372825622558594, 0.3935279846191406, 0.1970367431640625, -0.1949787139892578, 0.5608997344970703, 0.22948837280273438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000120.npy"} +{"epoch": 0.18140589569160998, "step": 121, "batch_size": 64, "mean": 0.2976876497268677, "std": 0.631470799446106, "min": -0.959381103515625, "p10": -0.5052448272705078, "median": 0.26710987091064453, "p90": 1.0968513488769531, "max": 2.36053466796875, "pos_frac": 0.6875, "sample": [1.5291748046875, 0.320068359375, -0.5077133178710938, -0.11466598510742188, 0.8241157531738281, 1.142059326171875, 1.0958366394042969, -0.04400634765625, 0.5409736633300781, -0.6924285888671875, 1.2092514038085938, 0.7652740478515625, 1.501129150390625, 0.2795391082763672, 0.2988433837890625, -0.4994850158691406, 0.9417839050292969, -0.959381103515625, 0.5652961730957031, -0.5442733764648438, -0.022125244140625, 0.4215202331542969, 0.7282333374023438, 0.6618270874023438, 0.2342529296875, 0.05825042724609375, 0.30345916748046875, -0.06342887878417969, 0.8089141845703125, 0.5434608459472656, 0.1662139892578125, 2.36053466796875, 1.0972862243652344, 0.07851409912109375, -0.43294525146484375, -0.18894195556640625, 0.368804931640625, 0.5413970947265625, 1.079986572265625, 0.2546806335449219, -0.08553314208984375, 0.4699668884277344, 0.7264862060546875, 0.4167633056640625, -0.22313690185546875, 1.38580322265625, -0.6057395935058594, -0.48038482666015625, 0.8408660888671875, 0.67724609375, -0.26556396484375, -0.6974334716796875, 0.4284172058105469, 0.290679931640625, -0.27878379821777344, 0.07037353515625, 0.12866592407226562, 0.039829254150390625, 0.24567413330078125, -0.810638427734375, -0.2650794982910156, 0.1897735595703125, 0.139739990234375, 0.06273078918457031], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000121.npy"} +{"epoch": 0.18291761148904007, "step": 122, "batch_size": 64, "mean": 0.45878836512565613, "std": 0.6656911373138428, "min": -1.1701431274414062, "p10": -0.2021869659423828, "median": 0.35733985900878906, "p90": 1.1794795989990239, "max": 2.4301300048828125, "pos_frac": 0.828125, "sample": [2.4301300048828125, 0.23087310791015625, -0.9349288940429688, 0.6947097778320312, -0.8700103759765625, 0.2466106414794922, 0.980316162109375, 0.7226181030273438, 0.11385917663574219, 0.1347808837890625, 0.566619873046875, -0.40183258056640625, -0.048816680908203125, 0.07441329956054688, 0.14611434936523438, 0.22821044921875, 0.046875, 1.0043182373046875, -0.20457077026367188, 0.7216682434082031, 0.20057106018066406, 2.2223472595214844, 0.3972206115722656, 0.6830253601074219, 1.8689918518066406, 0.4683380126953125, 0.31163597106933594, 0.2855339050292969, 0.27899169921875, 0.22631454467773438, -0.196624755859375, 1.8464889526367188, -0.2270641326904297, 0.3174591064453125, -1.1701431274414062, 0.7573966979980469, 0.15335655212402344, -0.00626373291015625, 1.0768070220947266, 0.7655181884765625, 0.003326416015625, 0.6309432983398438, -0.11960220336914062, 0.848724365234375, 0.264251708984375, 0.8701610565185547, 0.6986846923828125, 0.11235427856445312, 0.7664337158203125, 0.1066131591796875, 0.447418212890625, 0.6396198272705078, 0.0012035369873046875, 0.03009033203125, 0.4202690124511719, 0.5668678283691406, 0.6571807861328125, 0.5047378540039062, 0.7900314331054688, 0.75848388671875, 1.6260452270507812, 1.2234821319580078, -0.30682373046875, 1.6800994873046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000122.npy"} +{"epoch": 0.18442932728647016, "step": 123, "batch_size": 64, "mean": 0.37006592750549316, "std": 0.6625875234603882, "min": -1.5880889892578125, "p10": -0.3777275085449219, "median": 0.4213380813598633, "p90": 1.0507122039794925, "max": 2.4588623046875, "pos_frac": 0.765625, "sample": [0.993560791015625, 0.5733108520507812, 0.6056671142578125, -1.23944091796875, -0.14863204956054688, 0.1544189453125, -0.37828826904296875, -0.251007080078125, 1.4745635986328125, 0.6380271911621094, 0.2256927490234375, 1.1116485595703125, -0.24133682250976562, -0.3764190673828125, 1.494659423828125, 0.9033203125, 0.0489501953125, 0.5165920257568359, -0.4283294677734375, 0.7186737060546875, 0.9267807006835938, 1.1514205932617188, 0.9968605041503906, 0.5790348052978516, 1.2278518676757812, 0.7385406494140625, 0.3883075714111328, 0.24249649047851562, 0.5476531982421875, 0.18927001953125, -0.2393646240234375, 1.07379150390625, -1.1001663208007812, 0.48647499084472656, -0.6210746765136719, -0.5647201538085938, 0.7341766357421875, 0.2568206787109375, 0.02533721923828125, 0.5000553131103516, -0.26059722900390625, 0.9451522827148438, 0.9105911254882812, 0.34893798828125, 0.32685279846191406, 0.6779327392578125, 0.3390350341796875, 0.2163982391357422, -0.0877838134765625, 0.7402057647705078, 0.8347663879394531, 0.7378120422363281, 0.143341064453125, 0.9109725952148438, -1.5880889892578125, 0.34552001953125, 0.008073806762695312, 0.23201370239257812, -0.22638702392578125, 0.45436859130859375, 0.1316356658935547, 0.6442298889160156, 2.4588623046875, 0.5051956176757812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000123.npy"} +{"epoch": 0.18594104308390022, "step": 124, "batch_size": 64, "mean": 0.4275299310684204, "std": 0.6495681405067444, "min": -0.9233322143554688, "p10": -0.34640083312988273, "median": 0.3618202209472656, "p90": 1.1539581298828128, "max": 3.008819580078125, "pos_frac": 0.765625, "sample": [0.08736419677734375, 1.3995513916015625, 1.061126708984375, -0.4635276794433594, 0.31604766845703125, 0.09210968017578125, 0.7190399169921875, 0.6944160461425781, -0.3726043701171875, 0.842803955078125, 0.7919235229492188, 0.9174308776855469, 0.06825447082519531, 1.8995361328125, 0.33318519592285156, -0.11147117614746094, 0.371185302734375, 0.5525484085083008, -0.2042388916015625, -0.39658546447753906, 3.008819580078125, 0.1129608154296875, 1.1906890869140625, -0.7704620361328125, -0.6218223571777344, 0.6126861572265625, 0.3161811828613281, 0.488037109375, 1.4730377197265625, -0.069671630859375, 0.49811744689941406, 0.0157318115234375, 1.0496368408203125, 0.7125244140625, 0.2563972473144531, 0.177825927734375, 1.0682525634765625, -0.9233322143554688, 1.5562362670898438, 0.517059326171875, 0.648162841796875, 1.0658493041992188, -0.0342254638671875, 0.46675872802734375, 0.7955074310302734, 0.32279205322265625, 0.383209228515625, 0.3490753173828125, -0.4846687316894531, 1.2098751068115234, 0.46212005615234375, 0.3203849792480469, -0.09577178955078125, 0.12121200561523438, 0.6637420654296875, -0.0981903076171875, 0.13019180297851562, -0.135009765625, 0.4470672607421875, 0.8060455322265625, -0.2852592468261719, 0.4006919860839844, 0.35245513916015625, 0.28289794921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000124.npy"} +{"epoch": 0.1874527588813303, "step": 125, "batch_size": 64, "mean": 0.3849112391471863, "std": 0.5968617796897888, "min": -1.077545166015625, "p10": -0.32438201904296876, "median": 0.4638204574584961, "p90": 1.0770137786865235, "max": 1.7083206176757812, "pos_frac": 0.703125, "sample": [1.41436767578125, 0.7278671264648438, 0.9789543151855469, 0.2440338134765625, 0.7961654663085938, -0.6412982940673828, 1.7083206176757812, 0.15155982971191406, 0.860992431640625, 0.6567230224609375, 0.838531494140625, 0.5953750610351562, -0.1589508056640625, 0.7308502197265625, 0.17595863342285156, 1.2739906311035156, 1.4849853515625, 0.6641845703125, 0.30841064453125, 0.5135536193847656, 1.0776557922363281, 0.5144767761230469, 0.9423599243164062, 0.45843505859375, 0.8783645629882812, 0.5037631988525391, 1.0755157470703125, -0.22864532470703125, -0.8064498901367188, 0.18590545654296875, 1.1481475830078125, -0.2412261962890625, -0.3242645263671875, 0.7864990234375, 0.7976799011230469, -0.10617828369140625, 0.4692058563232422, 0.847564697265625, 0.4422454833984375, 0.8261566162109375, 0.548431396484375, -0.17646026611328125, 0.1130523681640625, 0.3806495666503906, 0.767974853515625, -1.077545166015625, 0.3871307373046875, 1.3035240173339844, -0.1328716278076172, -0.012327194213867188, -0.204559326171875, 0.5222187042236328, 0.7840423583984375, -0.11950302124023438, -0.324432373046875, -0.113983154296875, 0.3994483947753906, -0.5093460083007812, 0.7331733703613281, -0.20476150512695312, 0.1053314208984375, -0.6365852355957031, -0.7809219360351562, 0.31085205078125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000125.npy"} +{"epoch": 0.1889644746787604, "step": 126, "batch_size": 64, "mean": 0.41218748688697815, "std": 0.8345752358436584, "min": -1.1042633056640625, "p10": -0.3823883056640625, "median": 0.21184444427490234, "p90": 1.3521392822265625, "max": 4.71551513671875, "pos_frac": 0.75, "sample": [0.6359958648681641, 0.09058952331542969, 0.9608592987060547, 0.4652690887451172, -0.08454322814941406, 1.8568077087402344, 0.9649810791015625, -0.07123565673828125, 0.26589202880859375, -0.5155792236328125, 0.5500164031982422, 1.869140625, -0.98828125, 0.178192138671875, 1.3742828369140625, 0.09576988220214844, 0.0526275634765625, -0.5564689636230469, -0.1433277130126953, 0.20218658447265625, 0.653167724609375, 1.4439926147460938, 0.0040130615234375, 0.14244461059570312, -0.18715286254882812, -0.38440704345703125, 0.19159698486328125, 0.15442657470703125, 1.0300064086914062, -0.2092609405517578, -0.37767791748046875, 1.1383895874023438, -0.6483173370361328, 0.6813640594482422, 0.22150230407714844, -0.08687591552734375, 0.14152145385742188, 4.71551513671875, 0.47722625732421875, -1.1042633056640625, 0.18947601318359375, 0.7836761474609375, 0.04800987243652344, -0.10366058349609375, 0.383056640625, 0.5161361694335938, 0.4195556640625, 1.3522720336914062, 0.3941497802734375, 1.3895416259765625, 0.6525077819824219, 1.3518295288085938, 0.13683319091796875, 0.4331169128417969, 0.0378875732421875, 0.07623291015625, 0.1750946044921875, 1.0757522583007812, 0.34076690673828125, 0.7677154541015625, 0.4019927978515625, 1.3132705688476562, -0.8389549255371094, -0.11664581298828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000126.npy"} +{"epoch": 0.19047619047619047, "step": 127, "batch_size": 64, "mean": 0.3935753107070923, "std": 0.6686541438102722, "min": -1.65484619140625, "p10": -0.3535421371459961, "median": 0.43803977966308594, "p90": 1.337375640869141, "max": 1.6417903900146484, "pos_frac": 0.75, "sample": [-1.0497894287109375, 0.819061279296875, 0.33754730224609375, 0.4987945556640625, 0.5174179077148438, 0.573089599609375, 0.0818328857421875, 1.6417903900146484, -0.0582427978515625, 0.6417083740234375, 0.8821487426757812, -0.3657379150390625, 0.5488090515136719, 1.398977279663086, 1.143280029296875, 0.6542930603027344, 0.187164306640625, 0.5796012878417969, -0.920257568359375, 0.4014549255371094, 1.2484817504882812, -1.65484619140625, 0.5330429077148438, 1.6377029418945312, 0.6434211730957031, -0.25112152099609375, 0.8553390502929688, -0.0697021484375, -0.8751373291015625, 0.13877105712890625, 0.3480720520019531, 0.5061759948730469, 0.08510017395019531, 0.0757598876953125, -0.0161590576171875, 1.0920085906982422, 0.3580799102783203, 1.1695671081542969, 0.7966499328613281, 1.4658203125, 0.766876220703125, 1.3770675659179688, -0.33827972412109375, -0.38344764709472656, 0.5692062377929688, 0.698089599609375, -0.03429412841796875, -0.24184036254882812, 1.3754730224609375, -0.3498077392578125, 0.4010009765625, 1.5943450927734375, 0.47376251220703125, 0.01776885986328125, 0.46472930908203125, 0.2459869384765625, 0.8448028564453125, 0.01218414306640625, -0.3309173583984375, 1.1257896423339844, 0.21895408630371094, 0.0251922607421875, 0.4113502502441406, -0.35514259338378906], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000127.npy"} +{"epoch": 0.19198790627362056, "step": 128, "batch_size": 64, "mean": 0.34676679968833923, "std": 0.6998385190963745, "min": -1.4144744873046875, "p10": -0.43012428283691406, "median": 0.324188232421875, "p90": 1.0599624633789064, "max": 2.3394317626953125, "pos_frac": 0.671875, "sample": [2.3394317626953125, 0.28424072265625, 0.08423995971679688, 0.8178234100341797, 0.9153556823730469, 0.5698928833007812, 0.9638214111328125, 0.191192626953125, 0.5548591613769531, 1.7855072021484375, 0.136016845703125, -0.7325592041015625, -0.04483795166015625, -0.4417228698730469, 0.47752952575683594, -0.2519989013671875, -0.06568717956542969, -1.1776809692382812, -0.172698974609375, 1.5294342041015625, -0.049652099609375, -0.36185646057128906, -0.24127197265625, 1.352569580078125, 1.0882568359375, 0.896484375, -0.19672393798828125, 0.9774932861328125, -0.04714775085449219, 0.5727043151855469, 0.00653076171875, 0.35115814208984375, 0.653717041015625, 0.9463958740234375, 0.3179359436035156, 0.6572647094726562, -0.054443359375, 1.3652420043945312, -0.44716644287109375, -0.01732635498046875, -0.37310028076171875, 0.43699073791503906, 0.38846588134765625, 0.3304405212402344, -0.4030609130859375, 0.12068939208984375, 0.7393264770507812, 0.5792350769042969, 0.74847412109375, -0.44549560546875, 0.8290328979492188, 0.06031036376953125, 2.2348480224609375, -0.728515625, 0.7289295196533203, 0.3424415588378906, 0.1222991943359375, 0.6602668762207031, 0.9939422607421875, 0.2639923095703125, -1.4144744873046875, -0.17041778564453125, 0.2723503112792969, 0.343780517578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000128.npy"} +{"epoch": 0.19349962207105065, "step": 129, "batch_size": 64, "mean": 0.279253751039505, "std": 0.5740676522254944, "min": -1.197418212890625, "p10": -0.3001522064208984, "median": 0.21193313598632812, "p90": 1.1127922058105473, "max": 1.75872802734375, "pos_frac": 0.671875, "sample": [0.217803955078125, -0.06150054931640625, 0.44951820373535156, -0.5469131469726562, -0.04784202575683594, 1.3098258972167969, -0.518585205078125, -0.5045299530029297, -0.43285369873046875, 0.07162857055664062, 0.94384765625, -0.21591758728027344, 0.6932220458984375, -0.253570556640625, -0.18499755859375, -0.31281280517578125, 0.7085800170898438, 0.0185546875, 1.01251220703125, 0.2110748291015625, 0.5121612548828125, 0.5814361572265625, 0.21395111083984375, 0.251861572265625, -0.17556381225585938, 1.75872802734375, 1.1643352508544922, -0.08620452880859375, 0.21477890014648438, 0.91778564453125, 0.9190673828125, 1.1557693481445312, 0.0541534423828125, 0.8600578308105469, 0.205078125, 0.2958030700683594, -0.2112274169921875, 0.176422119140625, 0.6112346649169922, 1.3095722198486328, 0.1877593994140625, -0.1902637481689453, 0.09989166259765625, 0.23404312133789062, 0.07172393798828125, 0.5012149810791016, -0.120880126953125, 1.3117294311523438, -0.0933837890625, 0.35292816162109375, 0.2904396057128906, 0.6395797729492188, 0.9127960205078125, 0.012258529663085938, 0.10146903991699219, -0.20011138916015625, 1.6917190551757812, -1.197418212890625, 0.415802001953125, -0.2706108093261719, 0.21279144287109375, -0.5750732421875, -0.027286529541015625, 0.22487640380859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000129.npy"} +{"epoch": 0.19501133786848074, "step": 130, "batch_size": 64, "mean": 0.3397524058818817, "std": 0.590162456035614, "min": -0.7353973388671875, "p10": -0.35636825561523433, "median": 0.23081684112548828, "p90": 1.1843238830566407, "max": 1.9361457824707031, "pos_frac": 0.640625, "sample": [0.7870330810546875, -0.12342071533203125, 1.5298995971679688, 0.1807403564453125, 0.7225799560546875, 0.88873291015625, 1.4362030029296875, 1.1020278930664062, 0.2800426483154297, -0.28775978088378906, -0.1477508544921875, 0.9040145874023438, 0.019683837890625, -0.06708526611328125, 0.267242431640625, 0.47466278076171875, 0.8045101165771484, -0.02318572998046875, -0.01128387451171875, 1.1935501098632812, 0.8550815582275391, 0.8436050415039062, 1.2650833129882812, 0.5014877319335938, 0.17283248901367188, -0.32460784912109375, 0.600006103515625, 0.237457275390625, -0.5043087005615234, 0.09453964233398438, 1.1627960205078125, 0.56219482421875, 0.0883331298828125, -0.09834098815917969, 0.5382308959960938, 1.9361457824707031, 0.7053756713867188, -0.12772369384765625, 1.2999114990234375, -0.2541847229003906, -0.5330429077148438, 0.6103477478027344, -0.1738433837890625, -0.29422760009765625, 0.7697734832763672, 0.3486442565917969, -0.007488250732421875, -0.7353973388671875, 0.70953369140625, -0.3699798583984375, -0.45671653747558594, 0.48091888427734375, -0.1222991943359375, 0.3355293273925781, -0.48731231689453125, 0.5870323181152344, -0.04974365234375, -0.12210464477539062, -0.4746894836425781, 0.1702880859375, 1.5558013916015625, 0.16840362548828125, 0.22417640686035156, 0.12619781494140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000130.npy"} +{"epoch": 0.1965230536659108, "step": 131, "batch_size": 64, "mean": 0.42993444204330444, "std": 0.7127673625946045, "min": -1.6146697998046875, "p10": -0.38973140716552734, "median": 0.40087890625, "p90": 1.3652608871459961, "max": 1.9514007568359375, "pos_frac": 0.703125, "sample": [-0.3040199279785156, 0.8279781341552734, -0.21094894409179688, -0.2557487487792969, 0.021759033203125, 1.277496337890625, 0.395050048828125, 0.26018333435058594, -0.3803386688232422, 1.3272628784179688, 0.886993408203125, 0.25414466857910156, -0.5368995666503906, -0.11178207397460938, 0.46887969970703125, 0.4637432098388672, -0.6949615478515625, 0.9529876708984375, 1.0852279663085938, 0.174896240234375, -0.199615478515625, -0.064910888671875, 0.341705322265625, 0.9097747802734375, -0.770111083984375, 0.7160873413085938, 0.6460189819335938, -0.7282638549804688, 0.9770164489746094, -0.4813995361328125, 0.19489288330078125, 0.9075927734375, 1.26129150390625, 0.8311309814453125, 0.8792724609375, 0.1549224853515625, 0.027431488037109375, 1.2113037109375, 0.4957122802734375, 1.9514007568359375, 0.2647724151611328, 1.4165267944335938, -0.28837013244628906, 1.5567626953125, 0.550079345703125, -0.3937568664550781, 0.114410400390625, 1.01348876953125, 0.16410064697265625, 1.4093017578125, -0.15690231323242188, -0.1997528076171875, 0.6046886444091797, -0.038204193115234375, -1.6146697998046875, 0.406707763671875, -0.18463134765625, 1.0563125610351562, 1.5835113525390625, 0.06541061401367188, 1.348388671875, 1.3724918365478516, 1.653676986694336, 0.6483039855957031], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000131.npy"} +{"epoch": 0.1980347694633409, "step": 132, "batch_size": 64, "mean": 0.36164700984954834, "std": 0.8108624219894409, "min": -2.6451263427734375, "p10": -0.5110649108886718, "median": 0.3364410400390625, "p90": 1.3473690032958985, "max": 2.7406387329101562, "pos_frac": 0.71875, "sample": [1.5824432373046875, 0.050464630126953125, 0.5242385864257812, 1.400390625, 0.24033164978027344, -0.9794197082519531, 0.846282958984375, 0.19556045532226562, 0.3380279541015625, 1.3480682373046875, 1.0757770538330078, 0.9981327056884766, 0.5829811096191406, -0.0514068603515625, -0.7381801605224609, -0.05458831787109375, 0.056224822998046875, 1.0264129638671875, 0.0846099853515625, -0.31087493896484375, -0.05588531494140625, -2.6451263427734375, 0.4744091033935547, 0.13104248046875, -0.025867462158203125, -0.3903350830078125, -0.4470367431640625, 1.1360588073730469, 0.7820816040039062, 0.25435638427734375, 0.5756072998046875, 0.1768169403076172, -1.4327964782714844, 0.19518280029296875, 0.5595626831054688, 0.9158782958984375, 1.6077651977539062, 0.6781139373779297, 0.49715232849121094, 0.3348541259765625, -0.3044910430908203, 2.7406387329101562, 0.47634124755859375, 0.8439788818359375, 0.9838829040527344, 0.5579414367675781, 0.15181732177734375, 0.09297943115234375, 0.19658851623535156, 0.6573867797851562, 1.1405353546142578, -0.5385055541992188, 1.458953857421875, 0.8502540588378906, 1.3457374572753906, 0.9771728515625, -0.2330780029296875, -0.39269065856933594, -0.6087493896484375, 1.5316085815429688, -0.31531524658203125, 0.1458587646484375, 0.48978424072265625, -0.640533447265625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000132.npy"} +{"epoch": 0.19954648526077098, "step": 133, "batch_size": 64, "mean": 0.2855341136455536, "std": 0.6469444036483765, "min": -1.269256591796875, "p10": -0.5598152160644532, "median": 0.3115501403808594, "p90": 0.998270416259766, "max": 2.1819992065429688, "pos_frac": 0.6875, "sample": [1.0323715209960938, -0.1331634521484375, -0.48137664794921875, 0.918701171875, 0.10081100463867188, 0.3372802734375, 0.0054473876953125, 0.7390975952148438, -0.2305927276611328, -0.5600967407226562, 0.7144241333007812, 0.7978668212890625, 0.5536575317382812, 0.20411300659179688, -0.15449905395507812, 0.3785552978515625, 0.735748291015625, -0.871826171875, 0.722137451171875, -0.588836669921875, -0.18735504150390625, -0.5591583251953125, -0.4423027038574219, 0.8839797973632812, 0.8835391998291016, -0.19161605834960938, 1.4101715087890625, 0.3758697509765625, 0.0897674560546875, -0.23182296752929688, 0.4584236145019531, 0.21291542053222656, -1.269256591796875, 0.6094722747802734, -0.6687393188476562, 0.4260406494140625, 0.28582000732421875, 0.2735137939453125, 0.8099422454833984, 0.5575408935546875, 1.4188385009765625, 0.6894989013671875, 1.03533935546875, -0.9481983184814453, 0.09911346435546875, 0.7199554443359375, 0.438201904296875, 0.8177032470703125, 0.37574005126953125, 2.1819992065429688, 0.26766204833984375, 0.14220428466796875, -0.0068607330322265625, 1.497161865234375, -0.018833160400390625, 0.12943649291992188, 0.092987060546875, 0.8416824340820312, 0.5780677795410156, -0.2524909973144531, 1.0863571166992188, 0.4608917236328125, -0.3647499084472656, -0.9540901184082031], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000133.npy"} +{"epoch": 0.20105820105820105, "step": 134, "batch_size": 64, "mean": 0.40749499201774597, "std": 0.6928680539131165, "min": -2.0683975219726562, "p10": -0.23221473693847655, "median": 0.4144315719604492, "p90": 1.1490798950195313, "max": 3.0536041259765625, "pos_frac": 0.75, "sample": [0.1308441162109375, 0.426971435546875, 0.5491943359375, 0.5767593383789062, -0.20376968383789062, 0.23392105102539062, -0.1514739990234375, 0.9206771850585938, 1.1348419189453125, -0.006591796875, 0.76153564453125, -0.15146636962890625, 1.0627899169921875, 0.5718421936035156, -0.1672229766845703, 0.046680450439453125, 1.1726951599121094, 0.2957611083984375, -0.23766326904296875, 0.7360382080078125, 0.49883079528808594, 0.112274169921875, 0.7265644073486328, 0.4282646179199219, 0.0387725830078125, 1.49810791015625, 0.02777862548828125, -0.37473297119140625, -0.19884681701660156, 0.6409721374511719, -0.3841667175292969, 0.12834739685058594, 1.4316558837890625, -0.2644195556640625, 0.06715011596679688, -0.7755889892578125, 0.6853675842285156, 0.3196525573730469, 0.7383880615234375, 0.2708415985107422, 1.6045074462890625, 0.8742198944091797, 0.1107025146484375, -2.0683975219726562, 0.9937515258789062, -0.4457817077636719, 1.155181884765625, -0.1602783203125, 0.7521209716796875, 0.4946422576904297, 0.15052413940429688, 1.4498138427734375, 0.7423477172851562, 0.9295806884765625, 3.0536041259765625, 0.3428192138671875, -0.21950149536132812, 0.5470390319824219, 0.40189170837402344, 0.5305290222167969, 0.5034637451171875, 1.0780029296875, 0.12371444702148438, -0.18239593505859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000134.npy"} +{"epoch": 0.20256991685563114, "step": 135, "batch_size": 64, "mean": 0.27311572432518005, "std": 0.6529503464698792, "min": -1.1662979125976562, "p10": -0.6845216751098633, "median": 0.26943206787109375, "p90": 1.1475803375244145, "max": 1.9219131469726562, "pos_frac": 0.703125, "sample": [0.5732040405273438, -0.7149085998535156, 1.1833457946777344, 0.04878807067871094, 0.11553764343261719, -0.2913665771484375, 0.5520858764648438, -0.62890625, -0.03687286376953125, 1.4029617309570312, 1.485260009765625, 0.32106781005859375, 0.3249053955078125, 0.09927177429199219, -0.6071853637695312, -0.7782707214355469, 0.640411376953125, 1.180755615234375, -0.3393592834472656, 0.2782135009765625, 0.19295692443847656, -0.2154693603515625, -0.7083568572998047, -0.9077911376953125, 0.4249382019042969, 1.0701713562011719, 0.29325103759765625, -0.7985610961914062, 0.32291412353515625, 0.20333480834960938, 0.12651443481445312, 0.6963920593261719, 0.98834228515625, 0.3214092254638672, 1.9219131469726562, 0.5747814178466797, -0.131744384765625, 0.9601345062255859, 0.7400894165039062, -1.1662979125976562, -0.020227432250976562, 0.9413299560546875, 0.16176795959472656, 0.46410369873046875, -0.21615219116210938, 0.19673919677734375, -0.6165084838867188, -0.8550567626953125, 0.38140869140625, 0.1054534912109375, 1.0309333801269531, 0.13616943359375, 0.38165283203125, 0.495513916015625, 1.5175552368164062, 1.2422218322753906, -0.17112159729003906, 0.7246417999267578, -0.04469490051269531, 0.4429588317871094, 0.01068115234375, 0.260650634765625, 0.9604911804199219, 0.2310333251953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000135.npy"} +{"epoch": 0.20408163265306123, "step": 136, "batch_size": 64, "mean": 0.34291741251945496, "std": 0.9207788705825806, "min": -2.309051513671875, "p10": -0.8172271728515624, "median": 0.3679847717285156, "p90": 1.4168792724609376, "max": 3.2241668701171875, "pos_frac": 0.703125, "sample": [3.2241668701171875, 0.9277839660644531, -0.9907970428466797, -0.2690277099609375, -0.4213409423828125, 0.5429458618164062, 0.22504425048828125, 0.12207794189453125, -0.13419342041015625, 0.3960227966308594, 0.28157806396484375, -1.5661163330078125, -0.15845489501953125, 0.49610137939453125, 0.421417236328125, 0.66436767578125, 1.4438362121582031, 0.09845733642578125, 0.17014694213867188, -0.1533355712890625, 1.0847320556640625, 0.14547348022460938, 0.42958831787109375, 1.1920394897460938, -0.72479248046875, 0.22487640380859375, 1.4966964721679688, 0.2352447509765625, 0.9818038940429688, 1.80670166015625, -0.039093017578125, 0.07407379150390625, 0.7314300537109375, 0.8680648803710938, 0.252838134765625, -2.309051513671875, 0.42159271240234375, 0.6327934265136719, -0.20188140869140625, 0.35512542724609375, -1.0616493225097656, 0.387176513671875, 0.80517578125, 0.6731033325195312, 0.039958953857421875, 1.0355377197265625, 0.4802055358886719, -0.184722900390625, -1.0995025634765625, 3.1523056030273438, 0.6655826568603516, -0.9078140258789062, -0.44695281982421875, 0.5581817626953125, -0.2970123291015625, 1.2746124267578125, 0.3808441162109375, 0.1007080078125, 1.41888427734375, 1.4938983917236328, 1.412200927734375, 0.40320587158203125, -0.4593086242675781, -0.856842041015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000136.npy"} +{"epoch": 0.20559334845049132, "step": 137, "batch_size": 64, "mean": 0.3818773031234741, "std": 0.6908769607543945, "min": -1.308929443359375, "p10": -0.46519470214843744, "median": 0.3689231872558594, "p90": 1.3200531005859384, "max": 2.1752777099609375, "pos_frac": 0.65625, "sample": [-0.150665283203125, -1.308929443359375, 0.7318191528320312, 1.0847625732421875, 0.8387660980224609, 1.4056320190429688, 0.9335212707519531, 1.48675537109375, -0.5755691528320312, 0.993408203125, 0.6751174926757812, 0.9287643432617188, 1.0106773376464844, 1.549102783203125, 0.5998153686523438, 0.7439804077148438, 0.515106201171875, 0.2259674072265625, 0.504180908203125, 0.2255706787109375, -0.5589809417724609, 0.179779052734375, 0.3856964111328125, -0.341522216796875, -0.138214111328125, 0.8917236328125, 0.7253265380859375, 2.1752777099609375, -0.2552757263183594, -0.1899871826171875, -0.8115692138671875, 0.5183944702148438, 0.26338958740234375, 0.2662506103515625, 1.810943603515625, -0.03994178771972656, 0.08534622192382812, -0.42925262451171875, 0.16605377197265625, -0.3050556182861328, -0.0977630615234375, -0.0204925537109375, 0.6183204650878906, 0.04656982421875, 0.6015586853027344, -0.2849006652832031, 0.35214996337890625, 1.1203689575195312, 0.472869873046875, 0.8064422607421875, -0.18514633178710938, -0.08765411376953125, -0.05945014953613281, -0.48059844970703125, 0.6269550323486328, 0.8037548065185547, 0.3320026397705078, -0.5988845825195312, -0.8116531372070312, 0.5247344970703125, -0.08625411987304688, 0.86187744140625, 1.421630859375, 1.7475433349609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000137.npy"} +{"epoch": 0.20710506424792138, "step": 138, "batch_size": 64, "mean": 0.3041801452636719, "std": 0.672796905040741, "min": -1.3795013427734375, "p10": -0.5066736221313476, "median": 0.3319587707519531, "p90": 1.0726051330566408, "max": 1.9992828369140625, "pos_frac": 0.703125, "sample": [-0.38404083251953125, 0.44589996337890625, 0.8336124420166016, 0.99652099609375, 0.30460357666015625, 0.037250518798828125, 1.3243179321289062, 1.0450210571289062, -0.54248046875, 0.1367511749267578, 0.21253585815429688, 0.2373199462890625, 1.358123779296875, 0.17624664306640625, 1.9992828369140625, -1.3795013427734375, -0.738372802734375, 0.6018314361572266, -0.16159820556640625, 0.7238273620605469, -0.1234130859375, 0.6091079711914062, 0.3054656982421875, 0.5989036560058594, 0.6199111938476562, 0.3676109313964844, 0.7571029663085938, 0.9415321350097656, 0.4794807434082031, -0.024286270141601562, 0.8117399215698242, -0.12819480895996094, 1.3938331604003906, -0.17293548583984375, -0.4231243133544922, 0.7582950592041016, -0.4143638610839844, -1.0369949340820312, 0.08485984802246094, 1.5731391906738281, 0.37972259521484375, 0.772674560546875, 0.3624267578125, 0.37673187255859375, 0.21840286254882812, -0.9854583740234375, 1.4051990509033203, 0.25504302978515625, 0.5114212036132812, -0.042816162109375, 0.4449920654296875, 0.8833084106445312, 1.0844268798828125, -0.6004142761230469, -1.2943572998046875, 0.0058956146240234375, -0.32969093322753906, 0.2160511016845703, 0.13981056213378906, 0.35845184326171875, 0.94427490234375, -0.17841720581054688, 0.54248046875, -0.20745086669921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000138.npy"} +{"epoch": 0.20861678004535147, "step": 139, "batch_size": 64, "mean": 0.44225820899009705, "std": 0.6696040034294128, "min": -1.0201568603515625, "p10": -0.4320068359375, "median": 0.36556148529052734, "p90": 1.3988590240478518, "max": 1.7212066650390625, "pos_frac": 0.734375, "sample": [0.5807037353515625, 1.331460952758789, 0.15401458740234375, -0.7347335815429688, -1.0201568603515625, 1.1394157409667969, -0.08159637451171875, 1.7212066650390625, -0.09015274047851562, 0.5019931793212891, 0.7117652893066406, 1.1441173553466797, 1.1689815521240234, -0.4598350524902344, 1.5573883056640625, 1.4922332763671875, 1.6370468139648438, 0.2003631591796875, -0.32913970947265625, 0.8300590515136719, -0.4342041015625, 0.260650634765625, -0.25240325927734375, 0.10383033752441406, 0.5926170349121094, 0.3704185485839844, 0.939910888671875, 0.14813232421875, 0.5888786315917969, 0.469512939453125, 0.04791259765625, 1.249420166015625, 0.07781600952148438, 0.163909912109375, -0.08351898193359375, 0.8989715576171875, -0.4268798828125, 0.7138175964355469, 1.0032958984375, 0.5097770690917969, 1.1754417419433594, 0.3750419616699219, 1.3677635192871094, -0.018308639526367188, -0.035312652587890625, 0.0284423828125, 0.144561767578125, 0.2558441162109375, 1.4121856689453125, 0.08994674682617188, 1.7105712890625, -0.6798171997070312, 0.5055770874023438, -0.515625, -0.15167999267578125, -0.13895797729492188, 0.3607044219970703, 0.47124481201171875, -0.5144309997558594, 0.9075164794921875, 1.209930419921875, 1.5189056396484375, 0.1519622802734375, 0.2760162353515625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000139.npy"} +{"epoch": 0.21012849584278157, "step": 140, "batch_size": 64, "mean": 0.5633726119995117, "std": 0.8737393021583557, "min": -2.6202850341796875, "p10": -0.48595027923583983, "median": 0.6503925323486328, "p90": 1.6057073593139655, "max": 2.429290771484375, "pos_frac": 0.734375, "sample": [2.063201904296875, 0.364227294921875, -0.10574722290039062, -0.45806884765625, 0.2570953369140625, 0.5837821960449219, -0.7111663818359375, -0.8861923217773438, 1.0561199188232422, 2.2010726928710938, -2.6202850341796875, 0.855560302734375, 1.0302352905273438, 1.216552734375, 0.7397842407226562, 0.8430938720703125, -0.48049354553222656, 0.8650436401367188, -0.6522903442382812, 0.9606742858886719, 0.15503311157226562, 0.5311126708984375, 0.6675148010253906, 0.7516098022460938, -0.062427520751953125, 1.065521240234375, 0.633270263671875, 1.6654529571533203, 0.09755706787109375, -0.6270065307617188, 0.417022705078125, -0.20557022094726562, 1.1074390411376953, 0.5018234252929688, -0.32486724853515625, 0.32089996337890625, 2.2220115661621094, 1.137969970703125, 1.1906204223632812, 1.227447509765625, 0.7924880981445312, 1.3105430603027344, 0.9024429321289062, 0.8204555511474609, 0.5540390014648438, -0.3775672912597656, 1.971771240234375, 0.7029457092285156, 1.9654922485351562, -0.20664596557617188, -0.7778892517089844, 0.8123798370361328, -0.036014556884765625, 0.4639606475830078, 2.429290771484375, 0.48882293701171875, 1.4663009643554688, 0.21259307861328125, 0.8764114379882812, 1.0800323486328125, 0.4574775695800781, 1.1424064636230469, -0.10223579406738281, -0.48828887939453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000140.npy"} +{"epoch": 0.21164021164021163, "step": 141, "batch_size": 64, "mean": 0.38484299182891846, "std": 0.7617892622947693, "min": -1.241943359375, "p10": -0.4425479888916015, "median": 0.2058277130126953, "p90": 1.4157575607299804, "max": 2.408418655395508, "pos_frac": 0.671875, "sample": [-1.241943359375, 0.47924041748046875, 1.58526611328125, -0.4028434753417969, -0.30506134033203125, -0.16475677490234375, 0.20306396484375, 0.77545166015625, -0.06515884399414062, -0.4877433776855469, 0.916900634765625, 0.4209098815917969, 1.4160785675048828, -0.36496925354003906, 1.2930870056152344, 0.20859146118164062, 1.7520904541015625, -0.03643035888671875, 0.07792282104492188, -0.6719512939453125, 0.14766693115234375, 0.18890762329101562, -0.25433349609375, 0.8659934997558594, 0.508056640625, 0.11669921875, 0.22723388671875, 0.7503204345703125, -0.0626220703125, 0.684539794921875, -1.0175933837890625, -0.8443679809570312, 0.16337203979492188, -0.08588409423828125, 0.7989387512207031, 0.3067054748535156, 0.9982643127441406, 0.33487701416015625, 2.408418655395508, 1.054107666015625, 1.850921630859375, 0.9082698822021484, 0.17438125610351562, 1.1952438354492188, 0.6306304931640625, -0.16754913330078125, 0.10974884033203125, 1.0071334838867188, 1.502431869506836, 1.415008544921875, -0.9776954650878906, -0.09758758544921875, 0.10653495788574219, 1.262237548828125, 0.6355438232421875, 1.912506103515625, -0.21306800842285156, 1.0922489166259766, 0.07275390625, 0.561859130859375, -0.3417816162109375, 0.01619720458984375, -0.459564208984375, -0.243499755859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000141.npy"} +{"epoch": 0.21315192743764172, "step": 142, "batch_size": 64, "mean": 0.2886812686920166, "std": 0.7227985858917236, "min": -1.3119697570800781, "p10": -0.6092369079589843, "median": 0.22807693481445312, "p90": 1.2796287536621096, "max": 2.2929534912109375, "pos_frac": 0.671875, "sample": [-0.2747039794921875, -1.1679916381835938, 0.1409015655517578, 0.8735275268554688, 0.7799072265625, 1.3021163940429688, 0.225433349609375, 0.23771286010742188, 0.6242141723632812, -0.3731422424316406, 0.5900726318359375, -0.29390716552734375, 0.4456615447998047, -0.4433135986328125, -0.0169219970703125, 0.2798919677734375, 0.8878707885742188, -1.3119697570800781, 0.16477394104003906, 0.09611320495605469, 0.44139862060546875, 2.2929534912109375, 0.5516128540039062, 0.4361915588378906, 0.6618118286132812, 0.21833038330078125, -0.8525848388671875, 0.9704666137695312, 1.8271560668945312, -0.00296783447265625, -0.0927276611328125, 0.07904052734375, 1.0680389404296875, 2.0796356201171875, -0.26763916015625, -0.62860107421875, 0.024959564208984375, -0.27422523498535156, -0.8095550537109375, 0.23072052001953125, 1.5040054321289062, 1.2271575927734375, 0.39585113525390625, 0.25722503662109375, -0.011688232421875, 0.5568637847900391, -0.6454544067382812, 0.085174560546875, -0.17630767822265625, -0.1641998291015625, -0.6208114624023438, -0.4312152862548828, 0.22113037109375, 0.521270751953125, 0.030971527099609375, 0.2386474609375, 1.3559951782226562, 0.507049560546875, 1.4241180419921875, -0.5822296142578125, 0.3419647216796875, 0.08008766174316406, 1.21844482421875, 0.42128753662109375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000142.npy"} +{"epoch": 0.2146636432350718, "step": 143, "batch_size": 64, "mean": 0.4295256435871124, "std": 0.675200343132019, "min": -1.3718147277832031, "p10": -0.33894348144531244, "median": 0.4568920135498047, "p90": 1.2652145385742188, "max": 2.023040771484375, "pos_frac": 0.765625, "sample": [0.16014862060546875, 0.518402099609375, 0.21990585327148438, 0.5685539245605469, 0.4882049560546875, 0.21478271484375, 0.5025863647460938, -0.3614311218261719, 1.117959976196289, -0.5107307434082031, 1.444997787475586, 0.0616912841796875, 2.023040771484375, 0.26587677001953125, 0.575225830078125, 0.832061767578125, -1.3718147277832031, 0.4246673583984375, 0.4300994873046875, 0.1742095947265625, 1.2786865234375, 0.4836845397949219, 1.89593505859375, 0.8342666625976562, 0.5771999359130859, 0.6587448120117188, 1.0007171630859375, -0.550811767578125, 0.08890914916992188, 0.3117408752441406, -0.02440643310546875, 0.12087821960449219, 1.2337799072265625, 1.4900054931640625, 1.5805549621582031, -0.8597335815429688, -0.18267822265625, -0.05002403259277344, 0.8041610717773438, 0.6771392822265625, 0.9193763732910156, 0.3778533935546875, 1.0051727294921875, 0.798095703125, 0.07395744323730469, -0.218994140625, -0.03929901123046875, 0.9539794921875, 0.22323989868164062, 0.75653076171875, -0.6178741455078125, 0.39838600158691406, 0.29266357421875, 1.9738082885742188, -1.0937347412109375, 0.4876270294189453, 0.5317611694335938, -0.2603302001953125, 0.9238739013671875, 0.5981235504150391, 0.4989166259765625, -0.11754608154296875, -0.2864723205566406, 0.16333770751953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000143.npy"} +{"epoch": 0.2161753590325019, "step": 144, "batch_size": 64, "mean": 0.4165222942829132, "std": 0.7656675577163696, "min": -1.355133056640625, "p10": -0.40734157562255857, "median": 0.4372978210449219, "p90": 1.3449628829956055, "max": 2.7715301513671875, "pos_frac": 0.6875, "sample": [1.3578357696533203, 1.4197616577148438, -0.0819854736328125, -0.17484474182128906, -0.5118522644042969, 0.48519325256347656, -0.3711872100830078, 0.8393173217773438, 0.4772167205810547, 0.1353626251220703, 0.5091476440429688, 0.01822662353515625, 0.5413665771484375, 0.8620738983154297, 0.25788116455078125, -0.18182373046875, 0.4948883056640625, 0.5717239379882812, -0.28743743896484375, 0.6337890625, 0.698883056640625, 1.2570152282714844, -0.43856048583984375, -0.05194854736328125, -0.06138038635253906, -0.338623046875, 0.8348236083984375, 1.1176834106445312, -0.22366905212402344, -0.06104278564453125, 0.1991119384765625, 0.45122528076171875, -0.4228363037109375, 0.0888671875, 0.423370361328125, -0.229705810546875, 0.06893539428710938, 0.9938125610351562, 2.7715301513671875, 1.3149261474609375, 1.521148681640625, 1.0871353149414062, -0.2693061828613281, 1.1404266357421875, 0.0663299560546875, 1.100982666015625, 0.18315887451171875, -0.2662200927734375, 1.1623687744140625, -0.7487030029296875, 0.2216472625732422, -0.9930419921875, 0.12758636474609375, 1.125274658203125, -1.355133056640625, 1.6160449981689453, 1.9598922729492188, 1.8867340087890625, 0.6977157592773438, -1.1066131591796875, 0.6409378051757812, 0.5431423187255859, 0.21356773376464844, 0.7152786254882812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000144.npy"} +{"epoch": 0.21768707482993196, "step": 145, "batch_size": 64, "mean": 0.3303707540035248, "std": 0.8115813732147217, "min": -1.6212921142578125, "p10": -0.577635383605957, "median": 0.2301616668701172, "p90": 1.42393798828125, "max": 2.0545806884765625, "pos_frac": 0.65625, "sample": [0.08780670166015625, 1.119964599609375, -0.5757808685302734, -0.6741180419921875, 0.8190116882324219, -1.6212921142578125, 0.8386688232421875, 0.03556060791015625, 1.5433998107910156, 0.08245849609375, 0.19833946228027344, 1.8693923950195312, -0.15866851806640625, 0.5234222412109375, 0.10911941528320312, 0.47714996337890625, -0.11882781982421875, 1.0188064575195312, -0.57843017578125, -0.08784866333007812, 0.22532272338867188, -0.2146320343017578, -0.686920166015625, 1.291290283203125, 1.5652275085449219, -0.56011962890625, 0.4124755859375, 0.76190185546875, 1.41693115234375, 0.043369293212890625, 2.0545806884765625, 1.1112060546875, 1.2926139831542969, 1.6196517944335938, -1.337615966796875, 1.4370975494384766, 0.40123558044433594, 1.249359130859375, 0.6124420166015625, 0.8233871459960938, -1.2913818359375, 0.5254497528076172, 1.3766326904296875, 0.7764930725097656, 0.0880126953125, -1.2922210693359375, 0.3402252197265625, 0.02828216552734375, -0.17704010009765625, 0.02327728271484375, 0.2350006103515625, 0.6135482788085938, 0.8723087310791016, -0.4102668762207031, 0.8557510375976562, 1.42694091796875, -0.0753631591796875, -0.11437225341796875, -0.318939208984375, -0.0946807861328125, 0.3486480712890625, -0.5642318725585938, -0.33251953125, -0.12276458740234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000145.npy"} +{"epoch": 0.21919879062736206, "step": 146, "batch_size": 64, "mean": 0.40906035900115967, "std": 0.7509440779685974, "min": -1.2055511474609375, "p10": -0.4911735534667968, "median": 0.4195365905761719, "p90": 1.1786262512207033, "max": 2.9422378540039062, "pos_frac": 0.75, "sample": [0.5446624755859375, 0.5011997222900391, 0.04285430908203125, -0.06600189208984375, 0.44792938232421875, -0.72027587890625, 0.21779632568359375, -0.1862945556640625, 0.2500419616699219, 0.8218879699707031, -1.2055511474609375, -0.5621719360351562, 1.265777587890625, 0.7861232757568359, -0.41449737548828125, -0.2964630126953125, 0.996917724609375, -0.9557266235351562, 1.1162109375, 0.33927154541015625, 0.25128173828125, 1.2858200073242188, 0.982269287109375, 0.5697002410888672, -0.9082622528076172, -0.29741668701171875, 0.5595321655273438, 0.8507156372070312, -0.44747161865234375, 0.631072998046875, 0.4696083068847656, -0.8599472045898438, 2.8015594482421875, 0.13063812255859375, -0.38033485412597656, 0.2906341552734375, 0.6047515869140625, 0.391143798828125, 0.7816314697265625, -0.05342292785644531, 1.4259796142578125, 0.9154644012451172, 0.152252197265625, 1.2053756713867188, 1.3489456176757812, 1.0201263427734375, 0.5769367218017578, 0.44928932189941406, 1.003662109375, 0.5078754425048828, 0.2890186309814453, 0.90264892578125, 0.1821002960205078, 0.8228988647460938, -0.5099029541015625, -0.1524658203125, 2.9422378540039062, 0.0658721923828125, 0.23427200317382812, 0.2054901123046875, 0.7589035034179688, 0.003795623779296875, 0.1627960205078125, 1.0890960693359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000146.npy"} +{"epoch": 0.22071050642479215, "step": 147, "batch_size": 64, "mean": 0.29151687026023865, "std": 0.766757071018219, "min": -2.01080322265625, "p10": -0.6635200500488281, "median": 0.4403076171875, "p90": 1.3034889221191412, "max": 2.291412353515625, "pos_frac": 0.625, "sample": [0.5603580474853516, -0.06939888000488281, 0.059295654296875, 0.5645828247070312, 1.175811767578125, 0.9124221801757812, -0.22779083251953125, 0.7072601318359375, -0.237274169921875, -0.20826339721679688, 0.6372833251953125, 0.5422706604003906, 2.291412353515625, 1.064971923828125, -0.6043624877929688, 1.0087089538574219, 1.5904350280761719, -2.01080322265625, 0.9942035675048828, -0.12464141845703125, 0.5229873657226562, -0.8540534973144531, 0.54034423828125, 1.3862762451171875, -0.095794677734375, 1.4201812744140625, -0.5154571533203125, 0.8065109252929688, 0.45676422119140625, 0.23862457275390625, 0.06385993957519531, 0.3333282470703125, -0.1951904296875, -0.7565574645996094, -0.8068618774414062, 0.9331207275390625, 0.42385101318359375, 1.5039100646972656, 1.4251022338867188, -0.5467262268066406, -0.688873291015625, -0.29405975341796875, 0.530364990234375, 0.5120601654052734, 0.8416233062744141, -0.4093017578125, 1.3582077026367188, -0.3221282958984375, 0.6293716430664062, 0.10834503173828125, 0.5673370361328125, 0.6139297485351562, 0.18621826171875, -0.4171333312988281, -0.5327301025390625, 0.9888153076171875, 0.7182636260986328, -0.012966156005859375, 0.509857177734375, -1.113739013671875, -0.9748039245605469, 0.38111114501953125, 0.6653022766113281, -0.09869384765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000147.npy"} +{"epoch": 0.2222222222222222, "step": 148, "batch_size": 64, "mean": 0.488546222448349, "std": 0.7801957130432129, "min": -1.1748580932617188, "p10": -0.34862327575683594, "median": 0.3854217529296875, "p90": 1.5256111145019533, "max": 2.8504486083984375, "pos_frac": 0.796875, "sample": [2.0273590087890625, -0.7066154479980469, 0.3705177307128906, -0.36570167541503906, 0.8436965942382812, 2.8504486083984375, 0.6900711059570312, -0.7752265930175781, 0.42694091796875, 0.8117218017578125, -0.28826904296875, 0.7886772155761719, 0.5607833862304688, 0.5595436096191406, 1.0006446838378906, 0.05039215087890625, 0.014293670654296875, 2.1511993408203125, 0.9262619018554688, 0.4207572937011719, -0.3304786682128906, -0.32311248779296875, 0.8551368713378906, 0.379638671875, 1.5838642120361328, 0.391204833984375, 0.7248992919921875, 0.8639297485351562, 2.518585205078125, 0.20583343505859375, 0.06290817260742188, 0.11865234375, 0.7533721923828125, -1.1748580932617188, 0.7212982177734375, 1.0193023681640625, 0.11284637451171875, 0.5744705200195312, -0.8939132690429688, 0.03927421569824219, 0.05393218994140625, 0.62310791015625, 1.5526809692382812, 2.2211761474609375, -0.42812347412109375, 0.8374423980712891, 0.9809379577636719, 0.9400253295898438, 0.43488311767578125, 0.27008819580078125, -0.07015609741210938, 1.0988006591796875, 0.2710094451904297, 0.1413116455078125, -0.019533157348632812, 1.4624481201171875, 0.18768310546875, 0.2286376953125, 0.16997337341308594, -0.3563995361328125, 0.09674835205078125, 0.1836700439453125, 0.0649566650390625, -0.2386932373046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000148.npy"} +{"epoch": 0.2237339380196523, "step": 149, "batch_size": 64, "mean": 0.4273618459701538, "std": 0.7190922498703003, "min": -1.045623779296875, "p10": -0.5037553787231446, "median": 0.2572669982910156, "p90": 1.4790479660034186, "max": 2.024749755859375, "pos_frac": 0.703125, "sample": [0.17613983154296875, 0.1095123291015625, 1.3280029296875, -0.04662322998046875, 0.86114501953125, 0.1277923583984375, 0.13211441040039062, 0.7602863311767578, 0.5072288513183594, 0.9840717315673828, -0.31842803955078125, 0.2479705810546875, 0.3306770324707031, 0.2565765380859375, 1.3328704833984375, 0.6354866027832031, 0.27285003662109375, -0.1095733642578125, 0.3262214660644531, -0.524627685546875, -0.05857658386230469, 1.2682723999023438, 1.3470649719238281, -0.61651611328125, 1.64398193359375, 1.0054378509521484, -0.009113311767578125, -0.5077419281005859, -0.09940528869628906, -1.045623779296875, -0.2559795379638672, 1.1531600952148438, -0.5902786254882812, 1.5356121063232422, 0.43462181091308594, 0.7759380340576172, 0.7929553985595703, -0.49445343017578125, 1.554534912109375, 0.09231948852539062, -0.9537353515625, 0.075225830078125, -5.7220458984375e-05, 1.613250732421875, 2.024749755859375, 1.306875228881836, -0.45137786865234375, 0.6279373168945312, 0.2400188446044922, 0.25795745849609375, 0.0608978271484375, -0.0054168701171875, 0.1767578125, -0.10089111328125, 0.03579902648925781, 1.1227340698242188, 0.9995574951171875, 1.8102874755859375, 0.9656982421875, 0.21878814697265625, 0.41368865966796875, 0.3967742919921875, -0.5569496154785156, 1.756683349609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000149.npy"} +{"epoch": 0.2252456538170824, "step": 150, "batch_size": 64, "mean": 0.5113657712936401, "std": 0.7018128633499146, "min": -1.310638427734375, "p10": -0.44521541595458985, "median": 0.5271568298339844, "p90": 1.3869216918945313, "max": 2.0937652587890625, "pos_frac": 0.78125, "sample": [0.7911605834960938, -0.03900337219238281, 0.99951171875, 0.5916366577148438, -1.310638427734375, 0.093475341796875, 0.19231224060058594, 1.3721923828125, 0.6807861328125, 1.3060550689697266, 0.1369171142578125, 0.41208648681640625, 0.523529052734375, 1.422119140625, -0.4338092803955078, 0.14304351806640625, 0.5667190551757812, 1.8658447265625, 1.337615966796875, 0.9957122802734375, 1.2591018676757812, 0.3150177001953125, -0.2940521240234375, -1.0332183837890625, 0.855499267578125, 1.0701007843017578, 0.7013359069824219, 0.48381996154785156, -0.2599639892578125, 0.9895458221435547, 0.8320999145507812, -0.450103759765625, 0.4251213073730469, 0.4301300048828125, 2.0937652587890625, -0.4681396484375, 0.5736370086669922, 0.4163532257080078, 0.13513946533203125, 1.3932342529296875, 0.85382080078125, 1.9424514770507812, -0.0939788818359375, 0.3496856689453125, 1.0063629150390625, 0.541717529296875, 0.6845130920410156, 1.0218009948730469, 0.20787811279296875, 1.441619873046875, -0.7903671264648438, 0.3906707763671875, 0.9776687622070312, 0.3470497131347656, 0.26130104064941406, 0.8843994140625, -0.10686492919921875, -0.46819305419921875, 1.5433483123779297, -0.738677978515625, -0.24388885498046875, 0.651397705078125, 0.5307846069335938, 0.4172210693359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000150.npy"} +{"epoch": 0.22675736961451248, "step": 151, "batch_size": 64, "mean": 0.5049391388893127, "std": 0.7309455275535583, "min": -0.96026611328125, "p10": -0.4061086654663086, "median": 0.5584821701049805, "p90": 1.4378181457519532, "max": 2.6263885498046875, "pos_frac": 0.765625, "sample": [0.90594482421875, 0.2616405487060547, 0.9373035430908203, 0.019290924072265625, 0.07422828674316406, 0.002216339111328125, 2.12921142578125, -0.07760238647460938, 1.0583343505859375, -0.22504234313964844, 0.5868377685546875, 0.9309425354003906, 1.441619873046875, 0.5779876708984375, 1.125509262084961, 0.5389766693115234, 0.9697990417480469, 0.03376960754394531, 0.2735557556152344, 0.1606159210205078, -0.3504638671875, -0.46610450744628906, 0.4545440673828125, -0.8571815490722656, 2.6263885498046875, -0.9384078979492188, 0.6365203857421875, 0.32085418701171875, -0.4214363098144531, -0.96026611328125, 0.449615478515625, -0.4975128173828125, 1.697662353515625, 0.7612762451171875, -0.1278839111328125, -0.24889755249023438, 0.142791748046875, 1.0631866455078125, -0.5802383422851562, 1.55615234375, 0.32914161682128906, 0.6108283996582031, 0.6967430114746094, 0.09324455261230469, 1.4289474487304688, 0.7454833984375, 0.9604263305664062, 1.8141632080078125, 0.8273849487304688, 0.6447105407714844, 0.40090179443359375, 0.37969207763671875, 0.5956401824951172, 1.3987960815429688, -0.3703441619873047, 0.7628498077392578, 0.8206577301025391, 0.7976417541503906, -0.26161956787109375, -0.06396484375, 1.5791549682617188, 1.0403213500976562, 0.07374763488769531, 1.02581787109375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000151.npy"} +{"epoch": 0.22826908541194255, "step": 152, "batch_size": 64, "mean": 0.5341184139251709, "std": 0.6797597408294678, "min": -0.7184371948242188, "p10": -0.4177883148193358, "median": 0.4642505645751953, "p90": 1.4713575363159181, "max": 2.2644195556640625, "pos_frac": 0.78125, "sample": [-0.2508888244628906, 0.52227783203125, 1.044820785522461, 0.38884735107421875, -0.03862762451171875, -0.4818572998046875, 1.5396347045898438, 0.9246292114257812, 0.38434600830078125, -0.6074981689453125, 0.3833446502685547, 0.25527191162109375, 0.502410888671875, 0.5432586669921875, 0.25543975830078125, 1.2765274047851562, 0.1308307647705078, 0.4537696838378906, 0.4015941619873047, -0.47316741943359375, -0.7184371948242188, 0.9006462097167969, 0.24463653564453125, 0.847686767578125, -0.21429061889648438, 0.09401702880859375, 1.2443580627441406, 0.8519515991210938, 0.3004894256591797, 0.1768951416015625, 0.24364852905273438, 1.4079608917236328, 0.48291778564453125, 0.55224609375, 0.0143585205078125, 0.82733154296875, 1.1574268341064453, 0.4747314453125, 0.711151123046875, 1.6003570556640625, -0.49030303955078125, 1.3486766815185547, 1.117645263671875, -0.24388885498046875, 1.7698135375976562, 1.4985275268554688, 0.9912033081054688, -0.13949203491210938, 0.9800796508789062, 0.6924285888671875, -0.6617431640625, 0.989013671875, 0.17074966430664062, 1.5863609313964844, 0.6487464904785156, 0.2602081298828125, 2.2644195556640625, 1.330352783203125, -0.49249267578125, 1.9124832153320312, 0.447113037109375, -0.2885704040527344, -0.059234619140625, 0.19643402099609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000152.npy"} +{"epoch": 0.22978080120937264, "step": 153, "batch_size": 64, "mean": 0.4024793207645416, "std": 0.8013908267021179, "min": -1.449737548828125, "p10": -0.6285530090332031, "median": 0.3465995788574219, "p90": 1.4635604858398439, "max": 2.0971450805664062, "pos_frac": 0.6875, "sample": [0.2937507629394531, 1.6597824096679688, 0.8345184326171875, 1.7341461181640625, 0.13077163696289062, 1.1064682006835938, 1.4772109985351562, 0.014728546142578125, 0.5248184204101562, -0.01419830322265625, 0.39008140563964844, 1.3011474609375, -0.7042007446289062, 0.9808235168457031, 0.7430419921875, -0.6549453735351562, 1.1737289428710938, -0.8190078735351562, -1.1465911865234375, 1.4313812255859375, 2.0971450805664062, 0.3422431945800781, -0.7616653442382812, 0.40074920654296875, 1.5504302978515625, -0.1671924591064453, 1.0775794982910156, 0.3509559631347656, -0.02375030517578125, 1.4031181335449219, -0.5669708251953125, 0.3814506530761719, -0.5241012573242188, 0.2763862609863281, -0.45954132080078125, -0.04503440856933594, 1.73095703125, 1.4317092895507812, 1.4090576171875, 0.4233245849609375, 0.8786468505859375, -0.500457763671875, 0.376312255859375, 1.6414451599121094, 0.615509033203125, 0.01171112060546875, 0.72601318359375, 1.2216339111328125, -0.3959808349609375, -0.19048690795898438, 0.31982421875, 0.12271881103515625, 0.316314697265625, -0.0393218994140625, 0.9520034790039062, -1.0599594116210938, 0.09764862060546875, 1.0408649444580078, -1.449737548828125, 0.33477783203125, -0.020061492919921875, 0.3687725067138672, -0.448822021484375, 0.05500030517578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000153.npy"} +{"epoch": 0.23129251700680273, "step": 154, "batch_size": 64, "mean": 0.3825940191745758, "std": 0.8272759914398193, "min": -2.0439910888671875, "p10": -0.465458869934082, "median": 0.3881855010986328, "p90": 1.4845285415649416, "max": 2.6370391845703125, "pos_frac": 0.671875, "sample": [-0.2660102844238281, 1.580230712890625, -0.959259033203125, -1.5869102478027344, 0.7798690795898438, 0.9250411987304688, -0.4210186004638672, 0.4298114776611328, -0.04766845703125, -0.7463150024414062, 1.1143016815185547, 0.21355628967285156, 0.6438312530517578, -0.24748992919921875, -0.3705024719238281, -0.19146728515625, -0.2877464294433594, 0.5762138366699219, 0.07921218872070312, 0.5625839233398438, 1.410888671875, 1.5511455535888672, 1.1241722106933594, 0.5803565979003906, -0.822418212890625, 0.5132694244384766, 2.6370391845703125, 1.2415313720703125, 2.158111572265625, -2.0439910888671875, 0.3010749816894531, 0.7697067260742188, 0.7497177124023438, 0.2659149169921875, 0.40207672119140625, -0.01059722900390625, -0.06698226928710938, 0.5049991607666016, 0.5973262786865234, 0.3458671569824219, -0.09126663208007812, -0.18502044677734375, 0.7774600982666016, -0.1331329345703125, 0.20397186279296875, 1.8830394744873047, 0.4378814697265625, 0.8495769500732422, 0.5003890991210938, 0.045497894287109375, 0.17095947265625, -0.48450469970703125, 0.9088020324707031, -0.5020751953125, 1.838623046875, 1.1048011779785156, 1.1023025512695312, -0.3303565979003906, 1.5160884857177734, -0.3815269470214844, 0.505401611328125, 0.08748054504394531, 0.29785728454589844, 0.3742942810058594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000154.npy"} +{"epoch": 0.2328042328042328, "step": 155, "batch_size": 64, "mean": 0.35689258575439453, "std": 0.8799886107444763, "min": -1.7484664916992188, "p10": -0.8338920593261718, "median": 0.3678169250488281, "p90": 1.3670158386230469, "max": 2.6544113159179688, "pos_frac": 0.6875, "sample": [0.104156494140625, 0.046905517578125, 1.4622650146484375, 0.7874832153320312, -0.37889862060546875, 0.19684982299804688, 0.39532470703125, 0.8856735229492188, -0.03505897521972656, 1.754547119140625, 0.1901397705078125, 0.8527984619140625, -0.5370101928710938, -1.7484664916992188, 0.18162918090820312, -0.1126556396484375, 0.835662841796875, 1.3505401611328125, 1.2864151000976562, 1.1304035186767578, 0.8983993530273438, -0.29997825622558594, 1.2827987670898438, -0.5808982849121094, 0.6329193115234375, 1.4451484680175781, 1.3243255615234375, 0.6832351684570312, -0.647369384765625, 0.34278106689453125, -0.84576416015625, 0.23877716064453125, 1.6390094757080078, 1.3740768432617188, 0.9987869262695312, 1.7923431396484375, -0.55743408203125, -0.8061904907226562, -0.09930419921875, -0.2079925537109375, -1.3538665771484375, 0.18821334838867188, 1.1566009521484375, 0.49037933349609375, 1.0119857788085938, 0.9481315612792969, 0.4887237548828125, 1.0842361450195312, -1.3367919921875, 0.7775039672851562, 0.69818115234375, 0.392852783203125, -0.7512054443359375, -0.880584716796875, 0.34033966064453125, -1.0060958862304688, 0.28691864013671875, -0.9914302825927734, -0.5072898864746094, 0.01302337646484375, 0.09546661376953125, 2.6544113159179688, 1.1885738372802734, 0.5964736938476562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000155.npy"} +{"epoch": 0.23431594860166288, "step": 156, "batch_size": 64, "mean": 0.5367816686630249, "std": 0.9612331986427307, "min": -1.8542327880859375, "p10": -0.531889533996582, "median": 0.37888145446777344, "p90": 1.9307128906250004, "max": 3.390716552734375, "pos_frac": 0.75, "sample": [0.1594390869140625, 0.06864738464355469, 0.26490020751953125, -0.32561492919921875, -0.1750335693359375, 1.8276824951171875, 2.2290782928466797, -0.651824951171875, -0.0724945068359375, 0.5800857543945312, 0.4144134521484375, 1.3167304992675781, 0.38828277587890625, 0.313629150390625, 3.390716552734375, 0.6439208984375, 1.6103668212890625, 0.3694801330566406, -0.5506858825683594, 0.24895858764648438, 0.07385444641113281, 0.2943840026855469, 0.919921875, 0.0777740478515625, 0.34868621826171875, 0.8994979858398438, -0.8800487518310547, 0.8889007568359375, 0.22946548461914062, 1.9748687744140625, 0.5287704467773438, 0.971923828125, 0.16654205322265625, 1.3673019409179688, -0.118133544921875, 2.019634246826172, -1.8542327880859375, 1.3978042602539062, -0.623077392578125, 1.7305221557617188, 1.202972412109375, 3.11383056640625, 0.27950286865234375, 1.1467437744140625, 0.0630645751953125, -0.48803138732910156, -0.2728767395019531, 0.652435302734375, 0.44264984130859375, 0.5882034301757812, 0.5209465026855469, -0.881866455078125, 0.5123252868652344, -0.20117950439453125, 0.04654693603515625, 0.8698959350585938, 0.4345817565917969, -0.7373428344726562, 2.115753173828125, -0.29373741149902344, 0.11462593078613281, 2.471405029296875, 0.5146331787109375, -0.3260955810546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000156.npy"} +{"epoch": 0.23582766439909297, "step": 157, "batch_size": 64, "mean": 0.45090246200561523, "std": 0.7468938827514648, "min": -1.8165130615234375, "p10": -0.25495357513427735, "median": 0.4123067855834961, "p90": 1.2638387680053713, "max": 2.7251129150390625, "pos_frac": 0.765625, "sample": [0.5486602783203125, 0.78826904296875, -0.14461517333984375, 0.802154541015625, 1.1860504150390625, 0.0522918701171875, 0.6704559326171875, 0.1048583984375, 1.8330879211425781, 0.5895195007324219, -0.00968170166015625, 0.7858963012695312, 2.3515625, 0.466827392578125, -1.8165130615234375, -0.01651763916015625, 1.4109153747558594, 0.45124053955078125, 0.4124107360839844, 1.0993061065673828, 0.6441612243652344, 1.2903404235839844, 2.36175537109375, 0.2771110534667969, 1.0514049530029297, -0.19626617431640625, 0.89691162109375, 0.0060482025146484375, 0.23637008666992188, -0.2505836486816406, 0.8353252410888672, -0.01355743408203125, 0.86395263671875, -0.32928466796875, 0.06955718994140625, 0.2882957458496094, 0.0959320068359375, 1.2020015716552734, 1.4320297241210938, 0.2557792663574219, -0.130859375, 0.5305252075195312, 0.09885406494140625, 0.7404251098632812, 2.7251129150390625, 0.2148303985595703, -0.32213592529296875, 0.21208572387695312, 0.0264739990234375, -0.25682640075683594, 0.29973602294921875, -0.6590690612792969, 0.49575042724609375, 0.2931251525878906, 0.6343936920166016, 0.41901588439941406, 0.47554779052734375, 0.6334800720214844, -0.9763031005859375, 0.4122028350830078, 0.2401123046875, -0.6870880126953125, -0.15350341796875, 1.0084075927734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000157.npy"} +{"epoch": 0.23733938019652306, "step": 158, "batch_size": 64, "mean": 0.5448204278945923, "std": 0.7537803053855896, "min": -0.9247665405273438, "p10": -0.3601390838623047, "median": 0.5507411956787109, "p90": 1.1578804016113282, "max": 2.98101806640625, "pos_frac": 0.734375, "sample": [0.07910919189453125, 0.5307579040527344, 1.1309432983398438, 0.5520896911621094, -0.9247665405273438, 0.9309349060058594, 0.6272125244140625, 0.512664794921875, -0.01641845703125, -0.3632164001464844, 0.8635025024414062, 1.3266754150390625, 0.591827392578125, 0.4419994354248047, -0.35295867919921875, 0.7500534057617188, 0.994659423828125, 0.282928466796875, 0.5268287658691406, 1.1266632080078125, 0.16257476806640625, 1.195709228515625, 0.7045936584472656, 1.1641769409179688, 0.9595890045166016, -0.7595901489257812, 0.8487701416015625, 2.3417510986328125, -0.205596923828125, 0.392303466796875, 0.7676239013671875, 0.8129348754882812, 0.867706298828125, 1.1431884765625, 0.7804508209228516, 1.0899505615234375, 0.7796173095703125, -0.181640625, 0.9570121765136719, -0.7373199462890625, 2.2794189453125, -0.1177215576171875, 1.1293869018554688, 1.01007080078125, 0.9312591552734375, 0.41080665588378906, 0.2750568389892578, 1.0146408081054688, 2.98101806640625, 0.13666915893554688, 0.5493927001953125, -0.48113250732421875, -0.17497634887695312, 0.45035552978515625, 0.3270072937011719, -0.1859130859375, -0.2980823516845703, 2.2547264099121094, 0.8075027465820312, 0.352325439453125, -0.6426200866699219, -0.143798828125, -0.09242057800292969, -0.599761962890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000158.npy"} +{"epoch": 0.23885109599395313, "step": 159, "batch_size": 64, "mean": 0.3540443778038025, "std": 0.7337093949317932, "min": -2.26019287109375, "p10": -0.4349920272827148, "median": 0.3473091125488281, "p90": 1.2807128906250003, "max": 1.7625274658203125, "pos_frac": 0.71875, "sample": [0.22219085693359375, 0.9659919738769531, -0.5463485717773438, 0.9690017700195312, 0.9391326904296875, -0.13974761962890625, -2.26019287109375, 1.316202163696289, -0.26773834228515625, 1.7625274658203125, 1.457916259765625, -0.08029937744140625, 1.4194812774658203, 0.45748138427734375, 0.06710433959960938, 0.012645721435546875, 0.17510986328125, 1.1566543579101562, 0.3804512023925781, -0.15130233764648438, -0.18023300170898438, -1.3588409423828125, 0.3122577667236328, -0.5688095092773438, 0.35826873779296875, 0.38651466369628906, 0.48220062255859375, 0.2684326171875, 0.3363494873046875, 1.4800758361816406, -0.14382553100585938, -0.02239990234375, 0.12853431701660156, -0.45652008056640625, 0.16292572021484375, 0.7997627258300781, 1.0556373596191406, 0.903961181640625, 0.3778228759765625, 0.3040771484375, 0.6908435821533203, -0.205413818359375, 1.3141250610351562, -0.6347236633300781, 0.3670845031738281, 1.2027511596679688, 0.07621574401855469, 0.5338287353515625, -0.12960052490234375, 0.2270660400390625, -0.38475990295410156, 1.0009727478027344, 0.5471038818359375, 0.305938720703125, 0.06676673889160156, -1.3615875244140625, 0.7484302520751953, 0.679473876953125, 0.7919998168945312, -0.042690277099609375, 0.5540542602539062, 1.1401214599609375, 1.7380294799804688, 0.9503555297851562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000159.npy"} +{"epoch": 0.24036281179138322, "step": 160, "batch_size": 64, "mean": 0.3636714518070221, "std": 0.80547034740448, "min": -1.3636932373046875, "p10": -0.6107614517211913, "median": 0.35248661041259766, "p90": 1.2911033630371098, "max": 3.071044921875, "pos_frac": 0.71875, "sample": [0.0894927978515625, 0.40587615966796875, 0.0456695556640625, 1.3264083862304688, 0.7132472991943359, 0.7659988403320312, -0.563568115234375, -0.0041294097900390625, 1.0907745361328125, 0.6631622314453125, 2.54803466796875, -0.20937728881835938, 1.4250564575195312, 0.406707763671875, 0.2602977752685547, 1.84393310546875, -0.6309871673583984, 0.16829299926757812, 0.7106037139892578, 0.7610950469970703, 0.3298797607421875, -0.6585121154785156, -0.3955497741699219, -0.9798583984375, 0.8542881011962891, 0.3795967102050781, 0.4292945861816406, 0.028932571411132812, 0.5757904052734375, -0.3536109924316406, 0.13692855834960938, 0.1150665283203125, 3.071044921875, 1.4404106140136719, 0.18696212768554688, -0.250030517578125, 0.9463958740234375, 0.4119834899902344, -0.1812305450439453, 1.0477066040039062, -0.11190414428710938, 1.1040840148925781, -0.23224639892578125, 0.36344146728515625, 0.34487342834472656, 0.42774200439453125, -0.9367141723632812, -0.2902717590332031, 0.8640003204345703, 0.6203460693359375, -1.3636932373046875, 0.15192031860351562, -0.76934814453125, 0.6800117492675781, 1.2087249755859375, 0.1807403564453125, 0.36009979248046875, 1.1609878540039062, -1.3229904174804688, 0.6723785400390625, 0.11549758911132812, 1.4506072998046875, -0.4847412109375, 0.1293487548828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000160.npy"} +{"epoch": 0.2418745275888133, "step": 161, "batch_size": 64, "mean": 0.5008042454719543, "std": 0.8815625309944153, "min": -1.3081932067871094, "p10": -0.4531368255615234, "median": 0.3524589538574219, "p90": 1.85943603515625, "max": 3.3688430786132812, "pos_frac": 0.6875, "sample": [-0.19255447387695312, -1.3081932067871094, 0.5819091796875, 0.385833740234375, 1.83721923828125, -0.4698829650878906, 1.86895751953125, 1.893707275390625, -0.2039356231689453, -0.1544322967529297, -0.369384765625, 1.0908126831054688, 0.2434673309326172, 0.92498779296875, 0.8049049377441406, 0.9990100860595703, 0.8863296508789062, -0.6991500854492188, 0.27469635009765625, -0.34642791748046875, -0.3854255676269531, -0.46965789794921875, 0.6516036987304688, -0.38408660888671875, 0.33754730224609375, -0.4327278137207031, -0.5808334350585938, 1.0584278106689453, 1.0463790893554688, 0.085235595703125, 1.6672096252441406, 0.3999061584472656, 0.8443927764892578, 3.3688430786132812, 1.8966751098632812, 0.10520553588867188, -0.27370262145996094, 2.2606353759765625, 1.168609619140625, 0.007091522216796875, 0.23974990844726562, 1.8793182373046875, 0.06357955932617188, 0.36737060546875, 0.155670166015625, -0.056400299072265625, 0.7111167907714844, 0.6907463073730469, -0.461883544921875, -0.07433700561523438, 0.021636962890625, -0.13692092895507812, 1.1541748046875, 0.2017822265625, -0.48267364501953125, -0.345794677734375, 1.028656005859375, 0.5260829925537109, 1.5063018798828125, 0.8062210083007812, 2.61334228515625, 0.4343414306640625, 0.07415008544921875, 0.7160377502441406], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000161.npy"} +{"epoch": 0.24338624338624337, "step": 162, "batch_size": 64, "mean": 0.6207360029220581, "std": 0.880763053894043, "min": -2.2745895385742188, "p10": -0.3343147277832031, "median": 0.6956424713134766, "p90": 1.7050586700439454, "max": 3.2748565673828125, "pos_frac": 0.796875, "sample": [2.086181640625, 2.3520355224609375, 0.7770233154296875, 0.8437423706054688, 0.067291259765625, 1.6523933410644531, 0.6287727355957031, -0.4667510986328125, 0.74847412109375, 0.9734306335449219, 0.42021942138671875, 0.6546287536621094, 1.1268749237060547, 0.727264404296875, 1.2737350463867188, 0.2034626007080078, 0.81158447265625, 2.2975997924804688, 0.06951904296875, 1.6733436584472656, 0.5491104125976562, 0.7244606018066406, 0.3552818298339844, 0.0102386474609375, 0.36634063720703125, -0.79833984375, 1.524627685546875, 1.284271240234375, -0.300628662109375, 1.827728271484375, 0.36574363708496094, 0.8592376708984375, -0.3455047607421875, 0.8734073638916016, 3.2748565673828125, 0.6735000610351562, 1.16156005859375, 0.9045181274414062, -0.30820465087890625, 0.7177848815917969, -0.7640228271484375, -0.620361328125, 0.7400588989257812, 1.0211353302001953, 0.8670139312744141, 0.8181991577148438, -0.13715362548828125, -0.11444091796875, -2.2745895385742188, 0.05057525634765625, 0.010698318481445312, 0.054595947265625, 1.7186508178710938, 0.46482276916503906, 1.863555908203125, 1.4871139526367188, -0.3072547912597656, -0.0240631103515625, -0.583526611328125, 0.3024883270263672, 0.08258438110351562, 0.9450225830078125, 0.8587760925292969, 0.6264114379882812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000162.npy"} +{"epoch": 0.24489795918367346, "step": 163, "batch_size": 64, "mean": 0.6183971166610718, "std": 0.8419542908668518, "min": -1.7199325561523438, "p10": -0.3545001983642577, "median": 0.6041259765625, "p90": 1.730694580078125, "max": 2.6047515869140625, "pos_frac": 0.765625, "sample": [-0.0026693344116210938, 1.7256317138671875, -1.7199325561523438, 1.930755615234375, 0.943572998046875, 0.19918441772460938, -0.9847068786621094, 0.2751922607421875, 1.9727935791015625, -0.38323974609375, 1.2236366271972656, 1.4113845825195312, 2.3435821533203125, -0.22777557373046875, 0.23975372314453125, 1.2551803588867188, 0.5937042236328125, 0.23743438720703125, 2.6047515869140625, 1.0938720703125, -0.0951080322265625, 0.6127090454101562, 0.7702121734619141, -0.2874412536621094, -0.8096141815185547, 0.9029006958007812, 0.3032989501953125, 1.9354400634765625, 1.0551395416259766, 1.242156982421875, 0.8169670104980469, 1.0399341583251953, 0.786651611328125, 0.0147247314453125, 1.5145416259765625, 0.5955429077148438, -0.046413421630859375, 1.5665130615234375, -0.6133460998535156, 0.3378028869628906, 0.6433486938476562, 1.9737110137939453, 0.5668869018554688, 1.6310844421386719, -0.706878662109375, -0.12589645385742188, -0.0521087646484375, -0.061855316162109375, 0.3443794250488281, 0.03887176513671875, -0.5181045532226562, 0.8387737274169922, 0.682861328125, 0.29805946350097656, 0.3706245422363281, 0.1905670166015625, 1.6492538452148438, 1.7328643798828125, 0.9282913208007812, 0.7445087432861328, 0.6175765991210938, 0.9090404510498047, 0.3287925720214844, 0.17804718017578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000163.npy"} +{"epoch": 0.24640967498110355, "step": 164, "batch_size": 64, "mean": 0.524990439414978, "std": 0.8107886910438538, "min": -1.9017791748046875, "p10": -0.38676261901855463, "median": 0.4674196243286133, "p90": 1.517374038696289, "max": 2.225727081298828, "pos_frac": 0.71875, "sample": [-0.6645889282226562, 0.6720123291015625, 0.3730621337890625, 0.2273712158203125, 0.8541030883789062, -0.6614303588867188, 1.8870773315429688, 1.2830429077148438, 0.21028900146484375, 1.1732292175292969, 1.8937835693359375, -0.05776214599609375, 1.1077594757080078, -0.2484893798828125, 0.8661842346191406, 0.7064056396484375, 1.7474784851074219, 0.36376380920410156, 1.173095703125, -0.49828338623046875, 0.5909061431884766, -0.5732803344726562, 0.41120147705078125, -0.219635009765625, -0.3172492980957031, 0.6197853088378906, 0.8406906127929688, 0.8549423217773438, 0.802978515625, 0.067657470703125, 0.2747669219970703, 1.8901309967041016, -0.189056396484375, 0.4082221984863281, 2.225727081298828, -0.9544525146484375, 0.0368499755859375, 1.4984283447265625, 1.965646743774414, -0.40053558349609375, 0.24044418334960938, 1.2249298095703125, -0.08706283569335938, 1.2436027526855469, 0.9253044128417969, 0.16159820556640625, 0.7128791809082031, -0.23134613037109375, 1.327484130859375, -0.23674774169921875, 0.487335205078125, 0.1533985137939453, 1.4201221466064453, -0.3546257019042969, 0.44750404357910156, -1.9017791748046875, -0.221710205078125, 1.4872722625732422, 1.5254936218261719, 0.2927894592285156, 1.06280517578125, -0.15026092529296875, 0.4965648651123047, 1.3315658569335938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000164.npy"} +{"epoch": 0.24792139077853365, "step": 165, "batch_size": 64, "mean": 0.5113040804862976, "std": 0.6837916970252991, "min": -1.5352630615234375, "p10": -0.225627326965332, "median": 0.6063041687011719, "p90": 1.1669923782348635, "max": 2.8239212036132812, "pos_frac": 0.796875, "sample": [1.1786041259765625, 0.31196022033691406, 0.6276073455810547, 0.48322296142578125, -0.03760719299316406, 0.7988662719726562, 0.3169403076171875, -0.542083740234375, 0.4712104797363281, 0.6532878875732422, 0.13145828247070312, 0.9262313842773438, 0.7683029174804688, 0.3933563232421875, 0.73345947265625, 0.4058685302734375, 0.6320343017578125, 0.277801513671875, -0.1018829345703125, 0.6023826599121094, -1.5352630615234375, -0.075836181640625, 2.8239212036132812, -0.2396240234375, 0.6354217529296875, 1.1398983001708984, 0.7481861114501953, 0.5186271667480469, 1.623422622680664, 1.2260818481445312, 0.12247467041015625, -0.19296836853027344, 1.930999755859375, 1.2228164672851562, 0.7968597412109375, -1.0010299682617188, -0.29775238037109375, 0.8183097839355469, 0.7884178161621094, -0.045989990234375, 0.04992103576660156, 0.6521720886230469, 1.1302337646484375, 0.16971778869628906, 1.6763496398925781, 0.35927581787109375, 0.016124725341796875, 0.30304718017578125, 0.6102256774902344, 0.5254898071289062, 0.8095703125, 1.101806640625, -0.069976806640625, 1.0245094299316406, 0.7737770080566406, -0.90283203125, -0.5738525390625, 0.14078521728515625, 0.5012130737304688, 0.7257232666015625, 0.7492904663085938, 0.9938430786132812, 1.1282577514648438, 0.7907943725585938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000165.npy"} +{"epoch": 0.2494331065759637, "step": 166, "batch_size": 64, "mean": 0.3491097092628479, "std": 0.7661978602409363, "min": -1.7584915161132812, "p10": -0.5286647796630859, "median": 0.2712554931640625, "p90": 1.4388183593750006, "max": 2.22100830078125, "pos_frac": 0.65625, "sample": [1.0368576049804688, -0.21703338623046875, -0.89678955078125, 1.215484619140625, 0.41248321533203125, -0.44190216064453125, 0.20059967041015625, 1.65728759765625, 0.8211669921875, 0.77093505859375, 0.4105072021484375, -0.09073829650878906, 0.8472404479980469, -0.14288330078125, 1.496917724609375, 0.121917724609375, 0.686431884765625, 1.8727188110351562, 1.3192214965820312, -0.7640762329101562, -0.06534576416015625, 0.7941703796386719, 0.0195465087890625, 0.0957794189453125, 1.052215576171875, 0.5831737518310547, -0.30132293701171875, 0.22503662109375, 0.80645751953125, -0.0016937255859375, -0.0572357177734375, -0.9162940979003906, 0.5427093505859375, 2.08154296875, 0.2263946533203125, -0.4885597229003906, 0.6080665588378906, 0.1206207275390625, 0.5878486633300781, 0.5763187408447266, 0.64892578125, 2.22100830078125, 0.22023773193359375, -1.7584915161132812, 0.7642822265625, -0.421234130859375, 0.11026763916015625, -0.030536651611328125, 0.34087562561035156, 1.5342330932617188, 0.2097320556640625, -0.965972900390625, -0.44219970703125, 0.6834526062011719, -0.06443977355957031, -0.6100387573242188, 0.5446090698242188, -0.15887069702148438, 1.2399444580078125, -0.5458526611328125, 0.3161163330078125, 1.4900741577148438, -0.13222694396972656, 0.3433494567871094], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000166.npy"} +{"epoch": 0.2509448223733938, "step": 167, "batch_size": 64, "mean": 0.5719939470291138, "std": 0.6967406272888184, "min": -1.1129226684570312, "p10": -0.18730316162109376, "median": 0.4966764450073242, "p90": 1.49148063659668, "max": 2.208221435546875, "pos_frac": 0.78125, "sample": [-0.632781982421875, -0.016473770141601562, 0.39703941345214844, 0.6472015380859375, -0.6829071044921875, 1.268310546875, 1.2336883544921875, 0.21795654296875, 0.5133705139160156, -0.188873291015625, 0.9501056671142578, 0.9726295471191406, 0.24579429626464844, 0.46964263916015625, 0.24495887756347656, 0.34974098205566406, 0.5042724609375, 0.02197265625, 0.5214385986328125, -0.095458984375, -1.0200157165527344, 0.9032096862792969, 0.8220024108886719, 0.37286376953125, 0.4846229553222656, 0.7234039306640625, 1.329254150390625, 0.10825157165527344, 0.20730972290039062, 1.0705108642578125, -0.30123138427734375, 2.079164505004883, -0.20228195190429688, 0.7617721557617188, 0.4433135986328125, 1.357086181640625, 1.1474685668945312, 1.6328125, 1.7021331787109375, 0.843505859375, 0.6445732116699219, 0.5285263061523438, -1.1129226684570312, 0.3895092010498047, 1.8528766632080078, 0.25479888916015625, -0.12004852294921875, 1.4403152465820312, 0.3123435974121094, 0.8489761352539062, 0.9776878356933594, -0.1836395263671875, 0.3160667419433594, 1.5134086608886719, 0.7005329132080078, 1.240447998046875, -0.1317901611328125, 2.208221435546875, 0.4577217102050781, 1.1381149291992188, 1.6196823120117188, -0.10428428649902344, -0.07936859130859375, 0.48908042907714844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000167.npy"} +{"epoch": 0.25245653817082386, "step": 168, "batch_size": 64, "mean": 0.40946611762046814, "std": 0.7276245355606079, "min": -1.4571380615234375, "p10": -0.455728530883789, "median": 0.3776998519897461, "p90": 1.3356294631958008, "max": 2.45538330078125, "pos_frac": 0.765625, "sample": [0.9896774291992188, -0.5622100830078125, 0.6917800903320312, -0.19562339782714844, 0.5476150512695312, 0.5655441284179688, 1.3295745849609375, -0.6717376708984375, -0.16457366943359375, -0.9019317626953125, 1.2660369873046875, 0.3971996307373047, 0.587860107421875, 0.9754886627197266, 0.0664520263671875, 0.7906627655029297, 0.14920425415039062, 0.10801887512207031, 0.350067138671875, 0.6139144897460938, 0.5029983520507812, 0.1714305877685547, 0.012237548828125, 0.24334335327148438, 0.23404312133789062, 0.07335662841796875, 0.9569816589355469, 0.5754508972167969, 0.0115814208984375, 0.6754837036132812, 1.5118331909179688, 0.16487884521484375, -1.4571380615234375, 0.7237625122070312, 0.9420642852783203, 0.711639404296875, 2.45538330078125, 1.0705375671386719, -0.04279327392578125, 0.8482208251953125, -0.04929924011230469, 1.6093902587890625, -0.4865531921386719, 0.6581459045410156, 1.5577468872070312, 0.13742828369140625, 1.5506439208984375, -0.3838043212890625, 0.6569976806640625, -1.0669326782226562, 0.3582000732421875, 0.15790176391601562, 0.63018798828125, -0.05209922790527344, 0.01491546630859375, 1.3382244110107422, -0.25014495849609375, -0.9633560180664062, 0.10818099975585938, -0.18965721130371094, 0.6909332275390625, 0.7008438110351562, 2.0882568359375, 0.0713653564453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000168.npy"} +{"epoch": 0.25396825396825395, "step": 169, "batch_size": 64, "mean": 0.523231029510498, "std": 0.9144213199615479, "min": -1.4163780212402344, "p10": -0.4613693237304687, "median": 0.4721260070800781, "p90": 1.4396865844726563, "max": 3.5921478271484375, "pos_frac": 0.6875, "sample": [0.6943511962890625, 0.6848602294921875, 0.47011566162109375, -0.47736358642578125, 1.1200370788574219, 0.6570053100585938, -1.4163780212402344, 0.5453376770019531, 0.6846389770507812, 0.8024139404296875, -0.42404937744140625, 0.4741363525390625, 1.4214324951171875, 2.280670166015625, -0.669281005859375, 0.014413833618164062, 0.09695053100585938, -0.38538360595703125, 0.21147918701171875, -0.3535614013671875, 0.0886688232421875, 1.447509765625, 1.902801513671875, 0.1625213623046875, 0.1996002197265625, 3.1647567749023438, 1.0350418090820312, 0.378570556640625, 0.5742340087890625, 0.7966709136962891, 0.3919811248779297, 1.0839462280273438, 2.0256500244140625, -0.2973651885986328, 1.1576805114746094, 0.624481201171875, 1.0562267303466797, -0.0604400634765625, 0.46511268615722656, -0.03733062744140625, -0.3705596923828125, 1.2593231201171875, -0.026319503784179688, -0.7084274291992188, 0.45182037353515625, -0.34880828857421875, 0.7008590698242188, -0.2503509521484375, 1.1872100830078125, 1.3000946044921875, -0.16599273681640625, 0.7603645324707031, 0.13988494873046875, -1.0115966796875, -0.0904998779296875, 3.5921478271484375, 0.982666015625, 1.0211639404296875, -0.6594963073730469, 2.179119110107422, 0.8476715087890625, 0.7632675170898438, -0.53375244140625, -0.12514495849609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000169.npy"} +{"epoch": 0.25547996976568405, "step": 170, "batch_size": 64, "mean": 0.3785494565963745, "std": 0.7791469693183899, "min": -1.4081497192382812, "p10": -0.6455410003662109, "median": 0.4239635467529297, "p90": 1.1580013275146486, "max": 2.2889480590820312, "pos_frac": 0.71875, "sample": [0.764312744140625, 0.23504638671875, 0.045139312744140625, 0.643218994140625, -0.04327964782714844, 2.2889480590820312, 0.2977771759033203, 0.7611827850341797, -0.47467041015625, 1.8781318664550781, -0.3986663818359375, -0.002655029296875, 0.7359695434570312, 1.0114288330078125, 0.996307373046875, 0.2443866729736328, 0.39846229553222656, 0.1248779296875, 0.8540115356445312, 0.20632171630859375, -1.0544090270996094, 1.3858642578125, 0.18558883666992188, 0.5412101745605469, -0.9807052612304688, 0.34201812744140625, -0.9942474365234375, -0.10628509521484375, -1.4081497192382812, -0.0013885498046875, 1.0877265930175781, 0.9443817138671875, 0.5251998901367188, 0.4037437438964844, -0.882965087890625, 0.52362060546875, -1.26165771484375, 1.0861244201660156, 2.2100982666015625, 0.44510650634765625, 0.8063507080078125, -0.3104133605957031, 0.1004180908203125, 1.1711883544921875, 0.4687843322753906, 1.5002899169921875, -0.49114990234375, 0.9906082153320312, 0.36939239501953125, 0.444183349609375, -0.5416107177734375, -0.6900825500488281, 0.9568939208984375, 1.4799919128417969, 0.051601409912109375, -0.3017253875732422, 0.5664176940917969, 0.2718505859375, 0.8374061584472656, 0.47006988525390625, 1.1272315979003906, 0.9055862426757812, 0.9818572998046875, -0.4951019287109375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000170.npy"} +{"epoch": 0.25699168556311414, "step": 171, "batch_size": 64, "mean": 0.49539363384246826, "std": 0.8006395697593689, "min": -2.02490234375, "p10": -0.3500638961791992, "median": 0.5295171737670898, "p90": 1.4719512939453128, "max": 2.4389495849609375, "pos_frac": 0.796875, "sample": [0.5313720703125, 0.4834098815917969, 1.5002593994140625, 0.7049446105957031, 0.43236541748046875, 1.7940521240234375, 0.25748443603515625, 1.4058990478515625, -0.3579883575439453, 0.3778533935546875, 0.06224632263183594, 0.7010345458984375, 1.1416702270507812, 0.634185791015625, 1.6107673645019531, 0.3136444091796875, 0.5355854034423828, 0.5276622772216797, 0.7738380432128906, -0.1260528564453125, -0.09274864196777344, 0.13669204711914062, 0.9598217010498047, -0.331573486328125, 1.1773662567138672, 0.0056171417236328125, -0.20220947265625, 0.3896331787109375, 1.1512985229492188, -0.7760124206542969, 0.36425018310546875, 0.7874069213867188, 1.0578842163085938, 0.24447250366210938, 0.3822975158691406, 0.7120704650878906, 0.45165252685546875, -0.8901443481445312, 1.7643165588378906, 1.96527099609375, -2.02490234375, 0.15935134887695312, 0.017513275146484375, 0.6983871459960938, 1.3639755249023438, 1.6172332763671875, 0.800933837890625, 0.5422821044921875, 0.2334442138671875, 1.397003173828125, 0.7332305908203125, 1.0137481689453125, 0.9843616485595703, 1.0448684692382812, -0.2066192626953125, -0.0979156494140625, 0.6571807861328125, 0.16263389587402344, -0.5712928771972656, -1.79638671875, 2.4389495849609375, 0.5624542236328125, 0.151275634765625, -0.7381134033203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000171.npy"} +{"epoch": 0.2585034013605442, "step": 172, "batch_size": 64, "mean": 0.5896360874176025, "std": 0.7642790675163269, "min": -1.2518367767333984, "p10": -0.5004623413085936, "median": 0.67974853515625, "p90": 1.5084318161010744, "max": 2.4058837890625, "pos_frac": 0.78125, "sample": [-0.7424240112304688, 0.1226348876953125, 0.9827880859375, 1.1799087524414062, -0.0202789306640625, 0.5420494079589844, 0.029741287231445312, 0.8894538879394531, -0.031219482421875, 1.5190296173095703, 1.379425048828125, 1.04547119140625, 0.7576751708984375, -0.09072113037109375, 0.4942779541015625, 1.834371566772461, 0.7069168090820312, -0.55877685546875, -0.14343643188476562, 0.5822334289550781, 1.48370361328125, 1.2828140258789062, -0.9548377990722656, -0.9640960693359375, 1.059478759765625, 0.6374626159667969, -0.208221435546875, 1.331451416015625, 0.5646514892578125, 0.8985671997070312, 0.7286949157714844, 0.43015289306640625, -0.3643951416015625, 2.4058837890625, -0.203033447265625, 0.02349090576171875, 0.9925804138183594, 1.0846633911132812, 1.8238105773925781, 0.8829727172851562, 0.3708667755126953, 0.683349609375, 0.42313385009765625, 0.6890125274658203, 1.0034637451171875, -0.7453079223632812, 0.7089767456054688, 1.2548789978027344, 1.0614700317382812, 0.8184394836425781, -0.6616592407226562, 1.875091552734375, 1.6308784484863281, -1.2518367767333984, 0.2252063751220703, 1.225341796875, 0.6761474609375, 0.27834320068359375, 0.5093498229980469, 1.2931404113769531, 0.17657470703125, 0.0953369140625, 1.6813201904296875, 0.3002777099609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000172.npy"} +{"epoch": 0.2600151171579743, "step": 173, "batch_size": 64, "mean": 0.6354283094406128, "std": 0.9736626744270325, "min": -1.230224609375, "p10": -0.3052539825439453, "median": 0.507451057434082, "p90": 2.0284622192382815, "max": 3.802703857421875, "pos_frac": 0.71875, "sample": [-0.7339096069335938, 0.186370849609375, -0.0587005615234375, 1.4014434814453125, -0.07208251953125, 0.697052001953125, 0.3079395294189453, -0.10832977294921875, 0.2033710479736328, 0.6387481689453125, -0.15478134155273438, 0.6759872436523438, 1.5870742797851562, -1.230224609375, -0.29474639892578125, 1.2002944946289062, 2.0455780029296875, -0.08856964111328125, 1.0657997131347656, 0.09256553649902344, 1.988525390625, 1.9178237915039062, -0.050960540771484375, 3.802703857421875, -0.5974998474121094, 0.379241943359375, 0.36624908447265625, 0.5474090576171875, 0.7128524780273438, -0.02643585205078125, -0.47057342529296875, 1.2990875244140625, 0.37121009826660156, 2.3226547241210938, 0.8609695434570312, 0.3567638397216797, 0.9862289428710938, -0.21176528930664062, 0.737457275390625, 3.2837982177734375, 0.7177810668945312, 0.3433380126953125, 0.6045856475830078, 1.05755615234375, 0.5575618743896484, -1.144744873046875, 0.539337158203125, -0.02222442626953125, 2.3607025146484375, 1.5222702026367188, 1.2590141296386719, -0.27657318115234375, 0.3024444580078125, 0.8815536499023438, 2.0600662231445312, 0.7070770263671875, -0.9276351928710938, -0.3097572326660156, 0.6317615509033203, 2.50726318359375, 0.40283966064453125, 0.47556495666503906, 0.32799530029296875, 0.15301513671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000173.npy"} +{"epoch": 0.2615268329554044, "step": 174, "batch_size": 64, "mean": 0.6357530355453491, "std": 0.9089133739471436, "min": -1.28802490234375, "p10": -0.6202386856079101, "median": 0.6895751953125, "p90": 1.7684387207031254, "max": 3.456024169921875, "pos_frac": 0.75, "sample": [0.8190383911132812, -0.33050537109375, 1.1463890075683594, 0.68597412109375, 0.6572551727294922, -1.28802490234375, -0.12369537353515625, 1.1591796875, 1.6946792602539062, 0.9947757720947266, 1.0339889526367188, 1.3007698059082031, 0.69317626953125, 0.1159210205078125, 2.009490966796875, -0.37909507751464844, 0.4479961395263672, 1.1738700866699219, 0.34270477294921875, -1.1960334777832031, 1.161224365234375, 0.23850250244140625, -0.6706771850585938, 0.7309284210205078, 1.8555679321289062, -0.7702064514160156, -0.26348876953125, 0.20472335815429688, 0.9523162841796875, 0.5217742919921875, 0.6611099243164062, 3.456024169921875, 2.1144466400146484, -0.6473560333251953, 0.7952251434326172, 1.423187255859375, 1.3161544799804688, 2.0142822265625, -0.17430686950683594, 1.2084197998046875, 0.791778564453125, 1.1407012939453125, 1.2959175109863281, -0.22249603271484375, 0.3666229248046875, 0.6684951782226562, 0.18160247802734375, -0.5569648742675781, 1.9790725708007812, -0.0375823974609375, -1.0838088989257812, 1.795135498046875, -0.25589752197265625, 0.16866683959960938, 1.4862823486328125, -0.6992263793945312, 0.43381500244140625, 0.15720367431640625, 1.173614501953125, 0.048553466796875, 1.0057735443115234, 1.1885299682617188, 1.706146240234375, 0.8705520629882812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000174.npy"} +{"epoch": 0.26303854875283444, "step": 175, "batch_size": 64, "mean": 0.6816866397857666, "std": 0.918885350227356, "min": -1.303436279296875, "p10": -0.5159790039062498, "median": 0.7163228988647461, "p90": 1.8820075988769531, "max": 2.82769775390625, "pos_frac": 0.75, "sample": [-0.374481201171875, 0.4868793487548828, -0.8255767822265625, 0.5227317810058594, 0.6558456420898438, 0.9791545867919922, -0.0039424896240234375, 2.82769775390625, 1.4767608642578125, 0.1820201873779297, 0.6188278198242188, 1.4251937866210938, 0.226593017578125, 0.1391448974609375, 0.08504867553710938, 1.20184326171875, 1.4064483642578125, 0.9517364501953125, 1.2296142578125, -0.211029052734375, 0.810577392578125, 0.8858680725097656, 0.166656494140625, 1.9343338012695312, 1.2900543212890625, 1.002532958984375, -0.16823196411132812, 0.34767913818359375, 1.512247085571289, -1.303436279296875, 1.284881591796875, 1.1053390502929688, 0.5987396240234375, 1.3573532104492188, -0.302398681640625, -0.03331565856933594, 0.5196113586425781, 2.1066665649414062, -0.24740028381347656, -0.3856048583984375, 0.8478927612304688, -0.6677093505859375, 0.16968536376953125, 1.02215576171875, -1.1243896484375, -1.235992431640625, 1.6115951538085938, -0.7256317138671875, 1.6944160461425781, 1.5882759094238281, 0.6866855621337891, 1.1590023040771484, -0.096282958984375, -0.5718536376953125, 0.7459602355957031, 1.0562973022460938, 0.01313018798828125, 1.8672943115234375, 1.8883132934570312, 2.370452880859375, 0.5207138061523438, 1.94000244140625, 1.1615524291992188, 2.2237167358398438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000175.npy"} +{"epoch": 0.26455026455026454, "step": 176, "batch_size": 64, "mean": 0.5668948292732239, "std": 1.1643166542053223, "min": -2.159912109375, "p10": -0.7172225952148438, "median": 0.41745758056640625, "p90": 2.16058807373047, "max": 4.42364501953125, "pos_frac": 0.65625, "sample": [1.9140777587890625, -0.9946136474609375, 2.5664138793945312, 0.9494857788085938, 0.411590576171875, 3.5450515747070312, 0.6442642211914062, 0.33066558837890625, 0.3498516082763672, -0.614044189453125, -0.09259033203125, -0.7249832153320312, -0.5899887084960938, 0.7701644897460938, -0.8003463745117188, 1.7386188507080078, -0.1552581787109375, 0.9347648620605469, -1.0175628662109375, -0.3446464538574219, 0.5111122131347656, 0.00780487060546875, 0.5964431762695312, 2.4566268920898438, 0.9506301879882812, 0.4233245849609375, -0.3084869384765625, 0.857269287109375, -0.39178466796875, 1.1155967712402344, 2.2662353515625, -0.1713428497314453, -0.7834396362304688, 1.4043807983398438, 0.109222412109375, 1.3848381042480469, -0.0756683349609375, 0.5120697021484375, 0.6511459350585938, -0.4720611572265625, 0.2463531494140625, 0.71112060546875, 0.2271099090576172, 1.7359275817871094, -0.00366973876953125, 2.3693504333496094, -0.7094573974609375, -0.2977447509765625, 1.0211944580078125, 2.758575439453125, -0.720550537109375, 0.4964790344238281, 0.018949508666992188, 1.7938766479492188, 0.2435150146484375, 0.6031703948974609, 4.42364501953125, 1.7298431396484375, -0.574554443359375, 1.3058929443359375, 1.2274589538574219, -2.159912109375, -0.0897979736328125, 0.059661865234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000176.npy"} +{"epoch": 0.2660619803476946, "step": 177, "batch_size": 64, "mean": 0.8022229671478271, "std": 1.0080088376998901, "min": -3.11077880859375, "p10": -0.1863513946533203, "median": 0.8083763122558594, "p90": 1.9268569946289062, "max": 2.9516334533691406, "pos_frac": 0.828125, "sample": [0.9877166748046875, 0.4703388214111328, 0.9339923858642578, 2.8767852783203125, 1.0271377563476562, 0.12630462646484375, -0.09575653076171875, 2.299560546875, 1.9270172119140625, 1.2863922119140625, 2.9516334533691406, 0.9130096435546875, 1.6469230651855469, 0.6173019409179688, -0.17001724243164062, -0.43919944763183594, 0.6293258666992188, 0.00954437255859375, -0.7186870574951172, 1.159170150756836, 1.269287109375, 1.1212158203125, 0.40279579162597656, 2.3969478607177734, 0.9937038421630859, 0.44824981689453125, 1.2198371887207031, 1.373178482055664, 0.45953369140625, -0.04721832275390625, 0.697998046875, 0.1511383056640625, 1.7371063232421875, 2.64056396484375, -0.23179244995117188, 1.7433528900146484, 0.9065303802490234, 0.7613143920898438, 0.682861328125, 0.22099876403808594, -0.19335174560546875, 0.17882156372070312, 1.1973304748535156, -3.11077880859375, 1.3769149780273438, -0.7671661376953125, 1.2989501953125, 1.926483154296875, 0.7893867492675781, 0.05449676513671875, 0.6501312255859375, 0.2125835418701172, -1.2031402587890625, 2.4457054138183594, 0.79296875, 1.298004150390625, 1.526906967163086, 1.455413818359375, 1.176727294921875, 0.06908607482910156, 0.15534210205078125, 1.8737602233886719, -0.0721893310546875, 0.8237838745117188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000177.npy"} +{"epoch": 0.2675736961451247, "step": 178, "batch_size": 64, "mean": 0.47716209292411804, "std": 0.7732421159744263, "min": -1.503265380859375, "p10": -0.4305076599121093, "median": 0.4881305694580078, "p90": 1.3977731704711915, "max": 2.4408721923828125, "pos_frac": 0.75, "sample": [-0.1828327178955078, 0.7165908813476562, 1.4290428161621094, -0.36399078369140625, 1.4182510375976562, 0.04719734191894531, 0.23838043212890625, 1.401144027709961, 0.47534942626953125, -0.1395111083984375, -0.3387641906738281, 0.5023269653320312, 1.2025299072265625, 0.04738807678222656, 1.2367477416992188, 0.7695846557617188, 1.3414859771728516, 1.1335639953613281, 0.035888671875, 0.1012725830078125, 1.2325401306152344, 0.1789093017578125, -0.27002716064453125, -1.503265380859375, 1.0544013977050781, 1.41839599609375, 0.68817138671875, 0.630767822265625, 0.408050537109375, 0.5009117126464844, 0.3432464599609375, 0.09294319152832031, -0.079925537109375, 1.285614013671875, 0.2212371826171875, 1.2525405883789062, -0.459014892578125, 0.784393310546875, 1.8241806030273438, -0.18732452392578125, -0.7577972412109375, 0.8990554809570312, 2.4274978637695312, 0.16366958618164062, -0.9366798400878906, 0.6625900268554688, 0.4060821533203125, -0.12782669067382812, 0.86181640625, 1.0751113891601562, 0.15061187744140625, 2.4408721923828125, 0.5075855255126953, 0.8686370849609375, 0.17074203491210938, -0.3393287658691406, 1.3899078369140625, 0.5618553161621094, -0.5004119873046875, -0.7377777099609375, 0.37279510498046875, 0.5280532836914062, 0.8811721801757812, -0.9182510375976562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000178.npy"} +{"epoch": 0.2690854119425548, "step": 179, "batch_size": 64, "mean": 0.5256578922271729, "std": 0.8789588212966919, "min": -1.2897186279296875, "p10": -0.4819467544555663, "median": 0.5028772354125977, "p90": 1.3989624023437501, "max": 3.090850830078125, "pos_frac": 0.703125, "sample": [0.6212615966796875, -0.058807373046875, 1.13934326171875, 1.11553955078125, 0.6846542358398438, 0.6217098236083984, 0.3594169616699219, 1.1199188232421875, 3.090850830078125, 1.3115043640136719, 1.1563129425048828, 2.78900146484375, 0.5423946380615234, -0.35247039794921875, 0.7235794067382812, -0.3804664611816406, -0.9399871826171875, 0.23009681701660156, -0.10353469848632812, 1.1934661865234375, -0.06637382507324219, 0.6211357116699219, -0.9180450439453125, 0.46720123291015625, 1.9170875549316406, 0.286865234375, -0.8534469604492188, 0.8363037109375, -0.00244903564453125, -0.0013294219970703125, 0.6246852874755859, 1.3395004272460938, 0.6966629028320312, 0.02225494384765625, -0.1380767822265625, -0.645050048828125, 0.1603240966796875, 1.7341156005859375, 0.92218017578125, -0.3254241943359375, 0.5365505218505859, 0.6939697265625, 0.6162624359130859, 0.9810256958007812, 1.2119369506835938, 0.3538818359375, 1.2783050537109375, 0.1517791748046875, 1.4244461059570312, 2.635650634765625, 0.9012336730957031, 0.2789154052734375, 2.31341552734375, 0.4357337951660156, -0.21758460998535156, 0.8448333740234375, 0.13440704345703125, -0.35076904296875, 0.4692039489746094, -0.619781494140625, -1.2897186279296875, -0.3535346984863281, 0.19547271728515625, -0.5254383087158203], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000179.npy"} +{"epoch": 0.2705971277399849, "step": 180, "batch_size": 64, "mean": 0.4296168386936188, "std": 0.9299758672714233, "min": -2.5453643798828125, "p10": -0.7045623779296875, "median": 0.40313148498535156, "p90": 1.5271656036376955, "max": 3.0330352783203125, "pos_frac": 0.671875, "sample": [1.28887939453125, 0.718994140625, 0.8473415374755859, -0.69830322265625, 0.5216064453125, 1.1269683837890625, -0.5708847045898438, 0.4167747497558594, 0.6958770751953125, 0.008815765380859375, 1.0289669036865234, 1.1684036254882812, -1.09991455078125, -0.16535186767578125, 0.6548690795898438, 0.2149200439453125, -2.5453643798828125, 0.14422607421875, 0.3149757385253906, -0.5234146118164062, 2.10504150390625, 0.7249317169189453, 1.290283203125, -0.5986480712890625, 0.7116203308105469, 1.505645751953125, -0.20820999145507812, 0.7682266235351562, 0.27982521057128906, 1.0995769500732422, 0.9605255126953125, 1.6988983154296875, -1.2356796264648438, -0.0166778564453125, 1.1937217712402344, 0.6583633422851562, -0.17339324951171875, 1.2335052490234375, -0.002227783203125, 0.035564422607421875, 0.35985755920410156, 0.6803092956542969, 1.6869316101074219, -0.26828575134277344, -0.07848930358886719, 0.20395660400390625, 1.9297332763671875, 0.1903839111328125, 3.0330352783203125, -0.3765373229980469, 0.24718475341796875, -0.7970123291015625, 0.9664535522460938, -0.9922065734863281, -0.11699676513671875, 1.0433616638183594, 1.8433914184570312, -0.7265701293945312, 1.5363883972167969, -0.707244873046875, 0.7158966064453125, 0.38948822021484375, 1.3923568725585938, -0.23918724060058594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000180.npy"} +{"epoch": 0.272108843537415, "step": 181, "batch_size": 64, "mean": 0.686005711555481, "std": 0.8572390079498291, "min": -0.94293212890625, "p10": -0.4771114349365234, "median": 0.7824554443359375, "p90": 1.7189353942871097, "max": 2.6154327392578125, "pos_frac": 0.75, "sample": [-0.4473724365234375, 1.0276336669921875, -0.8945693969726562, -0.16596221923828125, 0.8198890686035156, 0.28844261169433594, 1.1622543334960938, -0.3346099853515625, 0.6950187683105469, 0.20770263671875, 0.35050201416015625, -0.14652633666992188, 1.0382118225097656, 1.07440185546875, 0.6155319213867188, 1.4652023315429688, 0.9719390869140625, 2.2204360961914062, 0.8178253173828125, -0.8555755615234375, -0.7265167236328125, 2.6154327392578125, 1.19293212890625, 1.0428848266601562, 1.5006484985351562, 1.9466400146484375, 0.30576324462890625, 2.2667770385742188, 0.522003173828125, -0.6941986083984375, 2.3311843872070312, 0.898101806640625, 1.65447998046875, -0.7455120086669922, 0.800445556640625, 0.18143463134765625, -0.0584869384765625, 1.5627288818359375, 0.5047225952148438, -0.4898567199707031, 1.4065322875976562, -0.0531768798828125, 1.7465591430664062, 1.4978561401367188, 1.3114280700683594, 0.15509033203125, 0.9266033172607422, 0.7092533111572266, 0.8566131591796875, -0.08912086486816406, 0.6668777465820312, 1.5369071960449219, 2.2289581298828125, 0.7257537841796875, -0.3360137939453125, 1.140218734741211, 0.49607086181640625, 0.8231201171875, 0.9568214416503906, 0.083404541015625, 0.76446533203125, -0.17769622802734375, 0.9487857818603516, -0.94293212890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000181.npy"} +{"epoch": 0.273620559334845, "step": 182, "batch_size": 64, "mean": 0.6177935004234314, "std": 0.9455261826515198, "min": -2.1849594116210938, "p10": -0.43222084045410153, "median": 0.6711387634277344, "p90": 1.7481086730957032, "max": 2.506622314453125, "pos_frac": 0.796875, "sample": [2.06976318359375, -0.4205436706542969, -0.3559284210205078, 0.09018707275390625, 1.6195487976074219, 1.8942642211914062, 0.5302276611328125, -0.5716552734375, -1.1637344360351562, 1.10577392578125, 1.2772541046142578, 0.20195388793945312, 2.506622314453125, -0.5510482788085938, 0.395721435546875, 0.963836669921875, 1.0626983642578125, 0.8795394897460938, 2.250030517578125, 0.2890434265136719, 0.48863792419433594, 0.1691570281982422, 0.721038818359375, 0.6883392333984375, 0.0672149658203125, -0.355560302734375, 1.4107303619384766, 1.01025390625, 1.4239501953125, 1.7328720092773438, 1.3236083984375, 0.9008159637451172, 0.7473907470703125, 0.5876235961914062, 0.43994140625, -0.437225341796875, 0.8932342529296875, 1.0045700073242188, 1.754638671875, 0.12790489196777344, 0.943634033203125, -0.7075328826904297, 2.0775070190429688, 0.06351470947265625, 1.4383621215820312, 2.1265869140625, 0.07977294921875, 0.6222763061523438, 0.6539382934570312, 0.35833740234375, 0.82037353515625, 1.609619140625, 0.051788330078125, 1.6406631469726562, 1.7266006469726562, 0.12348365783691406, -0.1647186279296875, 0.9172153472900391, 0.8304328918457031, 0.3553047180175781, -0.25067138671875, -2.1849594116210938, -0.3036003112792969, -2.0618362426757812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000182.npy"} +{"epoch": 0.2751322751322751, "step": 183, "batch_size": 64, "mean": 0.3405901789665222, "std": 0.857184886932373, "min": -1.666778564453125, "p10": -0.7142364501953122, "median": 0.3009500503540039, "p90": 1.4896888732910158, "max": 2.0160980224609375, "pos_frac": 0.625, "sample": [0.27349853515625, 1.1129302978515625, -0.470428466796875, -0.08874130249023438, 0.13425827026367188, -1.3867263793945312, 0.175994873046875, 1.7978363037109375, -0.22234344482421875, -0.1680755615234375, -1.666778564453125, -0.12471771240234375, 0.23340797424316406, -0.16326904296875, 0.5417537689208984, 0.9746551513671875, -0.3262443542480469, -0.9904937744140625, 1.394287109375, 0.7025203704833984, 1.3017349243164062, -1.0110015869140625, 1.67962646484375, -0.37999725341796875, 0.3213386535644531, -0.0319671630859375, 1.1699371337890625, 0.6628265380859375, -0.2362060546875, -1.4337921142578125, 2.0160980224609375, 0.9278411865234375, 0.06804656982421875, 0.44738006591796875, -0.420318603515625, 0.90252685546875, 0.722747802734375, 1.2354698181152344, -1.0612564086914062, 1.5124282836914062, -0.3667182922363281, 0.42462158203125, 0.8179855346679688, 0.41855812072753906, 0.2805614471435547, 0.4853324890136719, 0.7368659973144531, 0.3499736785888672, 0.061859130859375, -0.2838573455810547, -0.447845458984375, 1.8330535888671875, 0.074859619140625, 0.7136306762695312, 1.70257568359375, -0.28246116638183594, 1.4335098266601562, -0.8187255859375, -0.25267982482910156, -0.183990478515625, 1.635284423828125, 1.4366302490234375, 1.0995159149169922, 0.8024444580078125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000183.npy"} +{"epoch": 0.2766439909297052, "step": 184, "batch_size": 64, "mean": 0.5154126882553101, "std": 0.7534717917442322, "min": -0.9894733428955078, "p10": -0.31069030761718747, "median": 0.3803577423095703, "p90": 1.4078580856323242, "max": 2.3208770751953125, "pos_frac": 0.75, "sample": [0.791778564453125, 2.0408401489257812, 1.2832145690917969, 0.9473381042480469, 0.9035491943359375, 0.246337890625, -0.5748786926269531, 0.25930023193359375, 0.3705406188964844, 0.9125442504882812, -0.1388416290283203, 0.8311996459960938, 0.05343055725097656, 0.5772209167480469, 0.9593315124511719, -0.42308807373046875, 1.3279571533203125, 2.0390701293945312, 0.4057655334472656, 2.1146697998046875, -0.173675537109375, 1.9969520568847656, 0.28449249267578125, 2.0951156616210938, 0.06465911865234375, 0.8130950927734375, -0.3264636993408203, 0.6205902099609375, 2.3208770751953125, 0.818328857421875, 0.4406890869140625, 0.23839569091796875, 0.4878387451171875, 0.26093292236328125, 0.0996551513671875, 0.1738128662109375, 1.414377212524414, 0.9512310028076172, -0.0523529052734375, 1.3841018676757812, -0.9894733428955078, 0.39017486572265625, 1.3926467895507812, -0.12750625610351562, 1.2805366516113281, 1.0859832763671875, 0.2147369384765625, -0.7731170654296875, 0.11986541748046875, 0.87640380859375, -0.27240943908691406, -0.0241851806640625, 0.05712890625, 0.7103500366210938, 0.7353057861328125, 0.2858123779296875, -0.7582206726074219, 0.7486343383789062, -0.27388572692871094, 0.0095672607421875, 0.01821136474609375, -0.10847663879394531, -0.4087638854980469, -0.04283905029296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000184.npy"} +{"epoch": 0.2781557067271353, "step": 185, "batch_size": 64, "mean": 0.6196208000183105, "std": 1.013339638710022, "min": -1.898712158203125, "p10": -0.41280345916748046, "median": 0.5029640197753906, "p90": 1.6883888244628908, "max": 3.383331298828125, "pos_frac": 0.765625, "sample": [0.4954338073730469, 3.383331298828125, 0.04654693603515625, 1.0222606658935547, 1.4486846923828125, -1.7053108215332031, 0.8039798736572266, -0.029003143310546875, 0.5066909790039062, 0.05223846435546875, 1.720123291015625, 1.6143417358398438, 0.5494537353515625, -0.236602783203125, 2.8413772583007812, 1.3299598693847656, 1.81121826171875, 0.4946784973144531, 0.4123802185058594, 1.0214862823486328, -0.38065338134765625, 0.9309539794921875, 0.3083839416503906, 1.4491195678710938, 0.499237060546875, 1.4468345642089844, 0.7558746337890625, 0.7370033264160156, 1.099853515625, 0.074127197265625, -0.19467735290527344, 0.44382476806640625, 0.6406650543212891, 1.5402374267578125, -0.8364048004150391, 1.6067886352539062, 0.65301513671875, 1.3722286224365234, 0.4110069274902344, 0.1273651123046875, -1.0693588256835938, 1.0300216674804688, -0.42478370666503906, 1.18035888671875, 1.0160598754882812, -1.898712158203125, 1.2926712036132812, 0.4105682373046875, 1.7985000610351562, -0.1323089599609375, 0.1411285400390625, -1.2650642395019531, 3.2546005249023438, -0.38484954833984375, -0.1009979248046875, 0.30425453186035156, -0.5367698669433594, -0.3030414581298828, 0.08979034423828125, 0.21852874755859375, 0.07391166687011719, 0.9724311828613281, 2.1615371704101562, 1.5592021942138672], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000185.npy"} +{"epoch": 0.2796674225245654, "step": 186, "batch_size": 64, "mean": 0.5566080808639526, "std": 0.8811169266700745, "min": -1.2593154907226562, "p10": -0.5382678985595702, "median": 0.5251026153564453, "p90": 1.6977512359619142, "max": 2.5534439086914062, "pos_frac": 0.703125, "sample": [1.4467315673828125, 2.5534439086914062, -1.2593154907226562, 1.3368148803710938, 0.11893463134765625, 0.617034912109375, 1.2085113525390625, -0.48456573486328125, -0.4485931396484375, 1.0809555053710938, -0.128936767578125, -0.6351509094238281, 0.329803466796875, 1.5799694061279297, 1.926025390625, 1.5798892974853516, 0.5247459411621094, 0.09122467041015625, 0.0468902587890625, -0.1422576904296875, 0.1220245361328125, -0.29497718811035156, 1.1448030471801758, 2.0042076110839844, -1.2166900634765625, -0.5745162963867188, -0.3052024841308594, -0.5612831115722656, 0.5428619384765625, 0.78045654296875, 1.0668792724609375, 1.7283744812011719, 0.949127197265625, -0.003143310546875, -0.08103561401367188, -0.32224082946777344, 0.9452133178710938, 1.6262969970703125, 1.2549705505371094, 1.3665733337402344, 0.3487815856933594, -0.41214752197265625, -0.47240257263183594, 0.5278549194335938, 0.2947540283203125, -0.056549072265625, 0.9340839385986328, 0.9001312255859375, -0.6952362060546875, 0.36005401611328125, 2.1042022705078125, 0.5923366546630859, 0.5254592895507812, 2.491973876953125, 0.05078125, 0.359039306640625, 2.1665496826171875, 0.9232368469238281, 0.28513336181640625, 0.9357223510742188, 1.1790771484375, 0.4326629638671875, -0.5927886962890625, 0.9253501892089844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000186.npy"} +{"epoch": 0.2811791383219955, "step": 187, "batch_size": 64, "mean": 0.6642183661460876, "std": 1.2274832725524902, "min": -2.7777099609375, "p10": -0.42584724426269516, "median": 0.6741943359375, "p90": 1.8920364379882817, "max": 4.2911376953125, "pos_frac": 0.8125, "sample": [1.0502243041992188, 0.022735595703125, 3.0506210327148438, 1.7661056518554688, 3.736572265625, 1.07965087890625, 1.613861083984375, 0.13578414916992188, 1.5128326416015625, 1.0731658935546875, -0.291534423828125, 2.529266357421875, 0.08991622924804688, 2.3109588623046875, 1.7415313720703125, 0.7265396118164062, 0.18227767944335938, 1.4230499267578125, 0.7380523681640625, 0.7144966125488281, -2.7777099609375, -1.7036018371582031, 1.9460067749023438, 0.12345695495605469, 1.0386428833007812, -0.16520309448242188, 0.10011100769042969, 0.1861724853515625, 0.705047607421875, 0.8679580688476562, 0.9402236938476562, 1.4747238159179688, -0.9596099853515625, 0.4440193176269531, 1.3602676391601562, 3.2446365356445312, 0.3241920471191406, 0.08008575439453125, 0.26213836669921875, 0.5170192718505859, -0.4834098815917969, -0.5411529541015625, 1.1923904418945312, -0.24419784545898438, 0.126190185546875, 0.23990631103515625, 0.659881591796875, 0.05507659912109375, 1.6292877197265625, 1.29998779296875, 1.2153701782226562, 0.1192474365234375, 0.9070205688476562, 4.2911376953125, 0.22766494750976562, -0.0876312255859375, 0.7548370361328125, 0.688507080078125, -0.07066726684570312, 0.1627044677734375, 0.00855255126953125, -1.0261287689208984, -2.670166015625, 0.84088134765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000187.npy"} +{"epoch": 0.28269085411942557, "step": 188, "batch_size": 64, "mean": 0.56996750831604, "std": 1.0339672565460205, "min": -3.7782745361328125, "p10": -0.392462921142578, "median": 0.5694789886474609, "p90": 1.7502307891845705, "max": 2.664562225341797, "pos_frac": 0.734375, "sample": [-0.01702117919921875, 0.1944446563720703, 1.3048934936523438, -0.7297248840332031, 0.39111328125, 1.1419448852539062, 1.618927001953125, 0.545379638671875, -0.580474853515625, -0.03045654296875, 0.5935783386230469, 1.4816741943359375, 1.206939697265625, 0.05896759033203125, 0.04686164855957031, 0.4838409423828125, 1.8492889404296875, 1.5239715576171875, -0.18920135498046875, 0.07283782958984375, 0.3984184265136719, -0.305267333984375, 0.4506683349609375, 0.27056884765625, 0.6084461212158203, 0.8410415649414062, -0.5263881683349609, 1.2249755859375, -0.0994110107421875, 1.6445236206054688, 1.427825927734375, 0.33162689208984375, 1.268402099609375, 1.6903762817382812, 1.7804183959960938, -0.09266090393066406, 2.1910781860351562, 0.8027191162109375, -0.42983245849609375, -0.2782440185546875, 2.664562225341797, 0.7692031860351562, 0.4847393035888672, 0.7029876708984375, -0.193145751953125, -1.0167388916015625, 0.7902774810791016, 1.3783111572265625, 1.0393104553222656, 0.5237903594970703, 1.81072998046875, -2.4162750244140625, 0.371246337890625, 0.9893760681152344, 1.13311767578125, 1.2364425659179688, 0.3853626251220703, -0.2207965850830078, 1.7758827209472656, 2.0209999084472656, 0.7765865325927734, 1.3423538208007812, -0.2592010498046875, -3.7782745361328125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000188.npy"} +{"epoch": 0.2842025699168556, "step": 189, "batch_size": 64, "mean": 0.6669397950172424, "std": 0.8434544801712036, "min": -1.3483238220214844, "p10": -0.23953094482421874, "median": 0.6790761947631836, "p90": 1.7717821121215824, "max": 2.68426513671875, "pos_frac": 0.78125, "sample": [0.352783203125, 0.9773216247558594, 0.1434803009033203, -0.22802734375, 0.6924285888671875, 1.528900146484375, 1.1546478271484375, 0.7936573028564453, -0.03820610046386719, 1.0755615234375, -0.8367919921875, 1.205291748046875, -0.369964599609375, 1.0385055541992188, 0.6946640014648438, -0.2190990447998047, 2.5667266845703125, 1.447662353515625, -1.3483238220214844, 0.729156494140625, 0.5531234741210938, 1.4710426330566406, 0.0126953125, 0.886688232421875, 1.009063720703125, 0.6354331970214844, 0.7737197875976562, 0.5565719604492188, 0.8119125366210938, 0.13043212890625, 1.1020660400390625, 0.1992034912109375, 0.7945098876953125, 2.460918426513672, 2.5208663940429688, 0.7478790283203125, 0.32161712646484375, 0.59100341796875, 0.08191108703613281, 0.9085922241210938, 1.6141948699951172, -0.417236328125, 0.024343490600585938, -0.06134033203125, 0.5900726318359375, 0.9924964904785156, -0.09898757934570312, -0.4779071807861328, 0.160491943359375, 0.5046615600585938, -0.2444610595703125, 1.9155330657958984, 0.226104736328125, 1.8160324096679688, 1.2480316162109375, -0.16089630126953125, -0.8500747680664062, -0.22269058227539062, 0.6657238006591797, 2.68426513671875, 0.1705150604248047, 0.9940567016601562, 2.0130615234375, 1.6685314178466797], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000189.npy"} +{"epoch": 0.2857142857142857, "step": 190, "batch_size": 64, "mean": 0.6427220106124878, "std": 0.9766984581947327, "min": -2.8711395263671875, "p10": -0.26463909149169923, "median": 0.4002408981323242, "p90": 2.1232643127441406, "max": 2.5560455322265625, "pos_frac": 0.75, "sample": [0.6546897888183594, 1.6533050537109375, 2.17498779296875, -2.8711395263671875, 1.0503883361816406, 0.8506698608398438, -0.2821502685546875, -0.04552459716796875, -0.49770545959472656, 2.08056640625, 0.41825103759765625, 0.907501220703125, 0.27027320861816406, 0.0186767578125, 0.3716468811035156, 0.156341552734375, 0.11790084838867188, 1.155853271484375, 0.5938453674316406, 0.7286529541015625, 1.0592079162597656, 2.3097686767578125, -0.207977294921875, -0.13586807250976562, -0.2627372741699219, 0.9920883178710938, -1.0362396240234375, 0.4054145812988281, 0.09537506103515625, 0.3283405303955078, 1.8483200073242188, -0.31215667724609375, 0.42469024658203125, 1.548248291015625, 0.3316497802734375, 0.10074996948242188, 0.8256988525390625, 0.09292411804199219, 0.1540679931640625, 0.2630882263183594, 0.27205657958984375, 2.3332977294921875, 2.5560455322265625, 1.296173095703125, -0.016933441162109375, -0.2613792419433594, -0.2654132843017578, 2.072601318359375, 0.2640838623046875, -0.36401939392089844, 0.352813720703125, 0.7628498077392578, 0.3950672149658203, 0.7246856689453125, 1.822845458984375, 1.91644287109375, 2.163665771484375, -0.03174591064453125, 2.1455535888671875, 2.1415634155273438, -0.2628326416015625, 1.9359130859375, -0.157562255859375, 1.0067520141601562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000190.npy"} +{"epoch": 0.2872260015117158, "step": 191, "batch_size": 64, "mean": 0.6305709481239319, "std": 0.9767934083938599, "min": -2.0642852783203125, "p10": -0.5241058349609374, "median": 0.6512012481689453, "p90": 1.933936309814454, "max": 3.19488525390625, "pos_frac": 0.796875, "sample": [1.0642547607421875, -0.23376083374023438, -1.5179672241210938, 0.555755615234375, 2.5230636596679688, 1.1681900024414062, 0.86651611328125, 1.7377395629882812, 0.8977832794189453, 0.5497970581054688, 1.1508636474609375, 2.0180206298828125, 1.3667373657226562, 1.2016792297363281, 0.6097869873046875, 0.7114124298095703, -1.1001358032226562, -1.1687469482421875, 3.19488525390625, 0.8410148620605469, 0.916900634765625, 2.1043701171875, 0.3828754425048828, 0.8428192138671875, -0.8698806762695312, 1.5007247924804688, 0.4273529052734375, 2.1849288940429688, -0.380859375, 0.2942066192626953, 0.6274127960205078, -0.5290679931640625, 0.1037445068359375, 1.0810546875, 2.6809539794921875, -0.9386138916015625, 0.7321205139160156, -0.5125274658203125, 2.026111602783203, 0.8110427856445312, 0.5829887390136719, 0.39933013916015625, 0.2201080322265625, 1.00054931640625, 0.6749897003173828, 0.4618873596191406, 1.232147216796875, 1.0168113708496094, 0.0054759979248046875, 0.40355682373046875, -0.2712211608886719, 0.5919532775878906, 0.79534912109375, 0.9044647216796875, -0.3296928405761719, 1.337005615234375, 1.6648941040039062, -2.0642852783203125, 0.4038066864013672, -0.43549346923828125, 0.14533233642578125, 0.87969970703125, 0.4381561279296875, 0.37616729736328125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000191.npy"} +{"epoch": 0.2887377173091459, "step": 192, "batch_size": 64, "mean": 0.4744090139865875, "std": 0.8610674738883972, "min": -1.7027626037597656, "p10": -0.5543357849121093, "median": 0.423736572265625, "p90": 1.6416988372802737, "max": 2.554483413696289, "pos_frac": 0.703125, "sample": [-1.3434371948242188, -0.9678001403808594, -0.13767623901367188, 1.7415523529052734, 0.7858047485351562, -0.11540603637695312, -0.7057952880859375, 1.4821929931640625, 1.8540191650390625, 0.9617156982421875, -0.8642921447753906, 0.6769943237304688, 0.2721366882324219, -0.14351463317871094, 1.4692420959472656, 1.249420166015625, 1.6630363464355469, 0.1385345458984375, 0.27721595764160156, 0.0053558349609375, 0.7183513641357422, 1.4403457641601562, 0.086273193359375, 0.27024269104003906, 0.6862030029296875, 0.13513755798339844, -0.1119537353515625, -0.378021240234375, 0.62677001953125, 0.035060882568359375, 1.365936279296875, 0.5598983764648438, 0.7050342559814453, 0.4919166564941406, 0.2239513397216797, 0.2890148162841797, 1.126312255859375, -0.10187721252441406, 0.2537841796875, 0.45714569091796875, 0.5930042266845703, -0.053646087646484375, -1.7027626037597656, 2.554483413696289, 2.3036041259765625, 0.5371952056884766, -0.36487770080566406, 0.8172187805175781, -0.05011749267578125, -0.70098876953125, 0.42010498046875, 1.6849365234375, 0.2792816162109375, 1.5919113159179688, 0.4404296875, 0.4273681640625, 1.5193443298339844, -0.5888557434082031, 1.9544296264648438, 1.5162506103515625, 0.6891574859619141, -0.4737892150878906, -0.17513275146484375, -0.035198211669921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000192.npy"} +{"epoch": 0.29024943310657597, "step": 193, "batch_size": 64, "mean": 0.6579139232635498, "std": 0.9208496809005737, "min": -1.3756637573242188, "p10": -0.2575939178466797, "median": 0.6739788055419922, "p90": 1.6731164932250977, "max": 3.6728973388671875, "pos_frac": 0.78125, "sample": [0.10291671752929688, 0.8064517974853516, 0.6905860900878906, 1.5571136474609375, 0.2733879089355469, 0.9205474853515625, 0.6573715209960938, 0.93487548828125, -0.370849609375, 0.2678565979003906, 1.7755203247070312, -0.2378101348876953, 1.103302001953125, 0.9472427368164062, 0.8900985717773438, 0.10250091552734375, 1.2666397094726562, -0.1023406982421875, 0.6923866271972656, 1.0002479553222656, -0.5869674682617188, 1.8787288665771484, 3.4993743896484375, 0.07134628295898438, 1.6774520874023438, 0.7664031982421875, 0.15613174438476562, 1.2217254638671875, 0.39136505126953125, 0.00771331787109375, 2.586139678955078, 0.7089614868164062, -0.13516616821289062, 0.35009002685546875, 0.83843994140625, -0.26428985595703125, -0.8391494750976562, 0.4300804138183594, 0.5478591918945312, 0.746429443359375, 0.935272216796875, -0.24197006225585938, 0.7002182006835938, 3.6728973388671875, -0.46967124938964844, -0.6937713623046875, 0.4391040802001953, -0.0003948211669921875, 0.07172966003417969, 1.5812606811523438, 1.3244857788085938, -0.22388458251953125, 1.3201522827148438, 1.6630001068115234, 2.4356307983398438, 1.4163055419921875, 0.1726531982421875, -1.3756637573242188, 0.31391143798828125, 0.6955585479736328, 0.26123809814453125, 0.742767333984375, -0.06534576416015625, 0.10029220581054688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000193.npy"} +{"epoch": 0.29176114890400606, "step": 194, "batch_size": 64, "mean": 0.5165130496025085, "std": 0.9875158667564392, "min": -1.5929737091064453, "p10": -0.7176063537597657, "median": 0.5373001098632812, "p90": 1.5898225784301758, "max": 3.9434661865234375, "pos_frac": 0.671875, "sample": [0.5348052978515625, 0.9108123779296875, 0.8679542541503906, -0.4380359649658203, 0.0800628662109375, 0.48239898681640625, -0.37553977966308594, 0.3633460998535156, 0.2831001281738281, 0.89794921875, 0.6559906005859375, 1.2542800903320312, -0.1108551025390625, 1.001251220703125, -0.7172164916992188, -1.119598388671875, 1.6995925903320312, -0.9361953735351562, 0.21418380737304688, -1.0422592163085938, 0.539794921875, 0.4359169006347656, 1.2692489624023438, 1.175201416015625, -0.36766815185546875, 1.2105712890625, -0.062408447265625, 0.547271728515625, -0.0801849365234375, 1.19317626953125, -0.6328601837158203, 1.2487030029296875, -0.8151664733886719, -0.7177734375, -0.9381179809570312, -0.15231895446777344, 1.6045665740966797, 2.5481414794921875, 0.7810020446777344, 2.1772193908691406, -1.5929737091064453, 1.3623580932617188, 0.7751102447509766, 0.273193359375, 1.232757568359375, 1.852457046508789, 1.091268539428711, -0.28125762939453125, 2.3601913452148438, 0.573577880859375, 1.555419921875, -0.361602783203125, 1.5121841430664062, 1.1904830932617188, 0.1185150146484375, 3.9434661865234375, 0.5596294403076172, -0.2762260437011719, -0.0723419189453125, 0.91015625, 0.5668106079101562, 0.5021743774414062, 0.24764251708984375, -0.4565010070800781], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000194.npy"} +{"epoch": 0.29327286470143615, "step": 195, "batch_size": 64, "mean": 0.44856685400009155, "std": 0.9266215562820435, "min": -2.39300537109375, "p10": -0.546808624267578, "median": 0.4068775177001953, "p90": 1.6056297302246094, "max": 2.360912322998047, "pos_frac": 0.671875, "sample": [0.9455642700195312, 0.7278347015380859, -0.44197845458984375, -0.88177490234375, 2.1827239990234375, 0.308746337890625, 1.0076313018798828, -0.12896728515625, 1.5719146728515625, 0.6574554443359375, 1.6291046142578125, 1.4471855163574219, -1.4815292358398438, -0.25218963623046875, -0.3091888427734375, -2.39300537109375, -0.20879173278808594, -0.18941497802734375, 0.05630302429199219, 1.6200790405273438, 0.19287109375, 1.3717803955078125, 1.0624561309814453, 1.004302978515625, 0.18807220458984375, 0.20243263244628906, 1.0498542785644531, 1.8549308776855469, -0.01535797119140625, 0.6637535095214844, -0.09146690368652344, 0.19382095336914062, 2.360912322998047, 0.8107414245605469, 1.2143402099609375, 0.3648834228515625, 0.03632545471191406, 1.1518745422363281, 0.9198532104492188, 0.7595062255859375, -0.007495880126953125, -0.9188156127929688, 0.47229766845703125, 0.4008026123046875, 0.3899078369140625, 0.2427196502685547, -1.762237548828125, 1.3556442260742188, -1.2770233154296875, -0.19108009338378906, -0.0214996337890625, 0.9557342529296875, 1.8048782348632812, 1.2240562438964844, -0.152069091796875, 1.6814098358154297, 1.506256103515625, 0.6108245849609375, -0.59173583984375, 0.8182449340820312, 0.4129524230957031, -0.05815887451171875, -0.2812042236328125, 0.9302806854248047], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000195.npy"} +{"epoch": 0.2947845804988662, "step": 196, "batch_size": 64, "mean": 0.5237653255462646, "std": 0.9700738191604614, "min": -1.9887619018554688, "p10": -0.5823692321777344, "median": 0.5782337188720703, "p90": 1.9719062805175793, "max": 2.54052734375, "pos_frac": 0.6875, "sample": [-0.28802490234375, -1.9887619018554688, -0.7927875518798828, 1.124542236328125, 0.07363700866699219, -0.4627838134765625, 0.39926910400390625, 0.0751495361328125, 0.34275054931640625, -0.3825721740722656, 1.2156982421875, 0.958709716796875, 1.1784095764160156, 0.5631294250488281, 0.9147415161132812, -0.5684890747070312, -1.1271858215332031, 1.3056640625, -1.6610794067382812, 2.54052734375, -0.6690425872802734, -0.17458724975585938, 1.2303848266601562, 0.08893203735351562, -0.24153709411621094, 0.8974075317382812, 0.4868621826171875, 1.4752349853515625, 1.6095428466796875, 2.2042694091796875, -0.29316139221191406, 1.1119918823242188, 0.665008544921875, -0.16727066040039062, 0.6424407958984375, -0.29084014892578125, 0.06859207153320312, 0.45632362365722656, 0.7513236999511719, 2.2403106689453125, 0.5933380126953125, 0.6411590576171875, 0.45064544677734375, 1.0746345520019531, 1.2649612426757812, -0.9656333923339844, -0.1967906951904297, 0.917327880859375, 0.9307785034179688, 2.270477294921875, 0.6397552490234375, 1.6926422119140625, -0.58831787109375, -0.03040313720703125, 0.5953598022460938, 2.5166015625, 2.135486602783203, 2.0915908813476562, -0.25341796875, 0.7961502075195312, 0.33791351318359375, -0.11482620239257812, 0.0638427734375, 1.1449737548828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000196.npy"} +{"epoch": 0.2962962962962963, "step": 197, "batch_size": 64, "mean": 0.6141484975814819, "std": 1.1714394092559814, "min": -2.56005859375, "p10": -0.6017564773559569, "median": 0.40694665908813477, "p90": 2.167148780822754, "max": 3.793182373046875, "pos_frac": 0.71875, "sample": [-0.48117828369140625, 1.939605712890625, 0.5229740142822266, 0.3026885986328125, -0.45654296875, 0.07350921630859375, -0.7793083190917969, -2.56005859375, 0.38295745849609375, -0.07740020751953125, 2.606121063232422, 1.0057811737060547, -0.6599769592285156, -0.4099159240722656, -0.021112442016601562, 2.1829891204833984, -0.2957305908203125, 0.8775444030761719, 1.51763916015625, 2.2247085571289062, -0.04889678955078125, 0.6575469970703125, 1.8135871887207031, 0.25627899169921875, 0.7531661987304688, 0.18578338623046875, 0.1903839111328125, -0.37030982971191406, 1.2617931365966797, 1.9705429077148438, 1.0849800109863281, 2.7286224365234375, -1.916778564453125, 1.3063831329345703, -0.6561355590820312, -2.171550750732422, 0.7236251831054688, 0.490814208984375, 0.306396484375, 1.8683624267578125, 0.2127246856689453, 2.13018798828125, 1.1757049560546875, -0.3095550537109375, -0.3349151611328125, 2.2195587158203125, 0.33062744140625, 0.2862052917480469, 0.5056304931640625, 1.5150184631347656, -0.6534328460693359, -0.23751068115234375, 0.9678611755371094, 1.140960693359375, 0.4309358596801758, 0.17496490478515625, 3.793182373046875, 0.2860260009765625, 0.3276634216308594, 0.7074127197265625, 2.9601898193359375, 0.3712158203125, 1.29620361328125, 1.6787548065185547], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000197.npy"} +{"epoch": 0.29780801209372637, "step": 198, "batch_size": 64, "mean": 0.796296238899231, "std": 1.0114785432815552, "min": -2.0018463134765625, "p10": -0.346173095703125, "median": 0.7516098022460938, "p90": 2.121853828430176, "max": 3.4684295654296875, "pos_frac": 0.78125, "sample": [-0.5992584228515625, 1.2897109985351562, 0.22649192810058594, 0.8663711547851562, 0.10358238220214844, 1.1703720092773438, -0.8540802001953125, 1.3253860473632812, 0.28098106384277344, 0.4017791748046875, 2.4639129638671875, 0.230560302734375, -0.1836986541748047, -0.37767982482910156, 2.1312808990478516, 1.5519371032714844, 1.68255615234375, -0.71307373046875, 0.483551025390625, 0.11319160461425781, 0.6547698974609375, 1.171621322631836, 1.755910873413086, -0.2252941131591797, 2.566253662109375, -0.7348480224609375, -0.31179046630859375, -0.36090850830078125, 1.6584854125976562, 0.5396404266357422, 0.6545791625976562, 1.5543975830078125, 0.3953704833984375, -2.0018463134765625, 0.02764129638671875, -0.06536865234375, -0.04987907409667969, 0.9926738739013672, 0.5108871459960938, 0.747772216796875, 0.7575607299804688, 0.7898483276367188, 2.4680328369140625, 0.9313850402832031, 1.2336692810058594, 1.173553466796875, 0.22557830810546875, 1.4657344818115234, 3.1021156311035156, 2.1508941650390625, 1.872833251953125, 0.33884429931640625, -0.08901214599609375, 1.4174842834472656, 0.833282470703125, 2.0561981201171875, 1.1129608154296875, 1.2338809967041016, -0.17580032348632812, 0.177215576171875, 0.48902130126953125, 2.0998573303222656, 3.4684295654296875, 0.7554473876953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000198.npy"} +{"epoch": 0.29931972789115646, "step": 199, "batch_size": 64, "mean": 0.6032347679138184, "std": 0.9973053932189941, "min": -1.1778888702392578, "p10": -0.661488628387451, "median": 0.6409225463867188, "p90": 1.78992919921875, "max": 3.5967559814453125, "pos_frac": 0.71875, "sample": [0.08472061157226562, 0.6353225708007812, -0.377685546875, 0.5508823394775391, 0.18834877014160156, -1.1459884643554688, 3.5967559814453125, 1.5759963989257812, 0.9652633666992188, 1.4195556640625, 1.83135986328125, -0.29778289794921875, 1.9221916198730469, 1.4312934875488281, 1.782196044921875, 1.215576171875, 2.0732460021972656, 1.7677764892578125, 1.793243408203125, 1.6510734558105469, -0.3926544189453125, -0.7079868316650391, 1.23468017578125, 0.648529052734375, -0.007549285888671875, 0.699615478515625, 0.9675388336181641, -0.3572845458984375, 0.98876953125, 0.0483245849609375, 0.6465225219726562, 0.2389373779296875, 3.2746238708496094, 0.562286376953125, -0.5529928207397461, 0.0155487060546875, 0.984588623046875, -0.7176704406738281, -0.449615478515625, 0.0898895263671875, -1.1778888702392578, 0.89617919921875, 0.241729736328125, 0.4018402099609375, -1.061370849609375, 1.5658493041992188, 0.6831779479980469, 1.0162506103515625, -0.13898086547851562, -0.44149017333984375, -0.017549514770507812, -1.000579833984375, 0.9761581420898438, 1.4734992980957031, -0.840972900390625, -0.47728729248046875, 1.1207637786865234, 0.5087661743164062, 0.7364044189453125, 0.6572322845458984, 1.2426109313964844, 0.17478561401367188, 1.8988876342773438, 0.2915668487548828], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000199.npy"} +{"epoch": 0.30083144368858655, "step": 200, "batch_size": 64, "mean": 0.7240477800369263, "std": 1.1198185682296753, "min": -1.731292724609375, "p10": -0.5675888061523438, "median": 0.5972671508789062, "p90": 2.1331558227539062, "max": 3.78961181640625, "pos_frac": 0.734375, "sample": [1.3416061401367188, 2.146087646484375, 3.78961181640625, 0.02759552001953125, 1.7567596435546875, 0.4865589141845703, 0.606658935546875, -0.6594161987304688, 1.6078338623046875, -0.1816864013671875, -0.7551116943359375, 0.669464111328125, -0.056079864501953125, 3.061920166015625, 0.0545806884765625, -0.1811065673828125, 0.6289520263671875, -0.10326766967773438, 1.9570846557617188, -1.731292724609375, -0.5365676879882812, 0.5541534423828125, 1.6056747436523438, 3.1223907470703125, -0.09096145629882812, 1.2154178619384766, 0.6206760406494141, 0.6603622436523438, 1.1233787536621094, 0.6288795471191406, 2.0035247802734375, 0.6541290283203125, 0.5670356750488281, 2.5000762939453125, 1.2344169616699219, 0.2489604949951172, -1.0343170166015625, 1.441009521484375, 2.7358551025390625, 1.2466678619384766, 1.6239700317382812, 0.4015960693359375, 2.6893692016601562, 0.4815673828125, 0.5878753662109375, -1.4124603271484375, 0.0145263671875, 0.43658447265625, 0.1621875762939453, 1.7573013305664062, -0.5674819946289062, 0.4034309387207031, 1.6251544952392578, -0.3942832946777344, 0.8104019165039062, 0.42169189453125, -0.14945602416992188, -0.5676345825195312, -0.21770477294921875, 0.8712196350097656, 0.18786239624023438, -0.7604904174804688, 2.1029815673828125, 0.8633346557617188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000200.npy"} +{"epoch": 0.30234315948601664, "step": 201, "batch_size": 64, "mean": 0.6285424828529358, "std": 0.901455819606781, "min": -0.9816513061523438, "p10": -0.402179718017578, "median": 0.5927495956420898, "p90": 1.8086120605468752, "max": 2.7253456115722656, "pos_frac": 0.71875, "sample": [0.8717803955078125, 1.2631912231445312, 0.5214653015136719, 0.14649200439453125, -0.2380962371826172, 1.1807136535644531, 2.5871315002441406, 0.0769195556640625, 0.7106571197509766, 0.165863037109375, 0.48322296142578125, 1.065988540649414, 0.14113235473632812, 1.1612396240234375, 0.6100540161132812, 1.0421142578125, 2.2765579223632812, 1.1644134521484375, -0.05548095703125, 2.7253456115722656, -0.22613525390625, 0.6178817749023438, 0.1047515869140625, -0.8627357482910156, 0.5754451751708984, 2.037933349609375, 0.2882881164550781, -0.02628326416015625, -0.7746601104736328, 0.628631591796875, 0.37622642517089844, 0.8577880859375, 0.145172119140625, -0.9816513061523438, 1.3551464080810547, 2.265422821044922, -0.8712158203125, -0.4440765380859375, -0.1998443603515625, -0.30442047119140625, 1.3773956298828125, 1.1744441986083984, 0.008069992065429688, 0.7617073059082031, -0.30039405822753906, 1.6754150390625, 0.499908447265625, -0.007537841796875, 0.723052978515625, 1.829742431640625, -0.21186256408691406, 2.6851348876953125, -0.002925872802734375, 1.759307861328125, 0.2341327667236328, 1.4759445190429688, 0.7612686157226562, -0.49800872802734375, -0.05108642578125, 1.4571990966796875, 1.6074943542480469, 0.7455902099609375, 0.9163360595703125, -0.8559799194335938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000201.npy"} +{"epoch": 0.30385487528344673, "step": 202, "batch_size": 64, "mean": 0.7447031736373901, "std": 0.9737898707389832, "min": -1.2753753662109375, "p10": -0.4112009048461914, "median": 0.6919937133789062, "p90": 2.010543632507325, "max": 3.2022972106933594, "pos_frac": 0.75, "sample": [-0.4205513000488281, -0.2219696044921875, 2.837268829345703, 1.735321044921875, 1.3433170318603516, 0.05698394775390625, 0.8873062133789062, 0.84844970703125, 0.31508827209472656, 0.9798202514648438, -0.12136459350585938, 1.0344123840332031, 0.4105033874511719, 0.6207752227783203, -0.1226654052734375, 1.1758651733398438, 1.7668685913085938, -0.2311553955078125, 0.1402587890625, -0.7599945068359375, 1.1118392944335938, 0.9645957946777344, 1.3496780395507812, 0.23603057861328125, -0.38257789611816406, 2.3750457763671875, 0.6347084045410156, 3.2022972106933594, 0.6147689819335938, 2.0683460235595703, -0.09391021728515625, 1.5535392761230469, 0.5194034576416016, 0.531036376953125, 0.6667022705078125, -1.2753753662109375, 0.9791412353515625, 0.7884349822998047, 0.5884590148925781, -1.0426368713378906, 1.87567138671875, 1.4183349609375, 0.2937602996826172, 0.7648239135742188, 0.41295623779296875, 2.3292312622070312, 1.305633544921875, 0.8703346252441406, 1.2043533325195312, 0.0417327880859375, -0.018220901489257812, -0.5563774108886719, -0.38938331604003906, -0.8449649810791016, 1.7476654052734375, -0.0212860107421875, 2.2663421630859375, 0.5492439270019531, 0.71728515625, 0.86114501953125, -0.5213661193847656, 3.1641006469726562, 0.8163108825683594, 1.7096118927001953], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000202.npy"} +{"epoch": 0.30536659108087677, "step": 203, "batch_size": 64, "mean": 0.454935759305954, "std": 0.9786709547042847, "min": -1.9995346069335938, "p10": -0.794769287109375, "median": 0.43938732147216797, "p90": 1.6221939086914068, "max": 2.915557861328125, "pos_frac": 0.671875, "sample": [1.3872604370117188, 0.179840087890625, -1.9995346069335938, 0.7113037109375, -0.6156768798828125, -0.05597686767578125, -0.35706329345703125, 0.9525146484375, -0.391082763671875, 0.668701171875, 0.8843536376953125, 1.0200920104980469, 0.2795085906982422, 1.7173919677734375, -0.13742637634277344, 2.7802276611328125, 1.0751876831054688, -0.883758544921875, 1.3335800170898438, 0.566375732421875, -1.20623779296875, -0.44336700439453125, 1.6754684448242188, -0.36034584045410156, -0.13276100158691406, -0.783447265625, 0.02538299560546875, 2.915557861328125, -0.21697235107421875, 1.0822219848632812, 1.7142486572265625, 0.6957931518554688, 1.5168991088867188, 0.3612689971923828, -0.2631988525390625, 0.6294021606445312, 1.34075927734375, 1.2945480346679688, 1.04901123046875, 0.14705276489257812, 1.1382827758789062, -0.577728271484375, 1.3721160888671875, 0.3100128173828125, 0.5175056457519531, -0.8509597778320312, 0.9635772705078125, 0.8015899658203125, 0.318145751953125, 2.756103515625, 0.18779563903808594, 0.15062713623046875, -0.03563690185546875, 1.0219497680664062, 0.6844482421875, -1.205404281616211, -0.8582839965820312, 1.33154296875, 1.6673202514648438, 0.5410079956054688, 0.018642425537109375, -0.79962158203125, -0.6590042114257812, 0.16475677490234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000203.npy"} +{"epoch": 0.30687830687830686, "step": 204, "batch_size": 64, "mean": 0.6477770805358887, "std": 1.1246488094329834, "min": -1.80419921875, "p10": -0.7747045516967772, "median": 0.6065673828125, "p90": 1.837790679931641, "max": 4.0659637451171875, "pos_frac": 0.71875, "sample": [4.0659637451171875, -0.3197479248046875, -0.6876010894775391, 1.1666755676269531, 0.2278308868408203, 3.6001434326171875, 0.598114013671875, 0.5167808532714844, -0.41611480712890625, -1.127227783203125, 0.905670166015625, -1.1181259155273438, -0.35449790954589844, 1.3461036682128906, 0.03049468994140625, 2.5128707885742188, 1.3222274780273438, 0.058078765869140625, -0.939422607421875, -0.8120346069335938, 1.1654167175292969, -0.29392242431640625, 0.730926513671875, 1.8714065551757812, 0.24886703491210938, -0.18206787109375, 1.099395751953125, 1.0486087799072266, 0.3761405944824219, 0.7190017700195312, 2.3213558197021484, 0.615020751953125, 0.7171630859375, 1.57232666015625, 1.1821060180664062, 0.1281566619873047, 0.812042236328125, 2.8690719604492188, -0.5772438049316406, 1.5167312622070312, -0.3213615417480469, -0.048358917236328125, 1.408233642578125, 0.6853504180908203, -1.1857948303222656, 1.4738082885742188, 0.2880096435546875, -0.9998149871826172, 0.4395599365234375, 0.4746360778808594, 1.7593536376953125, 2.1100082397460938, 0.4284477233886719, 1.730133056640625, 0.80633544921875, 1.4912757873535156, 1.6981735229492188, 0.5531158447265625, -0.14180755615234375, 0.337493896484375, -1.80419921875, -0.34721946716308594, 1.4068336486816406, 0.6988372802734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000204.npy"} +{"epoch": 0.30839002267573695, "step": 205, "batch_size": 64, "mean": 0.4502222239971161, "std": 1.1173069477081299, "min": -3.481689453125, "p10": -0.8779598236083983, "median": 0.6769981384277344, "p90": 1.826274871826172, "max": 2.63629150390625, "pos_frac": 0.59375, "sample": [2.63629150390625, -0.08074760437011719, -0.43804931640625, 1.015146255493164, 0.26946067810058594, 2.257770538330078, 0.30808258056640625, 0.876190185546875, -3.481689453125, -0.0205535888671875, -0.04193305969238281, -0.4368457794189453, 2.087982177734375, 1.0338325500488281, 0.887115478515625, -0.3011627197265625, -0.20928955078125, -0.2836151123046875, 0.838653564453125, 0.3819732666015625, -0.07044029235839844, 1.1563720703125, -0.2047405242919922, 1.0967025756835938, 1.2113037109375, -0.02439117431640625, -0.5333251953125, -1.2080841064453125, -0.6917152404785156, 1.5960922241210938, 1.1123580932617188, -1.0868186950683594, 1.7789306640625, 0.91448974609375, -1.0654296875, 1.294708251953125, 0.9804515838623047, 0.6836967468261719, -0.5447196960449219, 1.62921142578125, 1.6522216796875, 2.4940872192382812, 1.3352203369140625, 0.68060302734375, 2.2544021606445312, -0.039093017578125, 0.7069931030273438, 0.20066452026367188, 2.0733718872070312, -1.5437240600585938, -0.32009124755859375, 1.3573150634765625, 1.1126594543457031, 0.7427597045898438, -0.9577789306640625, 0.7691268920898438, 0.6733932495117188, -0.5508232116699219, 1.8465652465820312, -0.5265083312988281, 0.7423782348632812, -1.5659217834472656, -0.25470733642578125, 0.6078433990478516], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000205.npy"} +{"epoch": 0.30990173847316704, "step": 206, "batch_size": 64, "mean": 0.497803270816803, "std": 1.024369478225708, "min": -2.091001510620117, "p10": -0.7115951538085937, "median": 0.5786972045898438, "p90": 1.813837051391602, "max": 2.527362823486328, "pos_frac": 0.703125, "sample": [0.07886886596679688, -0.67706298828125, -0.8580856323242188, -0.6019363403320312, -1.3363418579101562, -0.3797454833984375, 0.683929443359375, 0.8250312805175781, 1.22589111328125, 0.8135871887207031, 1.4429969787597656, 2.1266326904296875, 1.8632469177246094, -0.14415740966796875, 0.7277984619140625, 2.507781982421875, -0.3164482116699219, -0.12517929077148438, 0.84210205078125, 0.014390945434570312, 1.2538604736328125, 0.4102821350097656, 0.3098297119140625, 1.69854736328125, -0.9449691772460938, 0.7797622680664062, 1.1890430450439453, -2.091001510620117, 0.6345729827880859, 1.2982559204101562, 1.3450355529785156, 1.5376625061035156, -0.3966789245605469, 1.080718994140625, 0.700225830078125, 0.25732421875, -2.0037841796875, -0.49832916259765625, -0.07262802124023438, 1.9263839721679688, 2.1202926635742188, 0.4533882141113281, 2.219146728515625, 0.01629638671875, 0.141387939453125, 0.0782012939453125, 2.527362823486328, -0.39276123046875, 0.6225509643554688, 1.20709228515625, 1.3134613037109375, 1.2024307250976562, 0.3620338439941406, -0.18299484252929688, 0.995452880859375, 0.02410125732421875, -0.07769203186035156, 0.39569091796875, 1.0811004638671875, 1.5963134765625, -1.6092586517333984, -0.7263946533203125, 0.5348434448242188, 0.8299484252929688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000206.npy"} +{"epoch": 0.31141345427059713, "step": 207, "batch_size": 64, "mean": 0.5409282445907593, "std": 1.0575623512268066, "min": -1.883392333984375, "p10": -0.6069599151611328, "median": 0.5103874206542969, "p90": 1.5901367187500002, "max": 5.44854736328125, "pos_frac": 0.703125, "sample": [5.44854736328125, -0.28052520751953125, -0.6149673461914062, 0.2865142822265625, 0.6016273498535156, 0.25925254821777344, 1.016571044921875, -0.09086990356445312, -0.47287750244140625, -0.35353851318359375, 0.5680046081542969, 0.6604080200195312, -0.6255893707275391, 1.360687255859375, -0.4799232482910156, 0.6201095581054688, 0.35150146484375, 2.30517578125, 0.3192424774169922, 1.3546218872070312, 0.9569091796875, 1.568817138671875, 0.5715293884277344, 1.348968505859375, -0.17502975463867188, -1.2593994140625, -0.6721172332763672, 1.02886962890625, 0.47904205322265625, 0.41082763671875, 1.45550537109375, 0.4332923889160156, 1.7529296875, 0.4029541015625, 1.0710906982421875, -0.4079780578613281, -0.5882759094238281, 1.0983963012695312, 1.39459228515625, 0.5941696166992188, 1.1738739013671875, 1.906585693359375, 0.113067626953125, 0.6015625, -0.06686019897460938, 0.5417327880859375, 0.40860748291015625, -0.8352737426757812, 0.4080028533935547, 1.599273681640625, -1.1238327026367188, 1.4932632446289062, -1.883392333984375, 0.6218585968017578, 0.8345489501953125, 2.0402145385742188, 0.410858154296875, 0.20988845825195312, -0.2755889892578125, 0.8525409698486328, -0.5267105102539062, -0.384185791015625, 0.854644775390625, 1.9456596374511719], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000207.npy"} +{"epoch": 0.3129251700680272, "step": 208, "batch_size": 64, "mean": 0.509753406047821, "std": 0.9918364882469177, "min": -1.475494384765625, "p10": -0.8205183029174804, "median": 0.6132383346557617, "p90": 1.6944511413574221, "max": 3.0453414916992188, "pos_frac": 0.6875, "sample": [-0.7371025085449219, 0.4283447265625, 1.9461994171142578, -0.8562679290771484, 2.0238189697265625, 2.6347808837890625, 0.48038482666015625, 1.3401641845703125, 0.7364501953125, -1.1759757995605469, -0.4112701416015625, -0.44976806640625, 1.0269203186035156, 0.9702835083007812, 1.0480155944824219, 0.6436347961425781, 0.8539886474609375, 0.723785400390625, -0.13131332397460938, -0.23050689697265625, 0.824951171875, -0.6323699951171875, 0.5828418731689453, 0.5694808959960938, 0.17935752868652344, 1.3617916107177734, 1.435028076171875, -0.35385704040527344, 0.8950996398925781, 1.7147254943847656, 0.1864032745361328, 0.7642745971679688, -0.30695343017578125, 0.985748291015625, -1.2079315185546875, 0.703826904296875, 2.003173828125, -1.475494384765625, 0.5066680908203125, 0.4093189239501953, 1.8363876342773438, 0.224029541015625, 0.465972900390625, -0.5589714050292969, 1.3725833892822266, 0.9696578979492188, 1.5446319580078125, 0.3531990051269531, 1.2073211669921875, 0.3750724792480469, -1.2842636108398438, 1.1257247924804688, -0.6052932739257812, 1.6471443176269531, -1.2813606262207031, 3.0453414916992188, 0.7037353515625, -0.6332969665527344, -0.11052513122558594, 1.1762847900390625, -0.955963134765625, 1.0791778564453125, -0.47681427001953125, 1.3937911987304688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000208.npy"} +{"epoch": 0.3144368858654573, "step": 209, "batch_size": 64, "mean": 0.7123589515686035, "std": 1.1099075078964233, "min": -2.7400360107421875, "p10": -0.4490776062011719, "median": 0.6494102478027344, "p90": 2.166080093383789, "max": 3.375518798828125, "pos_frac": 0.75, "sample": [1.7165794372558594, 1.9895744323730469, 0.01706695556640625, 0.43657684326171875, -0.3021392822265625, 1.25634765625, 1.1560306549072266, 1.4758148193359375, 0.19272613525390625, 2.925201416015625, -0.44889068603515625, 0.9649734497070312, 1.036285400390625, 0.9665603637695312, 1.1138916015625, 0.341400146484375, 0.1377716064453125, -0.14813613891601562, -0.6418418884277344, 2.101043701171875, 0.22777557373046875, -1.1372909545898438, -0.24167633056640625, 0.8520126342773438, 0.7795944213867188, -0.28510284423828125, 2.8932952880859375, 0.9983062744140625, 2.211517333984375, -0.9224700927734375, 0.7053470611572266, 2.731658935546875, 0.5798511505126953, 1.4431533813476562, 0.6070098876953125, 0.1372528076171875, 0.7411117553710938, -0.0914154052734375, -2.7400360107421875, 0.5264739990234375, -0.1629791259765625, 0.3995094299316406, 0.11109161376953125, 0.01062774658203125, 1.6719818115234375, 0.9120998382568359, 3.2385940551757812, 1.5807056427001953, 0.8373870849609375, 1.0595436096191406, 0.6057815551757812, -0.8075065612792969, -0.03613471984863281, -0.10662841796875, -0.44915771484375, 0.6702842712402344, 3.375518798828125, -0.6844482421875, 2.1849822998046875, 0.6285362243652344, 1.1901378631591797, 2.1219749450683594, 0.05941009521484375, 0.8764533996582031], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000209.npy"} +{"epoch": 0.31594860166288735, "step": 210, "batch_size": 64, "mean": 0.8102930784225464, "std": 1.0802526473999023, "min": -1.004058837890625, "p10": -0.4842384338378905, "median": 0.5869636535644531, "p90": 2.31923828125, "max": 3.2038421630859375, "pos_frac": 0.6875, "sample": [2.3435592651367188, 0.09481048583984375, -0.568359375, 1.6034622192382812, 0.09003448486328125, 1.4000701904296875, 2.6800575256347656, 0.7366943359375, 3.2038421630859375, 3.030120849609375, 1.1793251037597656, 1.77996826171875, 2.2536773681640625, -0.1337738037109375, 3.114990234375, -0.0115509033203125, -0.5339984893798828, -0.2158374786376953, 1.10467529296875, -0.7284164428710938, 2.76055908203125, 0.4965019226074219, -0.07268905639648438, -0.253936767578125, 1.4893035888671875, -0.5333633422851562, 0.15349769592285156, -0.3220195770263672, 1.7002182006835938, -0.09107208251953125, 1.7947998046875, 0.4442253112792969, 2.1289634704589844, 0.722076416015625, 1.7583866119384766, 0.13064956665039062, -0.09594345092773438, 0.5427436828613281, 0.5535011291503906, 1.6030826568603516, -0.8579196929931641, 2.429443359375, 0.564422607421875, 1.0785293579101562, 1.1353988647460938, -0.06289482116699219, 1.2680244445800781, -0.13074111938476562, 1.8783645629882812, 0.2154541015625, -0.15203094482421875, 0.24078750610351562, 1.8521156311035156, -0.7338390350341797, 0.8848953247070312, 2.2624893188476562, 1.1795310974121094, 0.6095046997070312, 0.88385009765625, -1.004058837890625, -0.1291637420654297, -0.3696136474609375, 0.48626708984375, 0.9971065521240234], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000210.npy"} +{"epoch": 0.31746031746031744, "step": 211, "batch_size": 64, "mean": 0.6399465799331665, "std": 0.9210435748100281, "min": -1.5232315063476562, "p10": -0.25508346557617184, "median": 0.4735221862792969, "p90": 1.9056182861328126, "max": 3.120391845703125, "pos_frac": 0.765625, "sample": [0.1281871795654297, -1.0771503448486328, 1.4244537353515625, 0.8473930358886719, 0.45140838623046875, 0.6285877227783203, -1.1389312744140625, 0.399017333984375, -0.167083740234375, 0.7149066925048828, 2.6797637939453125, 0.053985595703125, -0.22540283203125, 1.423431396484375, 0.5357818603515625, 1.2722187042236328, 0.3653602600097656, 0.018032073974609375, 1.0405960083007812, 0.4929618835449219, 0.5914382934570312, -0.037567138671875, 0.3490753173828125, -0.24005126953125, 0.329193115234375, -0.24387359619140625, 0.27259063720703125, 0.8868331909179688, 2.5148353576660156, 0.6331558227539062, 0.4203987121582031, 1.858642578125, 0.5298595428466797, -0.210601806640625, 0.99127197265625, 1.1664886474609375, 1.2488784790039062, 0.3942108154296875, -0.2794914245605469, 0.9679374694824219, 0.4540824890136719, 3.120391845703125, 1.925750732421875, 0.1988964080810547, -0.0820770263671875, 0.18149948120117188, 2.0121002197265625, -0.2598876953125, 0.37261962890625, 1.76507568359375, -0.3808441162109375, 0.9511070251464844, 2.6277313232421875, 1.7292022705078125, 2.3171234130859375, 1.1403388977050781, 0.5246963500976562, 1.4324188232421875, -0.08129501342773438, 0.411956787109375, -0.44918060302734375, -1.5232315063476562, 0.5503387451171875, 0.00702667236328125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000211.npy"} +{"epoch": 0.31897203325774753, "step": 212, "batch_size": 64, "mean": 0.678282618522644, "std": 1.0403882265090942, "min": -2.037445068359375, "p10": -0.5202331542968749, "median": 0.8024826049804688, "p90": 1.8940254211425782, "max": 4.3505859375, "pos_frac": 0.765625, "sample": [1.5827560424804688, 0.9489402770996094, 0.97576904296875, 1.5206298828125, 0.07765579223632812, 1.3151741027832031, 1.5245323181152344, 0.9765834808349609, -0.1756439208984375, 0.8199844360351562, -0.4240226745605469, -2.037445068359375, 1.9215126037597656, 4.3505859375, 0.7103347778320312, 2.0301132202148438, 1.1541824340820312, 0.6353797912597656, -1.0110702514648438, 0.8571662902832031, 0.24351882934570312, -0.020847320556640625, -0.2605762481689453, 0.6580467224121094, -1.4070510864257812, 0.9659652709960938, 2.136810302734375, -1.2437629699707031, 1.9123783111572266, 1.5808258056640625, 0.7036476135253906, 1.2881126403808594, 1.2317581176757812, 0.8502044677734375, 1.7690582275390625, 0.9150466918945312, -0.3557548522949219, -0.38770294189453125, 0.46067237854003906, 0.22092247009277344, 0.09101104736328125, 0.320709228515625, 0.9716300964355469, -0.57415771484375, 1.0274887084960938, 0.7849807739257812, 1.1995582580566406, 2.62548828125, -0.5614662170410156, -0.3311767578125, 0.13644981384277344, 0.761138916015625, 1.260955810546875, 0.08445358276367188, 0.1974334716796875, 1.8896942138671875, 1.6759872436523438, 1.8958816528320312, 0.8247871398925781, 0.18566131591796875, 0.934234619140625, 0.054046630859375, -1.0021095275878906, -0.05698394775390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000212.npy"} +{"epoch": 0.3204837490551776, "step": 213, "batch_size": 64, "mean": 0.6786131858825684, "std": 1.083539605140686, "min": -1.9692802429199219, "p10": -0.5853313446044921, "median": 0.5193195343017578, "p90": 1.9691810607910158, "max": 3.733795166015625, "pos_frac": 0.765625, "sample": [1.1341590881347656, -0.004150390625, 0.460113525390625, 1.2177200317382812, 1.045684814453125, 0.4451751708984375, -0.486785888671875, -0.517578125, -1.628021240234375, 1.589691162109375, 1.738067626953125, 0.32930946350097656, 0.48313140869140625, -0.89752197265625, -1.9692802429199219, -0.7555694580078125, 0.8341903686523438, 2.06585693359375, -0.8826484680175781, 0.9860687255859375, 1.8236312866210938, -1.351675033569336, 2.8843765258789062, 1.677581787109375, 1.9894256591796875, 0.37011146545410156, -0.16803932189941406, -0.4398956298828125, 1.035074234008789, -0.16293716430664062, -0.6143684387207031, -0.0419921875, 1.7108154296875, 0.4180450439453125, 0.6338653564453125, 1.314910888671875, 0.33757972717285156, 1.4097576141357422, 1.3557319641113281, 0.17638015747070312, 0.5555076599121094, 0.38516998291015625, 2.6014156341552734, 0.1921539306640625, 1.5920028686523438, 0.31198883056640625, 0.10770797729492188, 0.21026229858398438, 1.9219436645507812, -0.26206207275390625, 0.09433746337890625, 0.8674125671386719, 1.326171875, 0.5856170654296875, 1.03009033203125, 0.4001922607421875, 0.60943603515625, 3.733795166015625, 0.3489990234375, 0.012664794921875, 0.7766189575195312, 2.978412628173828, 1.4411849975585938, 2.064228057861328], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000213.npy"} +{"epoch": 0.3219954648526077, "step": 214, "batch_size": 64, "mean": 0.679936408996582, "std": 1.0573475360870361, "min": -1.4730224609375, "p10": -0.6112939834594726, "median": 0.5749998092651367, "p90": 1.7938056945800782, "max": 3.62652587890625, "pos_frac": 0.765625, "sample": [1.4756088256835938, 0.19165802001953125, -1.216217041015625, 0.4911766052246094, 0.9429206848144531, 1.2401256561279297, -0.001201629638671875, 0.025358200073242188, 0.7869949340820312, 0.8262252807617188, 1.9117603302001953, 0.9887714385986328, 0.05460357666015625, 0.7821502685546875, 0.12377166748046875, 3.573760986328125, 0.5696182250976562, 1.4207916259765625, 0.984466552734375, 0.5132102966308594, -0.077239990234375, 0.7867240905761719, -0.04034423828125, 0.10247802734375, -0.24027252197265625, 1.2121124267578125, -0.8659439086914062, 0.3879261016845703, 1.8097610473632812, 1.6736679077148438, 1.3913650512695312, 0.2712860107421875, -0.9882564544677734, 1.287841796875, 1.4987564086914062, 1.4844436645507812, -1.4730224609375, -0.0836639404296875, 1.9197521209716797, 3.62652587890625, -0.4644775390625, -0.6275920867919922, 1.0870437622070312, 0.484344482421875, 3.033740997314453, 3.4718780517578125, 0.5917930603027344, -0.026702880859375, 0.19570541381835938, -0.5732650756835938, -0.6992034912109375, 1.7565765380859375, 0.6118679046630859, 0.016780853271484375, 0.5803813934326172, 0.1630687713623047, 0.820220947265625, 0.4178276062011719, -0.7946052551269531, 0.7283782958984375, 1.3180007934570312, 1.5570106506347656, 0.18861007690429688, 0.3090934753417969], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000214.npy"} +{"epoch": 0.3235071806500378, "step": 215, "batch_size": 64, "mean": 0.6934951543807983, "std": 1.1191221475601196, "min": -1.668121337890625, "p10": -0.7350914001464843, "median": 0.4878273010253906, "p90": 2.0292240142822267, "max": 3.582233428955078, "pos_frac": 0.6875, "sample": [0.3495006561279297, 0.34979248046875, -0.7666168212890625, -0.2326812744140625, 1.6310958862304688, 0.1295623779296875, 0.8102493286132812, -0.8627471923828125, 1.997262954711914, 1.6797866821289062, 0.45214080810546875, -0.3537864685058594, -0.9910507202148438, -0.12334442138671875, 1.1659469604492188, 1.9924240112304688, -0.7351303100585938, 1.2545242309570312, -0.14641571044921875, -1.668121337890625, 1.2413330078125, 1.6299247741699219, 0.2665519714355469, 2.001861572265625, 1.6772136688232422, -0.0669403076171875, -0.61175537109375, 0.21262359619140625, -0.243988037109375, -0.4856700897216797, 0.4510002136230469, 2.5293617248535156, 0.7790603637695312, 0.733978271484375, 0.582305908203125, -1.03912353515625, 0.38617515563964844, 1.0519027709960938, 0.407623291015625, 2.306243896484375, 0.4808063507080078, 0.48383331298828125, -0.6891021728515625, -0.7350006103515625, 1.560791015625, 3.1456451416015625, 1.1828765869140625, 3.582233428955078, 2.0409507751464844, 1.257598876953125, 1.2253093719482422, 2.4415817260742188, 0.4918212890625, -0.9305458068847656, 0.8661746978759766, -0.06549072265625, 1.6488037109375, 2.9325103759765625, 0.7864837646484375, -0.28258705139160156, 1.7155380249023438, 0.3002185821533203, 1.3023757934570312, -0.10120964050292969], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000215.npy"} +{"epoch": 0.3250188964474679, "step": 216, "batch_size": 64, "mean": 0.8188657760620117, "std": 1.564525842666626, "min": -1.6856460571289062, "p10": -0.8006614685058594, "median": 0.5163822174072266, "p90": 2.2569095611572267, "max": 8.789642333984375, "pos_frac": 0.75, "sample": [0.31626129150390625, 1.337554931640625, 0.029327392578125, 2.9179153442382812, 1.7115249633789062, 0.30947113037109375, 0.1480579376220703, 0.23665618896484375, 1.614227294921875, 2.585235595703125, 1.3859329223632812, 1.2923355102539062, 0.16791534423828125, -1.1207427978515625, 1.7736663818359375, -0.900238037109375, 0.6034908294677734, 0.938995361328125, -0.4210968017578125, 0.01021575927734375, 0.9090576171875, 0.9067535400390625, -0.8021011352539062, 1.6508865356445312, 1.2836227416992188, 0.8215007781982422, 1.4359283447265625, -0.07959365844726562, 0.24997711181640625, 3.6205902099609375, 1.37451171875, -0.33698272705078125, 0.4292736053466797, 1.309295654296875, -1.466888427734375, 1.009552001953125, -1.2686004638671875, 0.397003173828125, 2.0032272338867188, 0.013669967651367188, -0.058345794677734375, 8.789642333984375, 4.568164825439453, 0.3743133544921875, 0.07088851928710938, -1.6856460571289062, 1.16766357421875, 0.17288589477539062, -0.14897918701171875, 2.207244873046875, 2.2781944274902344, -0.79730224609375, -0.8922805786132812, 0.6635875701904297, 2.1114578247070312, -0.3886985778808594, 0.05550384521484375, 1.172515869140625, -0.3838348388671875, 2.31060791015625, -0.7515239715576172, 1.2431983947753906, 0.19931793212890625, 1.7314453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000216.npy"} +{"epoch": 0.32653061224489793, "step": 217, "batch_size": 64, "mean": 0.966893196105957, "std": 1.2002817392349243, "min": -1.4853363037109375, "p10": -0.16903800964355467, "median": 0.71435546875, "p90": 2.3303737640380864, "max": 5.3933258056640625, "pos_frac": 0.828125, "sample": [1.0902252197265625, 3.159149169921875, 0.18905258178710938, 0.8810501098632812, 0.651702880859375, -0.2118377685546875, 0.1965007781982422, 5.3933258056640625, -0.0628814697265625, 0.570159912109375, 2.95819091796875, 2.2549514770507812, 0.6800079345703125, 0.7487030029296875, 1.2739486694335938, 0.1119384765625, 0.810302734375, 0.23418045043945312, 0.3342742919921875, 0.3268585205078125, 0.2164764404296875, 1.5344257354736328, 0.6485595703125, 1.0542030334472656, 1.2406082153320312, 1.9737167358398438, -0.8031234741210938, 0.6115570068359375, 0.44773101806640625, 1.7107734680175781, 2.3626976013183594, 1.68212890625, 0.519317626953125, -0.17195510864257812, 1.5438079833984375, -0.8492965698242188, 0.7972335815429688, 2.577299118041992, 0.2618675231933594, 1.2488632202148438, -1.4853363037109375, 1.8175029754638672, 1.2455902099609375, -0.7589492797851562, 0.5372390747070312, 0.5734329223632812, -0.1622314453125, 0.47747802734375, -0.0595703125, 4.7446136474609375, 0.5433521270751953, 2.0732192993164062, 0.7779159545898438, 0.15692710876464844, 2.2240524291992188, 1.1037445068359375, 2.5168914794921875, 0.8806686401367188, 0.199371337890625, 2.1004714965820312, -0.07079315185546875, 1.132659912109375, 1.6949386596679688, -0.5787200927734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000217.npy"} +{"epoch": 0.328042328042328, "step": 218, "batch_size": 64, "mean": 0.5216437578201294, "std": 1.1808879375457764, "min": -3.0334548950195312, "p10": -0.7871156692504881, "median": 0.37345123291015625, "p90": 1.8313911437988282, "max": 4.113197326660156, "pos_frac": 0.703125, "sample": [0.19274139404296875, 0.6753673553466797, -1.2630157470703125, 0.8262119293212891, -0.11529541015625, 0.988250732421875, 0.01674652099609375, -0.261260986328125, -0.6277313232421875, 1.1174545288085938, 0.77325439453125, 0.5974502563476562, 1.1597709655761719, -0.43933868408203125, -0.8553733825683594, -0.31828880310058594, 0.38614845275878906, 1.5044479370117188, -1.89825439453125, -0.4376220703125, 1.6434707641601562, 0.304443359375, 0.3353729248046875, 0.2833251953125, 0.9654369354248047, 3.2720794677734375, 1.8370285034179688, 1.8824310302734375, 1.4206600189208984, 1.1359710693359375, -0.5109043121337891, 2.431976318359375, 0.27691650390625, -1.122446060180664, -0.6278476715087891, 0.3192901611328125, 0.7793426513671875, 0.36075401306152344, 0.9639778137207031, 1.8182373046875, 2.906627655029297, 0.21893310546875, -0.17514419555664062, -0.2843284606933594, 0.17658233642578125, 0.04264640808105469, 0.83642578125, 0.5570964813232422, -0.9241943359375, -0.45536041259765625, 0.7069454193115234, -1.1442527770996094, 1.6909027099609375, 0.05078125, 0.8566207885742188, 1.721282958984375, 1.23773193359375, 4.113197326660156, -3.0334548950195312, -0.08648681640625, 0.8393173217773438, 0.31745147705078125, 1.256622314453125, 2.1680755615234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000218.npy"} +{"epoch": 0.3295540438397581, "step": 219, "batch_size": 64, "mean": 0.512610912322998, "std": 0.8763750791549683, "min": -1.28857421875, "p10": -0.551102066040039, "median": 0.3947124481201172, "p90": 1.590409088134766, "max": 2.9342498779296875, "pos_frac": 0.6875, "sample": [1.6093215942382812, 1.2910614013671875, 2.1008758544921875, 0.9031715393066406, 0.5111160278320312, -0.0147705078125, 0.053249359130859375, 0.16202163696289062, 1.1777572631835938, -0.6049041748046875, -0.28958702087402344, 2.9342498779296875, -0.963836669921875, -0.10753440856933594, 0.02841949462890625, 1.5198974609375, 0.36115264892578125, 0.8908805847167969, 0.7901458740234375, 0.16926002502441406, 0.2855110168457031, -0.14101409912109375, -0.9566650390625, 0.3815116882324219, 0.1055908203125, -0.18804168701171875, -0.189544677734375, -0.31876182556152344, 0.665924072265625, 0.9583892822265625, 0.3995018005371094, 1.070220947265625, 1.6994171142578125, 1.4933605194091797, 0.7447357177734375, 1.5342483520507812, 0.21826171875, 0.6248149871826172, -0.337005615234375, 0.0545196533203125, -0.027812957763671875, -0.3298492431640625, -0.7803421020507812, 1.0255050659179688, 1.1294403076171875, -0.17905807495117188, 1.1693344116210938, -1.28857421875, 0.6588134765625, 2.1905059814453125, 0.629364013671875, 0.318267822265625, 0.8388099670410156, -0.10546875, -0.8009452819824219, -0.6544647216796875, 1.5462799072265625, 1.1684837341308594, 1.9537200927734375, -0.4255638122558594, 0.7160377502441406, 0.389923095703125, 2.433185577392578, 0.604583740234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000219.npy"} +{"epoch": 0.3310657596371882, "step": 220, "batch_size": 64, "mean": 0.6585406064987183, "std": 0.9681487083435059, "min": -1.3036079406738281, "p10": -0.46912727355957023, "median": 0.5930976867675781, "p90": 1.857683563232422, "max": 3.37969970703125, "pos_frac": 0.71875, "sample": [0.026885986328125, -0.232330322265625, -0.1252288818359375, 0.39427947998046875, 0.4199028015136719, 1.8286361694335938, 3.37969970703125, 0.5510101318359375, 1.0533905029296875, 0.6351852416992188, -0.5030670166015625, 3.308013916015625, -0.15995407104492188, -0.7930679321289062, -0.20910072326660156, 0.33936500549316406, 2.1339340209960938, 0.176116943359375, 0.344635009765625, 0.2325286865234375, 1.1759624481201172, -0.049346923828125, -0.36966514587402344, -0.5688323974609375, 0.6895751953125, -0.2213878631591797, 0.35412025451660156, -1.0614242553710938, 1.3848876953125, 1.5035400390625, 1.7081985473632812, -0.07108306884765625, -0.248626708984375, 1.611917495727539, 2.177074432373047, 0.4020423889160156, 0.7503204345703125, 1.7880401611328125, 1.9753475189208984, 0.41144561767578125, 0.3108253479003906, 1.0842018127441406, 0.8677597045898438, 0.8841972351074219, 1.9631805419921875, 0.22119903564453125, 0.6628952026367188, -0.0029582977294921875, 1.2805118560791016, 1.4491195678710938, 0.9972648620605469, -1.2068519592285156, 1.212289810180664, 0.39834022521972656, 0.666046142578125, 1.1122055053710938, -1.3036079406738281, 1.3276748657226562, 1.4519424438476562, -0.6240005493164062, 1.8701324462890625, -0.3899345397949219, 1.0405464172363281, 0.7306785583496094], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000220.npy"} +{"epoch": 0.3325774754346183, "step": 221, "batch_size": 64, "mean": 0.7435950636863708, "std": 1.0813034772872925, "min": -1.4416656494140625, "p10": -0.4301776885986328, "median": 0.5719842910766602, "p90": 1.9537548065185548, "max": 4.2741241455078125, "pos_frac": 0.78125, "sample": [0.05773162841796875, 0.2823028564453125, -0.437255859375, 0.4125213623046875, 0.21304702758789062, 1.7943115234375, 0.3318920135498047, 0.2656402587890625, 1.5207748413085938, 1.13690185546875, 0.16566085815429688, 1.113983154296875, 0.7865142822265625, 0.3090972900390625, 0.00778961181640625, 0.4196643829345703, -0.6018753051757812, 0.5717754364013672, 0.7046470642089844, 0.8359222412109375, 1.0574932098388672, 1.1497688293457031, 1.9168434143066406, 0.5809707641601562, 1.631195068359375, 1.5216598510742188, 1.4239330291748047, 2.7363853454589844, -0.657196044921875, -0.09314727783203125, 1.444427490234375, 2.5281524658203125, -0.178924560546875, 0.8412055969238281, 3.340789794921875, 0.35471343994140625, 1.969573974609375, -1.4416656494140625, 0.5721931457519531, 1.057098388671875, 1.4686203002929688, -0.7652091979980469, 0.9323310852050781, -0.40180397033691406, -0.4136619567871094, -0.30899810791015625, 0.13871002197265625, 2.828277587890625, 0.406219482421875, -0.0856475830078125, -0.4871673583984375, -1.2597503662109375, 0.4426689147949219, 3.244781494140625, 0.7628021240234375, -0.3843650817871094, 0.4509124755859375, 1.0443344116210938, 4.2741241455078125, 0.32486724853515625, 0.7169113159179688, 0.3942108154296875, 1.5435638427734375, 1.0768356323242188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000221.npy"} +{"epoch": 0.3340891912320484, "step": 222, "batch_size": 64, "mean": 0.8200874328613281, "std": 0.9884326457977295, "min": -1.355438232421875, "p10": -0.42623825073242183, "median": 0.7858438491821289, "p90": 1.9533452987670898, "max": 3.95849609375, "pos_frac": 0.796875, "sample": [-0.1415729522705078, -1.355438232421875, 0.7997913360595703, 0.9312896728515625, 1.3116645812988281, 1.9361820220947266, 1.9607009887695312, 0.053592681884765625, 1.40087890625, 2.0147933959960938, 0.440521240234375, 0.8695125579833984, 0.5552902221679688, -0.15192031860351562, 1.6697959899902344, 0.44509124755859375, 1.4842071533203125, 0.6843757629394531, 0.03965950012207031, 0.7549667358398438, 0.6835174560546875, 0.12482643127441406, 1.3245697021484375, 0.9527969360351562, -0.3677215576171875, 0.8084716796875, 0.4248504638671875, 2.9439697265625, -0.4647369384765625, 1.0545539855957031, 0.689239501953125, 1.372314453125, 1.3569793701171875, 1.5565776824951172, 1.2414703369140625, 1.6223220825195312, 1.0659370422363281, -0.31388092041015625, -0.5351333618164062, -0.2386932373046875, -1.083740234375, 1.125335693359375, -0.03354644775390625, 2.2342605590820312, 1.1934700012207031, 1.29388427734375, 0.21788787841796875, 0.7718963623046875, -0.45131683349609375, 0.7364501953125, 0.7349929809570312, 0.142608642578125, 1.6229248046875, 2.679189682006836, -0.45886993408203125, 0.4792289733886719, 0.37851715087890625, 0.002044677734375, 1.620391845703125, 2.9915771484375, 1.1094818115234375, 0.9727039337158203, 3.95849609375, -0.7578887939453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000222.npy"} +{"epoch": 0.3356009070294785, "step": 223, "batch_size": 64, "mean": 0.9309903383255005, "std": 1.0017635822296143, "min": -1.2676734924316406, "p10": -0.21974868774414058, "median": 1.0859565734863281, "p90": 2.2049766540527354, "max": 3.241779327392578, "pos_frac": 0.8125, "sample": [1.3050403594970703, 0.9745941162109375, 1.5994033813476562, 1.8437728881835938, -0.23647308349609375, 1.956207275390625, -0.8022308349609375, 1.4440765380859375, 0.665802001953125, 1.1787776947021484, -0.10543632507324219, 2.4692840576171875, 1.1220989227294922, 1.6176376342773438, -1.2676734924316406, -0.07818222045898438, 0.16354942321777344, 1.1029129028320312, 1.8809814453125, 0.20246124267578125, 1.3218193054199219, 0.505218505859375, 0.2665863037109375, 0.9239768981933594, 0.103057861328125, 1.1099090576171875, 0.34268951416015625, 1.6126556396484375, -0.6341590881347656, -0.31708526611328125, 0.7602920532226562, 1.5887947082519531, -0.18072509765625, 1.1161689758300781, 1.5152740478515625, 2.821849822998047, 1.3742141723632812, 1.4788627624511719, -0.0997161865234375, -0.16847610473632812, 0.31798553466796875, 0.19696044921875, 1.0852432250976562, 0.552978515625, 2.506805419921875, 0.4028167724609375, 3.186187744140625, 1.4826297760009766, 1.5548553466796875, 1.086669921875, 3.241779327392578, -0.9192352294921875, 1.6556854248046875, 2.3115921020507812, 1.67071533203125, 1.4356918334960938, 1.3029708862304688, 0.8868446350097656, 0.6253395080566406, -0.7128257751464844, 0.15303802490234375, 2.8871307373046875, 0.16038131713867188, 0.033329010009765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000223.npy"} +{"epoch": 0.3371126228269085, "step": 224, "batch_size": 64, "mean": 0.4811859726905823, "std": 0.8317098021507263, "min": -1.0922889709472656, "p10": -0.5208671569824219, "median": 0.2779998779296875, "p90": 1.6447736740112309, "max": 2.8178329467773438, "pos_frac": 0.71875, "sample": [0.8927345275878906, 2.0894088745117188, 1.4523353576660156, 0.1725616455078125, -0.02581787109375, 1.3755607604980469, 1.0211639404296875, 0.19725799560546875, 0.1691303253173828, 1.5276641845703125, 1.2979621887207031, 0.4141807556152344, 1.2159500122070312, 1.6992912292480469, -0.3178253173828125, 0.2616138458251953, 0.18389320373535156, 0.37433815002441406, 1.8910865783691406, 0.497833251953125, -0.14072418212890625, 0.724090576171875, 0.25860595703125, -1.0922889709472656, -0.5162582397460938, -0.6730880737304688, 0.02651214599609375, -0.9924697875976562, -0.081146240234375, 1.01416015625, 0.8111305236816406, 0.18985748291015625, -0.44603538513183594, 0.17017364501953125, -0.7014846801757812, 0.06560134887695312, -0.15224266052246094, 0.026836395263671875, 0.8971023559570312, 0.2001190185546875, -0.5228424072265625, 1.0478363037109375, 0.16278076171875, -0.3726673126220703, 1.2746429443359375, -0.345458984375, 1.6949634552001953, -0.052825927734375, -0.7422866821289062, 0.5477294921875, 0.15520858764648438, 0.2943859100341797, 1.7115325927734375, 0.9479026794433594, 2.8178329467773438, 0.9009628295898438, 0.3940315246582031, 1.2330780029296875, -0.11669921875, 1.2764625549316406, -0.819122314453125, 2.208953857421875, 0.4280548095703125, 0.6926708221435547], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000224.npy"} +{"epoch": 0.3386243386243386, "step": 225, "batch_size": 64, "mean": 0.7516759634017944, "std": 1.1306260824203491, "min": -1.625030517578125, "p10": -0.48363666534423827, "median": 0.4878196716308594, "p90": 2.1123611450195314, "max": 3.846405029296875, "pos_frac": 0.734375, "sample": [0.1769428253173828, 0.9464569091796875, -0.17733383178710938, -0.4700469970703125, 0.28143310546875, 2.48095703125, -0.4019660949707031, 0.43502235412597656, 0.3863792419433594, 1.3031501770019531, 0.9178123474121094, 1.934234619140625, 0.5309486389160156, 1.8736114501953125, -0.4884757995605469, 0.5272560119628906, 0.48946380615234375, 1.8230972290039062, 0.5550594329833984, 1.443084716796875, 2.133411407470703, 1.1719303131103516, 1.849090576171875, 0.923492431640625, 3.846405029296875, -0.16352462768554688, -0.673736572265625, 0.5983772277832031, 2.613008499145508, -0.47234535217285156, -0.07177352905273438, -1.625030517578125, 0.30301666259765625, -0.7578163146972656, 2.058197021484375, 1.399505615234375, -0.008016586303710938, 3.7339019775390625, 0.152008056640625, -0.0909576416015625, 0.184967041015625, 0.486175537109375, 2.0374374389648438, 0.09948348999023438, 2.598114013671875, -0.6282958984375, -0.11053466796875, -0.05481719970703125, 2.063243865966797, 0.18470001220703125, 1.2212982177734375, 0.20013999938964844, 1.6943511962890625, 1.2894058227539062, 1.6935806274414062, 0.0039539337158203125, 0.28932952880859375, 1.48577880859375, 2.2051448822021484, 0.3267803192138672, -1.4267120361328125, 0.483551025390625, 1.322509765625, -1.0285568237304688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000225.npy"} +{"epoch": 0.3401360544217687, "step": 226, "batch_size": 64, "mean": 0.6820334792137146, "std": 1.0654981136322021, "min": -1.713134765625, "p10": -0.47329101562499987, "median": 0.5360279083251953, "p90": 2.144526290893555, "max": 3.6206436157226562, "pos_frac": 0.734375, "sample": [1.0577621459960938, -0.9569854736328125, 1.0186386108398438, 0.9678077697753906, -0.23819732666015625, 0.1310272216796875, 3.6206436157226562, 0.6608543395996094, 0.057888031005859375, -0.8156852722167969, 0.6966476440429688, 0.14101600646972656, 0.6826553344726562, 1.579061508178711, -0.3641510009765625, -0.0142364501953125, 1.5371589660644531, 0.7036590576171875, 2.009246826171875, 1.6236038208007812, 2.1269264221191406, 0.425506591796875, -0.14501953125, -0.5200653076171875, -0.01860809326171875, 0.32810401916503906, 0.3106269836425781, 2.7808151245117188, 1.545562744140625, 2.3380355834960938, 0.3187122344970703, 1.6066131591796875, 0.6423988342285156, 0.5619583129882812, 2.1228713989257812, -0.0341339111328125, 1.6556434631347656, -1.2744293212890625, 0.014820098876953125, -0.22324752807617188, 0.739166259765625, 1.880157470703125, -0.22999191284179688, -1.2519245147705078, 2.1894302368164062, 2.152069091796875, -1.713134765625, 1.3414840698242188, -1.0597915649414062, 0.33589935302734375, 0.897735595703125, 0.5100975036621094, 0.06219482421875, 0.8351955413818359, 1.615447998046875, 2.2166175842285156, 0.46099853515625, 1.1732559204101562, 0.22961044311523438, 0.13993072509765625, 2.4085464477539062, -0.04498291015625, 0.15692901611328125, -0.056304931640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000226.npy"} +{"epoch": 0.3416477702191988, "step": 227, "batch_size": 64, "mean": 0.5975878238677979, "std": 1.1150716543197632, "min": -3.2262115478515625, "p10": -0.5149772644042968, "median": 0.5856094360351562, "p90": 2.0145175933837893, "max": 2.88055419921875, "pos_frac": 0.734375, "sample": [1.9278640747070312, 0.6184101104736328, -0.1580352783203125, 1.8927078247070312, 0.7356491088867188, 0.08916473388671875, 1.75958251953125, 2.88055419921875, 0.3896331787109375, -0.35552215576171875, 1.5137100219726562, 0.0240478515625, -0.6718559265136719, 2.0534019470214844, 2.051654815673828, 0.5390548706054688, -0.164398193359375, 0.009990692138671875, 1.2824897766113281, 0.7586898803710938, -0.0736846923828125, 1.4721221923828125, 1.6260910034179688, 0.2591400146484375, 2.241058349609375, 0.9123687744140625, -0.3629875183105469, 1.103546142578125, -0.196563720703125, 0.18863868713378906, 1.2586822509765625, 2.6897201538085938, 1.4678802490234375, -0.5382843017578125, -3.2262115478515625, 1.1175880432128906, 2.8609962463378906, -0.46059417724609375, 1.0861892700195312, 1.3168792724609375, -0.1437225341796875, 0.6122894287109375, 0.36611175537109375, 0.7188873291015625, 1.4841461181640625, -0.16062164306640625, 0.2978973388671875, 0.558929443359375, -1.1088790893554688, -0.1384429931640625, 0.2905120849609375, 0.6551685333251953, 0.8155670166015625, 0.6680526733398438, 0.34662628173828125, 0.29608154296875, -0.5423660278320312, 0.14835357666015625, 0.8622531890869141, -2.0087966918945312, 0.27274322509765625, -1.7755508422851562, 2.0802059173583984, 1.730804443359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000227.npy"} +{"epoch": 0.3431594860166289, "step": 228, "batch_size": 64, "mean": 0.709984540939331, "std": 1.2416167259216309, "min": -2.082723617553711, "p10": -0.9832006454467772, "median": 0.8117074966430664, "p90": 2.3521545410156253, "max": 3.7451171875, "pos_frac": 0.734375, "sample": [0.08952140808105469, 1.1157703399658203, 1.0568084716796875, 3.619047164916992, 1.2538986206054688, 0.6009902954101562, 0.4317436218261719, 2.4785079956054688, 1.88818359375, -0.8506240844726562, 0.5940914154052734, -0.181549072265625, 0.8814697265625, -2.082723617553711, -0.0263671875, 0.30641746520996094, 0.8297500610351562, -0.6314315795898438, 1.1030006408691406, 0.9845352172851562, -0.39514923095703125, 2.4305419921875, -0.153228759765625, 1.6694717407226562, -1.163116455078125, 0.8443641662597656, 1.0797290802001953, 3.415863037109375, 0.8248710632324219, 1.3054161071777344, 0.3776283264160156, -0.28704833984375, 0.3680534362792969, -0.38797760009765625, 1.3206787109375, -1.0172405242919922, 1.1026191711425781, 0.03509330749511719, -1.333038330078125, -1.1615314483642578, 0.7809829711914062, 1.1993026733398438, 3.7451171875, -0.9037742614746094, -0.2779541015625, 0.16989707946777344, 1.2343826293945312, 2.37762451171875, -1.8253517150878906, 0.8026351928710938, 0.8207798004150391, 0.4287872314453125, 3.023223876953125, 2.1032142639160156, 0.02835845947265625, 1.6030807495117188, 2.292724609375, 1.654937744140625, 0.9127960205078125, 0.7236785888671875, 1.6074600219726562, 0.7819061279296875, 1.0359020233154297, -1.217742919921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000228.npy"} +{"epoch": 0.34467120181405897, "step": 229, "batch_size": 64, "mean": 0.5615620613098145, "std": 0.9434514045715332, "min": -2.237262725830078, "p10": -0.46720046997070314, "median": 0.6254777908325195, "p90": 1.780450439453125, "max": 2.504362106323242, "pos_frac": 0.765625, "sample": [0.9019927978515625, 1.7464523315429688, 0.9592514038085938, 0.54644775390625, 1.0839900970458984, 1.7950210571289062, 0.7820262908935547, 0.4768524169921875, 0.05059051513671875, -0.06378936767578125, 0.5713577270507812, -0.909576416015625, 1.08935546875, -1.4630813598632812, 0.301116943359375, 0.7686405181884766, 0.58087158203125, -0.4282341003417969, 0.9785690307617188, 0.9976234436035156, 0.31295204162597656, 0.04739189147949219, -0.18625450134277344, 1.0182037353515625, 0.18829345703125, 1.1867713928222656, -0.9661178588867188, 0.37853431701660156, 0.02313995361328125, -1.1761283874511719, 0.7517547607421875, 2.084136962890625, -2.237262725830078, 2.504362106323242, 0.0594482421875, -0.1258220672607422, 1.1555938720703125, -0.4723320007324219, 0.7172927856445312, 0.6479339599609375, 1.8290328979492188, 1.3507308959960938, 0.6318531036376953, -0.11396408081054688, 1.1917991638183594, -0.4552268981933594, 1.960367202758789, 0.9706783294677734, 1.3979969024658203, 0.42291259765625, 1.0218734741210938, -0.33293724060058594, 0.6191024780273438, -0.09644317626953125, 1.8345794677734375, 1.580413818359375, 0.26442718505859375, 0.29798126220703125, 1.7214813232421875, 2.438720703125, 1.1633224487304688, 1.068878173828125, 0.014446258544921875, -1.5194225311279297], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000229.npy"} +{"epoch": 0.34618291761148906, "step": 230, "batch_size": 64, "mean": 0.6093416213989258, "std": 1.1483663320541382, "min": -1.8448104858398438, "p10": -0.639063262939453, "median": 0.6112136840820312, "p90": 2.0677511215209963, "max": 4.26416015625, "pos_frac": 0.734375, "sample": [0.19925689697265625, 1.0903778076171875, 1.0258235931396484, 1.0208740234375, 0.7300033569335938, -1.2109375, -0.5216407775878906, -0.38863372802734375, 0.9026260375976562, 1.6606597900390625, 1.3889312744140625, 0.9689102172851562, 0.617767333984375, 2.3440093994140625, 0.5822677612304688, 0.6046600341796875, 0.9248933792114258, 1.2821578979492188, -0.7317962646484375, -0.20432662963867188, -1.8448104858398438, 0.4923286437988281, 0.259063720703125, 0.9306983947753906, 0.9594001770019531, -0.675384521484375, 0.6043815612792969, 1.1640548706054688, 2.4751205444335938, 0.7701644897460938, 2.0477066040039062, 0.6199760437011719, -0.48841094970703125, 0.2956047058105469, -0.38275146484375, -0.5543136596679688, 0.9474945068359375, -0.12331581115722656, 4.26416015625, 0.8075790405273438, 0.6428928375244141, 0.7242813110351562, 0.050380706787109375, -0.7660751342773438, 2.4407272338867188, 1.42242431640625, -1.6688346862792969, 3.634113311767578, 0.43822479248046875, 0.31319427490234375, 3.2931976318359375, -1.6318817138671875, 0.22264480590820312, 2.0763416290283203, 0.5652828216552734, 0.6858596801757812, 0.9943771362304688, -0.06601905822753906, 0.26824378967285156, 0.23622512817382812, 0.24859333038330078, 0.887298583984375, -0.5420131683349609, -0.3262443542480469], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000230.npy"} +{"epoch": 0.3476946334089191, "step": 231, "batch_size": 64, "mean": 0.9415853023529053, "std": 1.030574917793274, "min": -1.1430892944335938, "p10": -0.19548492431640624, "median": 0.8841400146484375, "p90": 2.4184907913208007, "max": 3.1187744140625, "pos_frac": 0.828125, "sample": [0.9133148193359375, 0.03476715087890625, 0.6230583190917969, 1.2556076049804688, 0.5220718383789062, 1.6188125610351562, 0.8363380432128906, 2.151214599609375, -0.07863616943359375, 0.4905719757080078, 0.6235542297363281, 0.5682830810546875, 0.2566680908203125, 0.1381206512451172, -1.1430892944335938, 0.876800537109375, 1.1707916259765625, 2.1747817993164062, 0.955535888671875, -0.7315521240234375, 3.1187744140625, -0.6942710876464844, 0.1495361328125, 1.4161567687988281, -0.033382415771484375, 3.0238037109375, -0.7500991821289062, 0.23345565795898438, 3.0956993103027344, 1.279672622680664, 1.66204833984375, 0.7417640686035156, 2.6451416015625, 1.70269775390625, 2.357776641845703, 1.163299560546875, 0.09825897216796875, 0.8938961029052734, 1.958831787109375, -0.2215423583984375, 2.4795150756835938, 0.8914794921875, 0.8925743103027344, 1.6527252197265625, 2.5261306762695312, 0.26922607421875, 2.34796142578125, -0.20513916015625, -0.033420562744140625, -0.1729583740234375, 1.5340595245361328, 0.73443603515625, 0.35363006591796875, 0.9334030151367188, 0.9297447204589844, 2.4288711547851562, 0.9009552001953125, 0.5028934478759766, 0.4232940673828125, 0.5171966552734375, -1.0389404296875, 0.029117584228515625, 1.8718986511230469, 2.3942699432373047], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000231.npy"} +{"epoch": 0.3492063492063492, "step": 232, "batch_size": 64, "mean": 0.812435507774353, "std": 1.2512754201889038, "min": -1.621063232421875, "p10": -0.8382369995117185, "median": 0.7368907928466797, "p90": 2.3123291015625, "max": 4.093170166015625, "pos_frac": 0.75, "sample": [0.1780242919921875, 2.159423828125, 1.9062118530273438, 3.2776107788085938, 0.77459716796875, 3.198211669921875, 0.39205169677734375, 2.2615890502929688, -0.05266571044921875, -1.0173568725585938, -0.5572586059570312, 0.9533634185791016, -0.04349517822265625, 0.7266941070556641, 0.8813438415527344, 0.3109130859375, 0.31569671630859375, 1.0408439636230469, 0.22338104248046875, 0.8696098327636719, -0.4181861877441406, 3.1036834716796875, 0.987396240234375, 1.88555908203125, 2.314178466796875, 0.7340126037597656, -1.171539306640625, 2.208536148071289, -0.4596977233886719, 1.2931594848632812, 0.03180694580078125, 2.2976226806640625, -1.1483821868896484, -0.1700916290283203, 2.0396728515625, 0.76434326171875, 0.24158287048339844, -1.18603515625, 1.4055633544921875, -0.22969818115234375, 0.931396484375, 2.3460693359375, 0.31322479248046875, 4.093170166015625, 2.308013916015625, 3.2373046875, -0.9586563110351562, -1.0602874755859375, 0.12865447998046875, 0.3056507110595703, 0.7544784545898438, 1.4360370635986328, 2.156585693359375, 1.0608367919921875, 0.8408393859863281, -0.37688446044921875, 0.07034492492675781, -0.2247467041015625, 0.10538482666015625, 0.6303520202636719, 2.0345993041992188, -1.621063232421875, 0.7397689819335938, 0.42252349853515625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000232.npy"} +{"epoch": 0.3507180650037793, "step": 233, "batch_size": 64, "mean": 0.5172350406646729, "std": 1.1399213075637817, "min": -1.3476181030273438, "p10": -0.760972213745117, "median": 0.3473701477050781, "p90": 1.9304010391235356, "max": 5.120025634765625, "pos_frac": 0.6875, "sample": [5.120025634765625, 0.3188514709472656, 2.9825363159179688, 0.24082565307617188, 0.6159095764160156, 1.976064682006836, 1.1311798095703125, 0.1810131072998047, 0.047893524169921875, -0.8200454711914062, 1.0248565673828125, 1.8238525390625, 0.17650985717773438, -0.2687416076660156, -1.2665176391601562, 0.9471206665039062, 0.9432582855224609, -0.04053688049316406, -0.4444580078125, 0.5982112884521484, 1.1879119873046875, 1.2026557922363281, 0.6294288635253906, -0.35419464111328125, -1.246002197265625, 0.847991943359375, -1.3170318603515625, 0.9766635894775391, -1.0256729125976562, -0.4109344482421875, 1.7202911376953125, 0.32848548889160156, 0.8069229125976562, 0.3630867004394531, 2.0797958374023438, -1.3476181030273438, 1.0039901733398438, 0.21742630004882812, -0.041301727294921875, 0.23493194580078125, 1.3328704833984375, 0.033599853515625, 3.0997238159179688, -1.020355224609375, 0.41584014892578125, 0.8321723937988281, -0.6231346130371094, 0.05096435546875, 1.0792694091796875, 0.8502750396728516, -0.6063575744628906, -0.2914161682128906, 0.35105133056640625, 0.9319000244140625, 2.270294189453125, 2.5933380126953125, 0.34368896484375, -0.024463653564453125, 0.4008331298828125, -0.251556396484375, 0.04790496826171875, -0.24908828735351562, -0.17400360107421875, 0.5650558471679688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000233.npy"} +{"epoch": 0.35222978080120937, "step": 234, "batch_size": 64, "mean": 0.48579323291778564, "std": 1.0202462673187256, "min": -1.150054931640625, "p10": -0.704037094116211, "median": 0.33383655548095703, "p90": 1.8533523559570313, "max": 2.954864501953125, "pos_frac": 0.671875, "sample": [0.13117218017578125, 0.250457763671875, -0.6116867065429688, -0.714935302734375, 0.5801162719726562, -0.4938087463378906, 0.7837104797363281, -0.5814094543457031, 1.1840438842773438, -0.9723930358886719, 1.646209716796875, 0.28505516052246094, 0.4378204345703125, 0.8040447235107422, 1.838836669921875, -0.349700927734375, -1.150054931640625, -1.0348243713378906, 1.266754150390625, 0.007976531982421875, 0.31291770935058594, 0.3547554016113281, 2.565256118774414, 2.954864501953125, -0.1642131805419922, -0.4404487609863281, 1.4992523193359375, -0.31063079833984375, -0.45510101318359375, -0.6786079406738281, 1.811279296875, 1.3102607727050781, 2.296205520629883, 1.6453418731689453, 0.2557373046875, -1.1190052032470703, 0.25058746337890625, 0.150421142578125, 0.5376262664794922, -0.6098098754882812, 1.5429229736328125, 1.21551513671875, 0.18245315551757812, 1.8948287963867188, 1.2760677337646484, 0.5540084838867188, 0.4543418884277344, 1.8595733642578125, 2.4151992797851562, -0.5378742218017578, -0.273834228515625, 0.5559501647949219, -0.9681892395019531, 2.508882522583008, -0.2320995330810547, 0.6327037811279297, 0.563262939453125, 0.7470970153808594, -1.0482711791992188, 0.17319488525390625, 0.08005523681640625, 1.2535057067871094, 1.0101165771484375, -0.24271774291992188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000234.npy"} +{"epoch": 0.35374149659863946, "step": 235, "batch_size": 64, "mean": 0.897940993309021, "std": 0.9623847007751465, "min": -1.806488037109375, "p10": -0.05487060546874998, "median": 0.8763666152954102, "p90": 2.1868797302246095, "max": 3.4359703063964844, "pos_frac": 0.859375, "sample": [0.9777565002441406, 2.348388671875, 0.451324462890625, 1.7279052734375, 2.1732025146484375, 0.2280426025390625, 0.4420623779296875, 2.1927413940429688, 0.8010711669921875, 0.9185791015625, 0.05759429931640625, 0.6097335815429688, 0.5835418701171875, 1.2314529418945312, -1.806488037109375, 1.7614803314208984, 1.3469772338867188, -0.11957931518554688, 0.8887424468994141, 0.7569923400878906, 1.2856826782226562, 0.9806251525878906, 0.2613372802734375, 0.42642974853515625, 2.275909423828125, 0.6403236389160156, -0.6365776062011719, 0.17864990234375, 3.4359703063964844, 0.38306617736816406, 2.19439697265625, 0.2984428405761719, 0.9713897705078125, 3.274322509765625, 1.4659996032714844, 0.2818164825439453, 1.5377998352050781, 0.4793663024902344, 2.4085540771484375, -0.7736358642578125, 0.022832870483398438, 1.7060966491699219, -0.0344696044921875, 0.9174079895019531, 1.7836532592773438, 1.0756950378417969, 1.544525146484375, 1.1965141296386719, 1.76318359375, 0.716705322265625, 0.9981613159179688, 0.496978759765625, 2.02978515625, 0.35271453857421875, 0.20961761474609375, -0.5317039489746094, 0.8639907836914062, 1.2456130981445312, 0.9539794921875, -0.00563812255859375, 1.941070556640625, 0.374176025390625, -0.0636138916015625, -1.0304450988769531], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000235.npy"} +{"epoch": 0.35525321239606955, "step": 236, "batch_size": 64, "mean": 0.7657992839813232, "std": 1.3400806188583374, "min": -1.4383697509765625, "p10": -0.89952392578125, "median": 0.6393232345581055, "p90": 2.3951770782470705, "max": 5.2504119873046875, "pos_frac": 0.6875, "sample": [1.3249855041503906, -0.8419342041015625, -0.39534759521484375, 1.3748283386230469, 1.3526420593261719, -0.988800048828125, 0.5968551635742188, 2.9833221435546875, 0.467376708984375, 0.8402996063232422, 0.7383041381835938, 0.48260498046875, -0.21819496154785156, 0.7578125, -0.4167194366455078, 0.32085418701171875, -0.04648017883300781, -0.90985107421875, 0.8582992553710938, 2.5771656036376953, 0.23711395263671875, -0.05767822265625, 0.31329345703125, 0.07147216796875, 1.7381057739257812, 0.8386688232421875, 0.6817913055419922, 2.429229736328125, 2.4026260375976562, 2.1915931701660156, 2.377796173095703, -0.87542724609375, -0.0709991455078125, 1.4817543029785156, 4.9526824951171875, -0.4690399169921875, -0.993865966796875, 5.2504119873046875, 0.7004127502441406, 0.5831851959228516, -1.0382881164550781, 0.17120361328125, 1.445779800415039, 1.2048721313476562, 0.5226593017578125, 0.7502651214599609, 1.247802734375, -0.999176025390625, 2.2592506408691406, -0.1472759246826172, 0.053318023681640625, -1.1032257080078125, -1.4383697509765625, 2.531108856201172, 2.1360397338867188, 2.0920333862304688, 2.2545928955078125, -0.5029659271240234, -0.4335823059082031, -0.4064750671386719, 1.1526870727539062, 0.43732452392578125, 1.3234748840332031, 0.8569488525390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000236.npy"} +{"epoch": 0.35676492819349964, "step": 237, "batch_size": 64, "mean": 0.7191513180732727, "std": 1.259556770324707, "min": -2.2449722290039062, "p10": -0.7989093780517575, "median": 0.6143455505371094, "p90": 2.308249664306641, "max": 4.3516998291015625, "pos_frac": 0.8125, "sample": [0.21793746948242188, 1.6401290893554688, 1.297271728515625, 1.6152667999267578, 0.393218994140625, -1.72760009765625, 0.7783279418945312, 2.57403564453125, 0.6970539093017578, 1.1403312683105469, 0.780853271484375, 0.079925537109375, -0.9626922607421875, -2.0788116455078125, 1.1013603210449219, 1.2348861694335938, 0.37633705139160156, 3.0466232299804688, -1.0585174560546875, 0.428924560546875, -0.9385833740234375, 1.2531681060791016, 1.4902801513671875, 1.7188987731933594, 2.74078369140625, -0.4730033874511719, -0.2979583740234375, 1.039480209350586, 0.9664306640625, 0.46291160583496094, 1.2533226013183594, 0.5408477783203125, 0.232696533203125, 0.1821136474609375, 0.6911678314208984, 0.10692596435546875, 0.6418380737304688, -2.2449722290039062, 3.507354736328125, -0.046375274658203125, 0.8298149108886719, 0.24361419677734375, 0.5129852294921875, 0.053913116455078125, 0.3546943664550781, -0.24827003479003906, 1.7296466827392578, 0.58685302734375, 3.748748779296875, 2.3409271240234375, 1.1816902160644531, 0.269866943359375, 2.2320022583007812, 4.3516998291015625, -1.66937255859375, 1.3129196166992188, -0.3439159393310547, 1.2701377868652344, 1.3105812072753906, 0.08982086181640625, 0.5619926452636719, 0.01947021484375, 0.20856475830078125, 0.67510986328125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000237.npy"} +{"epoch": 0.35827664399092973, "step": 238, "batch_size": 64, "mean": 0.5112046003341675, "std": 1.3634570837020874, "min": -5.0218505859375, "p10": -1.0412635803222656, "median": 0.5874595642089844, "p90": 2.122733306884766, "max": 3.3346328735351562, "pos_frac": 0.671875, "sample": [-0.9259185791015625, -0.1192626953125, 1.1150894165039062, -1.5385665893554688, 0.9804916381835938, 2.664031982421875, 0.6227073669433594, -0.19179534912109375, 1.5102996826171875, 0.36704063415527344, -0.49829864501953125, 0.5431137084960938, 2.092041015625, 0.5541458129882812, 0.9597911834716797, 1.4818267822265625, -0.3357429504394531, -0.2630348205566406, 1.351837158203125, 1.3736763000488281, 1.2747802734375, 3.3346328735351562, 0.9959754943847656, 1.0924263000488281, -0.5823707580566406, 0.5058879852294922, 0.9200820922851562, 0.0715484619140625, -1.3518905639648438, 2.1358871459960938, 1.0520496368408203, 1.6476821899414062, -0.35711669921875, 0.10540771484375, -0.2810478210449219, 0.1309185028076172, -5.0218505859375, -0.3248004913330078, 2.0282135009765625, 2.2484054565429688, 1.0828514099121094, -0.832000732421875, 0.9463424682617188, -1.055694580078125, 1.75079345703125, 0.6252288818359375, 3.2639007568359375, 2.1950950622558594, 0.5449695587158203, -0.441162109375, -1.649322509765625, 1.2101211547851562, 1.4930171966552734, -1.6223258972167969, -1.0075912475585938, 0.008493423461914062, -1.62060546875, 1.3872146606445312, 1.8699417114257812, 0.6207733154296875, 2.179534912109375, 0.1011199951171875, -0.14374923706054688, 0.44185447692871094], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000238.npy"} +{"epoch": 0.35978835978835977, "step": 239, "batch_size": 64, "mean": 0.764798641204834, "std": 1.1721928119659424, "min": -2.5919189453125, "p10": -0.6377250671386717, "median": 0.6955795288085938, "p90": 2.6117572784423833, "max": 3.1475830078125, "pos_frac": 0.765625, "sample": [1.2186737060546875, 2.6385421752929688, -0.0540924072265625, 0.2591590881347656, 1.8219528198242188, 2.512298583984375, -0.1362762451171875, -0.1078643798828125, 0.7775421142578125, 0.39522552490234375, -0.8209915161132812, 0.1977691650390625, 0.7970085144042969, 1.6824951171875, 2.7850189208984375, -0.774200439453125, -0.3997154235839844, -1.0282936096191406, 2.640716552734375, 1.397216796875, 0.5722198486328125, 0.840087890625, 1.4576740264892578, 2.6812057495117188, 1.7938117980957031, 0.4690818786621094, 1.3103885650634766, 0.4503440856933594, 0.6363410949707031, 1.0592689514160156, 1.3554000854492188, 1.1005496978759766, -0.46839141845703125, 0.3252754211425781, 0.6680126190185547, 3.018218994140625, 0.3808765411376953, 0.0258331298828125, -2.5919189453125, 0.6880340576171875, -0.4676990509033203, 0.5965042114257812, 0.9988021850585938, -0.710296630859375, -0.7466239929199219, 1.0289993286132812, 2.03765869140625, 3.1475830078125, 3.1283721923828125, -0.3387031555175781, -0.4335212707519531, 1.1681747436523438, -1.8250198364257812, 2.5492591857910156, 0.1746826171875, 1.2488937377929688, 0.9717884063720703, 0.5815849304199219, 0.09381103515625, 1.5528564453125, 0.703125, 0.8228759765625, 0.8312454223632812, 0.25826263427734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000239.npy"} +{"epoch": 0.36130007558578986, "step": 240, "batch_size": 64, "mean": 0.8184475898742676, "std": 1.1971474885940552, "min": -1.9273223876953125, "p10": -0.6193143844604491, "median": 0.8715267181396484, "p90": 2.0172134399414063, "max": 4.950408935546875, "pos_frac": 0.78125, "sample": [0.6569557189941406, -1.0689849853515625, 1.0697021484375, 1.1768646240234375, 0.14908599853515625, 2.6187286376953125, -0.03326416015625, 1.9377899169921875, -0.49994850158691406, -0.015840530395507812, 1.0125579833984375, 0.39234161376953125, 1.0054664611816406, 2.4730911254882812, 3.239063262939453, 3.2240066528320312, 1.394510269165039, -1.291818618774414, 1.7372360229492188, 1.6557998657226562, -1.2826118469238281, 0.8329620361328125, 0.4740009307861328, -0.67047119140625, 1.8090744018554688, 4.950408935546875, 1.26678466796875, 1.2429351806640625, 2.1703338623046875, 1.777008056640625, 1.5569572448730469, 1.20263671875, 0.26856231689453125, 0.9451904296875, 1.1075210571289062, 0.93560791015625, 0.7943038940429688, 0.030670166015625, 0.8600959777832031, 0.5392837524414062, 0.8829574584960938, 0.022686004638671875, -0.8734474182128906, 1.0994415283203125, 1.6905441284179688, -1.9273223876953125, 0.3335552215576172, 0.741363525390625, 0.5741329193115234, -0.1913909912109375, 1.6078147888183594, 1.2704696655273438, -0.0541839599609375, 0.44240570068359375, 0.3247032165527344, 0.16536712646484375, 1.56097412109375, 1.3378524780273438, -0.19303131103515625, -0.27217674255371094, 2.0448760986328125, 0.04278564453125, -1.8469963073730469, 1.952667236328125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000240.npy"} +{"epoch": 0.36281179138321995, "step": 241, "batch_size": 64, "mean": 0.33667343854904175, "std": 1.0262504816055298, "min": -3.4178466796875, "p10": -0.8334527969360351, "median": 0.4116859436035156, "p90": 1.5498647689819336, "max": 2.8667755126953125, "pos_frac": 0.625, "sample": [-0.85650634765625, -0.06727981567382812, 1.5373210906982422, -0.53009033203125, -0.01113128662109375, -1.5279693603515625, 0.5193367004394531, 1.4079322814941406, 1.1340522766113281, 2.8667755126953125, -0.7452926635742188, 1.1017074584960938, -0.6752815246582031, 0.30429649353027344, 0.5910873413085938, 1.9060516357421875, 0.22615814208984375, 0.3388996124267578, -1.2623672485351562, -3.4178466796875, 1.1170654296875, 1.202667236328125, -0.22100830078125, 0.7184600830078125, 0.7669143676757812, -0.7796611785888672, 1.8205528259277344, 0.6651477813720703, 0.39417266845703125, -0.42132568359375, 1.7638988494873047, 1.2030181884765625, 1.416290283203125, 0.3904571533203125, -0.24406814575195312, -1.74420166015625, -0.17819976806640625, 1.6941375732421875, -0.45415496826171875, 0.5121879577636719, 1.1440811157226562, 0.3105583190917969, -0.07571601867675781, 0.4897918701171875, 0.5222454071044922, 1.5552406311035156, 0.42919921875, 1.8994979858398438, 0.6902084350585938, -0.03143310546875, -1.04425048828125, 0.5669326782226562, 1.0200653076171875, -0.1654815673828125, 1.1672325134277344, -0.4452781677246094, -1.1340179443359375, 1.0419063568115234, 0.3885345458984375, 0.4513568878173828, -0.10080146789550781, 0.5387611389160156, -0.16872406005859375, 0.03498649597167969], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000241.npy"} +{"epoch": 0.36432350718065004, "step": 242, "batch_size": 64, "mean": 0.8218023180961609, "std": 1.151469111442566, "min": -1.3277816772460938, "p10": -0.43421974182128903, "median": 0.6377239227294922, "p90": 2.110696792602539, "max": 4.8518524169921875, "pos_frac": 0.78125, "sample": [-1.3277816772460938, 0.9507675170898438, -0.39521026611328125, 0.4562492370605469, 2.0331649780273438, -0.9659423828125, 2.0608291625976562, 2.132068634033203, 0.13683700561523438, 1.6230239868164062, -0.5657501220703125, 0.9485950469970703, 0.16089439392089844, 1.865631103515625, 2.048309326171875, -0.5390625, 1.9920673370361328, -0.30609130859375, 0.22116851806640625, 0.9101753234863281, 0.2963409423828125, 1.8843536376953125, 2.2831077575683594, 0.8929462432861328, 0.4142608642578125, 1.9662704467773438, 0.2962646484375, 0.14907455444335938, 1.423858642578125, -0.4440765380859375, 1.298065185546875, 0.017316818237304688, 4.042572021484375, 0.9840087890625, 0.8339328765869141, 2.8830947875976562, 0.49930381774902344, -0.22574234008789062, 4.8518524169921875, 0.5829944610595703, 0.9414520263671875, 0.3853034973144531, 0.6138648986816406, -0.4112205505371094, 0.55377197265625, -0.175628662109375, 1.5766239166259766, 1.3198394775390625, 1.1831512451171875, 1.6605167388916016, 2.654590606689453, 0.9514312744140625, 2.1490020751953125, -1.0083446502685547, 0.11064338684082031, 0.7669143676757812, -0.13256072998046875, 0.6890869140625, 0.6615829467773438, -0.05536651611328125, 0.25960540771484375, 0.038936614990234375, -0.78155517578125, 0.27396392822265625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000242.npy"} +{"epoch": 0.36583522297808013, "step": 243, "batch_size": 64, "mean": 0.650065541267395, "std": 1.1035120487213135, "min": -1.920196533203125, "p10": -0.7317474365234374, "median": 0.7321014404296875, "p90": 2.0474300384521484, "max": 3.173891067504883, "pos_frac": 0.6875, "sample": [0.19747161865234375, 2.0337181091308594, 0.9327392578125, -1.920196533203125, 3.173891067504883, 0.37561798095703125, -0.2925872802734375, 0.23498153686523438, 0.8389739990234375, -0.17911529541015625, 2.9731903076171875, 0.5753936767578125, 0.69482421875, 0.769378662109375, -0.17002105712890625, 2.3663711547851562, -0.22098350524902344, 0.5617141723632812, 0.8074874877929688, -0.01761627197265625, -0.078857421875, 2.3076019287109375, 0.9316024780273438, -1.0035362243652344, -1.1120758056640625, 1.1885662078857422, 0.024566650390625, 0.4011344909667969, 0.80328369140625, -0.1864013671875, 0.969146728515625, -1.5109367370605469, 1.1559410095214844, -0.6558799743652344, 0.9810581207275391, 1.6990108489990234, 1.5857925415039062, 1.7469100952148438, -0.9161376953125, 0.23883056640625, 2.0533065795898438, 1.1395721435546875, 0.11240768432617188, 1.6766719818115234, 2.252960205078125, -0.0472869873046875, -0.02735137939453125, 1.0420761108398438, 1.815582275390625, 1.5668106079101562, 0.3594818115234375, -0.1071624755859375, 1.1256828308105469, 2.360363006591797, -0.07331466674804688, 1.2794570922851562, 1.1859550476074219, 0.9684677124023438, -0.6954421997070312, -0.7473068237304688, -1.7540435791015625, 1.9042835235595703, 1.8150177001953125, 0.0931549072265625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000243.npy"} +{"epoch": 0.3673469387755102, "step": 244, "batch_size": 64, "mean": 0.8689748048782349, "std": 0.9713467955589294, "min": -1.0546875, "p10": -0.1331068038940429, "median": 0.6634635925292969, "p90": 1.9731605529785157, "max": 5.052764892578125, "pos_frac": 0.84375, "sample": [1.9584579467773438, -0.40044403076171875, 0.61529541015625, 0.551361083984375, 1.5504150390625, 0.45477867126464844, -0.1803913116455078, 1.618408203125, 0.43495941162109375, 0.2352752685546875, 0.4417247772216797, 0.6155052185058594, -1.0546875, 2.1239776611328125, 0.5173492431640625, 1.6319122314453125, 0.3179664611816406, 3.527496337890625, -0.19940185546875, 0.879150390625, 1.1015625, 0.8634109497070312, -0.4822349548339844, 0.9477157592773438, 1.3320465087890625, 1.3571624755859375, 0.4305229187011719, 1.6263046264648438, 1.0636672973632812, 0.27172279357910156, 2.4939041137695312, -0.06372833251953125, -0.1619281768798828, 0.9700393676757812, -0.054378509521484375, 0.29947662353515625, 0.1624603271484375, 0.7642898559570312, 1.440643310546875, 5.052764892578125, 0.3450355529785156, 1.1031494140625, 0.7880744934082031, 0.321563720703125, 0.003467559814453125, 2.147808074951172, 0.7114219665527344, 1.979461669921875, -0.2870292663574219, 0.20887374877929688, 0.1321258544921875, 2.2159042358398438, 1.625619888305664, 1.4212837219238281, 1.5189971923828125, 0.7814788818359375, -0.06585693359375, 1.115570068359375, 0.2558174133300781, 1.3757095336914062, 0.40972137451171875, 0.4165821075439453, 1.4913444519042969, 0.543731689453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000244.npy"} +{"epoch": 0.3688586545729403, "step": 245, "batch_size": 64, "mean": 0.8861616849899292, "std": 1.2769910097122192, "min": -3.1524658203125, "p10": -0.4096780776977539, "median": 0.8097896575927734, "p90": 2.4246217727661135, "max": 4.047882080078125, "pos_frac": 0.765625, "sample": [0.572509765625, -0.5836181640625, 1.1507453918457031, 4.047882080078125, 1.5480785369873047, 0.6905097961425781, 0.37178802490234375, 0.7322006225585938, 0.5104255676269531, 0.8752517700195312, 0.2268829345703125, 1.56231689453125, 1.4060344696044922, 1.1259498596191406, 2.871337890625, 2.8465919494628906, -0.17871856689453125, 1.5663928985595703, -0.429779052734375, 3.4835853576660156, 2.3579063415527344, 0.46378326416015625, -0.267852783203125, 1.3246002197265625, 1.2262039184570312, 1.0959396362304688, 0.40415191650390625, -0.3229179382324219, 0.4564056396484375, -0.8906421661376953, 2.3707046508789062, -0.6980781555175781, -0.2337188720703125, 0.5170211791992188, 0.41351318359375, -0.3039703369140625, -2.303558349609375, 1.2370147705078125, 1.6638031005859375, 3.0319671630859375, 1.2690277099609375, -0.271881103515625, 3.639251708984375, 0.5635528564453125, -0.3627758026123047, 0.8004417419433594, 2.0514469146728516, 2.1178817749023438, -0.3309173583984375, 1.314361572265625, 1.80035400390625, 0.8617172241210938, -3.1524658203125, 2.4477291107177734, 2.1545257568359375, 0.80755615234375, -0.714813232421875, 1.473489761352539, 0.7526168823242188, 0.8120231628417969, 0.8489055633544922, 0.7148284912109375, 0.3402519226074219, 0.8385963439941406], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000245.npy"} +{"epoch": 0.37037037037037035, "step": 246, "batch_size": 64, "mean": 0.9072258472442627, "std": 1.152986764907837, "min": -1.3170852661132812, "p10": -0.3861707687377929, "median": 0.7827177047729492, "p90": 2.4705886840820335, "max": 3.8910369873046875, "pos_frac": 0.765625, "sample": [1.156982421875, 1.3606491088867188, 1.812234878540039, 2.792234420776367, 0.4380645751953125, 0.20868301391601562, -1.0463142395019531, 3.4026641845703125, 0.06249237060546875, -0.25448036193847656, 1.7694320678710938, 0.4747314453125, 0.0183563232421875, -0.2761077880859375, 1.48394775390625, 0.7910385131835938, 1.0710086822509766, -0.4327373504638672, -0.8455581665039062, -0.49853515625, 0.5084228515625, 1.6890888214111328, 1.4718704223632812, 1.9636611938476562, 3.181243896484375, 0.0615081787109375, 1.7791595458984375, 3.379180908203125, 0.153778076171875, 1.4182357788085938, 0.06652641296386719, 0.6310195922851562, 0.8765106201171875, 0.6269378662109375, 3.32891845703125, 0.6143341064453125, 0.486114501953125, 1.9203624725341797, 2.6878433227539062, -0.0751190185546875, 1.030120849609375, -1.3170852661132812, -0.6630325317382812, 1.7112579345703125, 1.5613136291503906, -0.2120819091796875, 0.2338104248046875, 1.6489486694335938, -0.24353790283203125, 0.3488311767578125, 0.762786865234375, -0.22507286071777344, 1.1140308380126953, 3.8910369873046875, 0.7743968963623047, 1.0960350036621094, 1.4210281372070312, 1.783843994140625, -0.621551513671875, 1.9029541015625, 1.0141410827636719, -0.2775154113769531, 1.0791740417480469, -0.009765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000246.npy"} +{"epoch": 0.37188208616780044, "step": 247, "batch_size": 64, "mean": 0.44914010167121887, "std": 1.2032238245010376, "min": -3.2310562133789062, "p10": -0.8203540802001953, "median": 0.5206966400146484, "p90": 2.1058738708496096, "max": 3.5986328125, "pos_frac": 0.6875, "sample": [-0.80609130859375, 0.5664634704589844, -0.538848876953125, 0.5076255798339844, 0.6121597290039062, -1.5378799438476562, 0.76812744140625, 2.350250244140625, 2.4217681884765625, 0.5337677001953125, 2.2858428955078125, 0.433349609375, 1.1057262420654297, 1.4054546356201172, 0.0457611083984375, 0.19757080078125, 0.5946769714355469, 1.3541831970214844, 0.1336688995361328, 2.126708984375, 3.5986328125, 1.1519927978515625, -1.7712936401367188, -0.8249969482421875, -0.0072784423828125, -0.0059051513671875, 1.4706974029541016, -0.68780517578125, -1.7316818237304688, -0.1879425048828125, 2.0572586059570312, 0.6220664978027344, 0.5399932861328125, 0.6715316772460938, 1.202880859375, 0.3236808776855469, 0.59161376953125, -3.2310562133789062, -1.2439498901367188, -0.8095207214355469, 0.00826263427734375, 0.16304779052734375, -0.6605072021484375, 2.16229248046875, 2.6855926513671875, -0.07493972778320312, 1.4393806457519531, 0.232696533203125, 0.24459457397460938, 1.0168304443359375, -0.668304443359375, 0.9105777740478516, -0.3677406311035156, 0.18303298950195312, -1.3520698547363281, 1.339437484741211, 0.3669452667236328, 1.6632862091064453, 1.1678009033203125, -0.5352020263671875, -0.785797119140625, 1.0599365234375, 1.6339263916015625, 0.6226825714111328], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000247.npy"} +{"epoch": 0.37339380196523053, "step": 248, "batch_size": 64, "mean": 0.6935799717903137, "std": 1.1088380813598633, "min": -1.5883331298828125, "p10": -0.6780183792114257, "median": 0.6187362670898438, "p90": 2.064068984985352, "max": 3.6614456176757812, "pos_frac": 0.734375, "sample": [0.5392589569091797, 0.2923736572265625, -1.513885498046875, -1.5883331298828125, 1.9532279968261719, 0.5475006103515625, 1.8341217041015625, 1.3354530334472656, 0.6901893615722656, -0.6871891021728516, -0.07673072814941406, 1.537506103515625, 0.7055816650390625, -0.46183204650878906, 0.9288482666015625, -0.33638763427734375, 2.250701904296875, 0.39661407470703125, -0.9470272064208984, 0.1529998779296875, -0.7748947143554688, 0.32090187072753906, 1.2664718627929688, 0.7412681579589844, 2.1294097900390625, 0.9852981567382812, 0.9512672424316406, 0.22884750366210938, 1.5329093933105469, 1.7040557861328125, 0.22872161865234375, 2.8164825439453125, 0.010568618774414062, -0.32854270935058594, 1.8939361572265625, 0.4772052764892578, -0.1349029541015625, 1.4182815551757812, 0.1672821044921875, -0.018238067626953125, 1.4062271118164062, 2.4856529235839844, 0.7655601501464844, 3.6614456176757812, 1.5335922241210938, 0.9900474548339844, 0.12948989868164062, -0.6566200256347656, 0.3801288604736328, 1.1275634765625, 3.5942001342773438, 1.0716667175292969, 0.6687774658203125, 1.2753372192382812, -1.0717086791992188, 0.9838352203369141, 0.568695068359375, 2.111572265625, 1.8236770629882812, -0.3350982666015625, -0.062183380126953125, -0.19220542907714844, -1.203094482421875, 0.163238525390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000248.npy"} +{"epoch": 0.3749055177626606, "step": 249, "batch_size": 64, "mean": 0.3260522484779358, "std": 1.0874457359313965, "min": -2.4713134765625, "p10": -1.2321710586547852, "median": 0.48143768310546875, "p90": 1.4384864807128908, "max": 3.11309814453125, "pos_frac": 0.6875, "sample": [1.9641761779785156, -1.1918163299560547, 0.4006614685058594, 0.1322021484375, 0.09830665588378906, -2.007598876953125, -1.2494659423828125, 1.301513671875, 0.442352294921875, 1.1910858154296875, 0.2700481414794922, 0.6427536010742188, 0.33400726318359375, 0.9537811279296875, -0.6568145751953125, -0.3234710693359375, -0.25301361083984375, -1.6052627563476562, -0.16828155517578125, -0.434539794921875, -0.34906005859375, -0.7475814819335938, 0.6695480346679688, 0.8583621978759766, 0.8276443481445312, 0.1901702880859375, -0.4626007080078125, -0.157684326171875, 3.11309814453125, 0.23464202880859375, -0.8530654907226562, 0.60400390625, 0.49605369567871094, 0.9177970886230469, 1.80621337890625, -2.17498779296875, 0.4890289306640625, -0.3328399658203125, 0.6932086944580078, -2.4713134765625, 0.6241550445556641, 1.320322036743164, 1.4453048706054688, 2.1090927124023438, 0.8127288818359375, 1.1627578735351562, -1.319488525390625, 1.422576904296875, 1.7890625, 0.8344268798828125, 0.008275985717773438, 0.473846435546875, 0.65692138671875, 0.3828773498535156, 0.9706268310546875, 0.855987548828125, 0.7140617370605469, 0.7344970703125, 2.2694549560546875, -1.780853271484375, 0.001567840576171875, -0.3489246368408203, 1.3021240234375, 1.23468017578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000249.npy"} +{"epoch": 0.3764172335600907, "step": 250, "batch_size": 64, "mean": 0.8719134330749512, "std": 1.2681790590286255, "min": -2.5200347900390625, "p10": -0.353573989868164, "median": 0.6002731323242188, "p90": 2.5860074996948246, "max": 4.9185791015625, "pos_frac": 0.765625, "sample": [0.9682769775390625, -0.169036865234375, -0.12425994873046875, 0.5805797576904297, 0.2191619873046875, 3.3857345581054688, 1.3805313110351562, 1.7007293701171875, 1.40057373046875, 0.7550735473632812, 0.5601615905761719, 0.5644149780273438, 3.091705322265625, 0.264678955078125, 0.96160888671875, 0.3825054168701172, 0.38543701171875, -2.5200347900390625, 0.6610469818115234, 0.20211029052734375, 0.1987285614013672, -1.2689437866210938, 1.0154800415039062, -0.31015777587890625, 0.5950984954833984, 0.448699951171875, 2.3672866821289062, 1.785909652709961, -0.403533935546875, 3.4090805053710938, -0.10353851318359375, 3.01165771484375, 2.467966079711914, 0.6054477691650391, 2.1394004821777344, -0.25852203369140625, -0.1987895965576172, 1.1806564331054688, 3.6963882446289062, 1.0982208251953125, -0.17417144775390625, -0.3721809387207031, 0.2699623107910156, -0.044887542724609375, 2.6365966796875, 0.7716827392578125, 4.9185791015625, 2.204080581665039, 0.8757133483886719, 1.4847240447998047, 0.5192108154296875, -1.0074081420898438, 0.7862625122070312, 0.22828102111816406, -0.43280029296875, 1.1112442016601562, 1.98614501953125, 1.2929229736328125, 1.5749549865722656, 0.13144683837890625, -0.6507682800292969, 0.36739349365234375, 0.8638515472412109, 0.3340911865234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000250.npy"} +{"epoch": 0.3779289493575208, "step": 251, "batch_size": 64, "mean": 0.6663837432861328, "std": 1.278817892074585, "min": -2.2931671142578125, "p10": -0.6758255004882813, "median": 0.5671186447143555, "p90": 2.358591842651368, "max": 3.9799365997314453, "pos_frac": 0.703125, "sample": [0.31522178649902344, 3.1815338134765625, 2.4398231506347656, -0.12682151794433594, 0.3523406982421875, -0.5855331420898438, -1.2289047241210938, 0.7924957275390625, 0.6415481567382812, 3.7540817260742188, -0.6542892456054688, -0.6711578369140625, 1.2759552001953125, -0.42232513427734375, 0.2264728546142578, -0.9856948852539062, 0.12584686279296875, 3.30712890625, 0.4926891326904297, -0.6972503662109375, 0.9682121276855469, 3.2394180297851562, 0.6445846557617188, -2.2931671142578125, 0.47153472900390625, -0.677825927734375, 2.1690521240234375, 1.4411773681640625, 0.09340667724609375, 0.28600311279296875, 0.7276458740234375, 1.4589462280273438, 0.3665504455566406, 0.6910762786865234, 1.1996688842773438, 0.9776268005371094, 1.5425338745117188, -0.301055908203125, -0.14986228942871094, -0.4184837341308594, 1.9783077239990234, 1.4167938232421875, -0.18604278564453125, 3.9799365997314453, 0.7113723754882812, 0.1839447021484375, 0.1407470703125, -0.5796546936035156, 1.1359100341796875, 1.158233642578125, 1.469879150390625, 1.2745819091796875, 3.118183135986328, -0.5681915283203125, 0.40938568115234375, -0.7841320037841797, 1.262115478515625, -1.8796844482421875, 0.7746391296386719, -0.287811279296875, 0.13182830810546875, 1.98040771484375, 1.0392303466796875, 0.7983760833740234], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000251.npy"} +{"epoch": 0.3794406651549509, "step": 252, "batch_size": 64, "mean": 0.6779407262802124, "std": 1.3843283653259277, "min": -4.1277923583984375, "p10": -0.5158842086791992, "median": 0.7219676971435547, "p90": 2.2344741821289062, "max": 3.84368896484375, "pos_frac": 0.734375, "sample": [1.0142269134521484, -4.1277923583984375, -0.5349369049072266, -1.8349609375, 1.1077041625976562, 0.3934783935546875, 0.6937141418457031, 1.094146728515625, 0.1315593719482422, 2.5765914916992188, 3.6460647583007812, 0.7502212524414062, 0.01685333251953125, 1.6368675231933594, 2.8363494873046875, 0.9932403564453125, 2.6057815551757812, -0.26985740661621094, 0.8315963745117188, -0.14632797241210938, -1.0066356658935547, 0.09225654602050781, 2.1289520263671875, 1.123373031616211, 1.1147918701171875, 1.0022048950195312, 0.45920562744140625, -0.7364273071289062, 1.804037094116211, -0.6275138854980469, 1.157449722290039, 1.1758880615234375, 1.4639854431152344, 1.626556396484375, 0.8915252685546875, 0.4894142150878906, 1.934967041015625, 0.35097694396972656, 0.5454883575439453, 0.06615066528320312, 0.5991287231445312, -0.09477996826171875, -0.052310943603515625, 0.2245006561279297, 1.3982696533203125, 3.405059814453125, -0.47142791748046875, 0.6161041259765625, -3.836742401123047, -0.15737533569335938, -0.36833953857421875, 0.7624359130859375, -0.008081436157226562, 0.7625885009765625, 0.492828369140625, -0.353179931640625, 1.5379714965820312, 2.2467422485351562, 0.9939002990722656, 1.4273681640625, 3.84368896484375, 2.2058486938476562, -0.4498786926269531, 0.192718505859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000252.npy"} +{"epoch": 0.38095238095238093, "step": 253, "batch_size": 64, "mean": 0.7623116374015808, "std": 1.2073894739151, "min": -1.4031753540039062, "p10": -0.6433324813842772, "median": 0.5827884674072266, "p90": 2.554972457885743, "max": 4.9417572021484375, "pos_frac": 0.703125, "sample": [0.7312278747558594, -0.7738418579101562, 2.237245559692383, -0.2635002136230469, -0.020238876342773438, 1.1721115112304688, 0.449615478515625, 1.669351577758789, 0.9496917724609375, 0.01630401611328125, 0.9006500244140625, -1.3854217529296875, 2.4290122985839844, 1.0085563659667969, 0.3495006561279297, -0.12923049926757812, -0.09035301208496094, 0.5853195190429688, -1.4031753540039062, 1.9608535766601562, 4.9417572021484375, 0.3815288543701172, -0.16181182861328125, 1.0413131713867188, 0.3428001403808594, 0.2777557373046875, 1.3382568359375, 1.1369247436523438, -0.47397613525390625, 2.6089553833007812, 2.1627655029296875, 1.0918388366699219, 1.1224861145019531, 0.6858978271484375, 0.507965087890625, 3.0455322265625, -0.5595855712890625, -1.0009117126464844, 3.0272979736328125, 0.5802574157714844, 0.458221435546875, -0.09946441650390625, -0.5043487548828125, -0.6792240142822266, 1.1496505737304688, -0.16665267944335938, -0.06686019897460938, 1.1551895141601562, 2.9593067169189453, -0.10085296630859375, 0.8541717529296875, 3.0135421752929688, 2.8303451538085938, 1.1755867004394531, 1.3336353302001953, 0.30274200439453125, 0.73236083984375, 0.17210960388183594, 1.3499374389648438, 0.27892303466796875, -0.9536170959472656, -0.6857967376708984, 0.5635929107666016, 1.2247200012207031], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000253.npy"} +{"epoch": 0.382464096749811, "step": 254, "batch_size": 64, "mean": 0.7698097229003906, "std": 1.2764997482299805, "min": -2.643768310546875, "p10": -0.7352642059326172, "median": 0.6495132446289062, "p90": 2.410210418701172, "max": 4.002813339233398, "pos_frac": 0.71875, "sample": [-0.1278228759765625, 0.5399932861328125, 2.4351654052734375, 1.55078125, -0.21726226806640625, 0.2914581298828125, 1.7073249816894531, 1.32757568359375, 0.21075057983398438, 0.17420196533203125, -0.8167724609375, 1.2048873901367188, 2.591419219970703, 0.5356388092041016, 1.6825790405273438, 1.3714485168457031, 1.717315673828125, 1.2122650146484375, -0.7857208251953125, 1.2686996459960938, 1.1299972534179688, 4.002813339233398, -1.2009048461914062, 3.294004440307617, 0.759033203125, -1.3911361694335938, 3.2152099609375, 1.090768814086914, 2.715597152709961, -0.3384971618652344, 2.307769775390625, -1.155487060546875, 1.2742156982421875, -0.4251728057861328, -0.6191520690917969, 1.1026458740234375, -0.15888214111328125, 0.3168792724609375, 0.00262451171875, 0.06780242919921875, 1.27777099609375, 0.18369102478027344, 1.611593246459961, -0.7382888793945312, 0.4374217987060547, 1.3417282104492188, -0.7282066345214844, 2.3519821166992188, 1.562530517578125, 0.18428421020507812, 2.329681396484375, -0.2581329345703125, -0.36568450927734375, 0.2911529541015625, 2.3206787109375, 1.3350143432617188, 0.2620506286621094, 0.44851112365722656, 1.8844833374023438, 2.5407180786132812, 0.8650264739990234, -2.643768310546875, -0.3662261962890625, -0.7242431640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000254.npy"} +{"epoch": 0.3839758125472411, "step": 255, "batch_size": 64, "mean": 0.7369774580001831, "std": 1.1975857019424438, "min": -2.1957321166992188, "p10": -0.6302091598510742, "median": 0.65887451171875, "p90": 2.373102378845215, "max": 3.4278793334960938, "pos_frac": 0.765625, "sample": [2.3532047271728516, -0.09348678588867188, 2.0305633544921875, 0.123504638671875, 1.116485595703125, -1.196258544921875, 0.15326309204101562, 0.692779541015625, -0.0565948486328125, 0.8163070678710938, -0.027393341064453125, -0.24695205688476562, -0.3570518493652344, 1.4704437255859375, -0.5649700164794922, 0.6172294616699219, 0.38436126708984375, 2.3311080932617188, 0.265655517578125, -1.9177017211914062, 2.2886619567871094, 0.02754974365234375, 0.7578964233398438, 0.0080108642578125, 0.7161712646484375, 0.8049850463867188, -0.6862030029296875, 0.072265625, 2.5454864501953125, 0.6927566528320312, 3.4278793334960938, 1.3057327270507812, -0.7516937255859375, 0.4722862243652344, 1.844146728515625, 0.8882122039794922, 1.5385608673095703, 2.927295684814453, 2.3816299438476562, 0.8896465301513672, 0.15590667724609375, 0.927764892578125, 2.1145477294921875, -2.1957321166992188, 0.49892616271972656, -0.1859283447265625, 0.44689178466796875, 3.068603515625, 0.26631927490234375, 1.3113784790039062, 0.7209930419921875, 1.0634193420410156, 0.5088043212890625, 3.221792221069336, 0.2583503723144531, 1.0957717895507812, 0.40721893310546875, 1.1949882507324219, 3.0662994384765625, -0.6581687927246094, -0.328765869140625, 0.6249923706054688, 0.9786911010742188, -1.4422836303710938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000255.npy"} +{"epoch": 0.3854875283446712, "step": 256, "batch_size": 64, "mean": 0.7227222323417664, "std": 1.2103911638259888, "min": -2.7511444091796875, "p10": -0.41085166931152334, "median": 0.48096466064453125, "p90": 2.1991882324218754, "max": 4.10797119140625, "pos_frac": 0.796875, "sample": [0.2805938720703125, 0.1959381103515625, 0.7520484924316406, 0.8906059265136719, 1.907196044921875, 0.4810638427734375, 0.018121719360351562, 2.097076416015625, 0.9783687591552734, 0.5394248962402344, 0.11243820190429688, 0.1648578643798828, 0.298797607421875, 2.083831787109375, 2.317413330078125, 3.5822525024414062, -0.30914306640625, 0.59368896484375, -2.7511444091796875, 1.3726997375488281, 1.2102012634277344, -0.250335693359375, -0.4544410705566406, 0.2964630126953125, -1.7730712890625, 0.18737030029296875, 0.8961162567138672, -0.17734527587890625, 1.3709335327148438, 0.08587646484375, 0.3068084716796875, 1.6283721923828125, 0.08521270751953125, 1.5763053894042969, 1.1762332916259766, 0.9053573608398438, 1.2598953247070312, 2.242950439453125, -1.144134521484375, 1.3216190338134766, -1.2053070068359375, 1.0633792877197266, 3.299530029296875, -0.262969970703125, 1.9373779296875, 0.0486297607421875, 1.2299957275390625, 0.0873870849609375, 0.34187889099121094, 3.1893310546875, 0.416107177734375, 1.7401046752929688, -0.5848541259765625, -0.00969696044921875, 1.533111572265625, 0.0562896728515625, 4.10797119140625, 0.16982269287109375, 0.3436126708984375, 2.3276596069335938, -0.12053298950195312, 0.5815315246582031, -0.8735198974609375, 0.480865478515625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000256.npy"} +{"epoch": 0.3869992441421013, "step": 257, "batch_size": 64, "mean": 1.0861549377441406, "std": 1.5498048067092896, "min": -2.867429733276367, "p10": -0.18551025390624992, "median": 0.8108177185058594, "p90": 3.0772346496582035, "max": 6.292510986328125, "pos_frac": 0.8125, "sample": [-0.4534473419189453, -0.0663604736328125, 1.5959930419921875, 6.292510986328125, 3.0921096801757812, 2.1456298828125, -0.21175384521484375, 1.5866317749023438, 0.6528816223144531, 1.6098403930664062, 1.035308837890625, 0.58416748046875, 0.5181407928466797, -0.22154998779296875, 2.4559478759765625, 3.3132057189941406, 3.38330078125, -0.0064849853515625, -2.5393524169921875, 0.4452362060546875, -0.04294776916503906, 1.0677566528320312, 2.70562744140625, 0.109832763671875, 0.5961456298828125, 0.32122802734375, 1.3495826721191406, 3.0425262451171875, 1.431549072265625, 0.6520538330078125, 1.5999641418457031, 2.7959346771240234, 0.0131988525390625, 0.7829055786132812, 0.8387298583984375, 0.9506874084472656, 1.20477294921875, 1.2854995727539062, 3.745838165283203, 0.5876960754394531, -2.867429733276367, 4.9259033203125, 0.48731231689453125, -0.12427520751953125, 0.347503662109375, -0.08502769470214844, 0.1711273193359375, 0.630401611328125, 0.630828857421875, 1.8494491577148438, 1.5731792449951172, 0.9378128051757812, 2.0830535888671875, 0.000316619873046875, 0.31219482421875, 4.135379791259766, 0.17975997924804688, -0.7802963256835938, 2.8156814575195312, 1.1214752197265625, 1.1164512634277344, 1.338714599609375, 0.1481170654296875, -1.6842575073242188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000257.npy"} +{"epoch": 0.3885109599395314, "step": 258, "batch_size": 64, "mean": 0.7705294489860535, "std": 1.040041446685791, "min": -1.873260498046875, "p10": -0.41568489074707027, "median": 0.607025146484375, "p90": 2.083509826660156, "max": 3.627288818359375, "pos_frac": 0.8125, "sample": [0.8734283447265625, 0.4025115966796875, -0.6926116943359375, -0.39122772216796875, 0.15529632568359375, 3.4662647247314453, 2.0855560302734375, 2.0787353515625, 1.8394947052001953, 0.06554603576660156, 0.11525726318359375, 0.60528564453125, 0.9743499755859375, 1.2621917724609375, 0.2799835205078125, 1.81317138671875, 0.5528411865234375, -0.4261665344238281, 0.2865409851074219, -1.1989860534667969, 0.4748878479003906, 0.565948486328125, -0.901275634765625, 1.2633438110351562, 1.5951690673828125, 0.24647903442382812, 0.96612548828125, 0.37358856201171875, 0.9868812561035156, 0.15734100341796875, 1.471405029296875, 0.26039695739746094, 0.2340087890625, 1.4070491790771484, 0.36989784240722656, -0.4681434631347656, 0.8206100463867188, -0.47702789306640625, -0.09906005859375, 3.083587646484375, 1.1861915588378906, -0.16998291015625, 1.4415130615234375, 0.7707977294921875, 3.627288818359375, 1.240203857421875, 2.365234375, 1.1469249725341797, -0.3509521484375, 1.4209346771240234, 0.6087646484375, 0.9300384521484375, 0.5478591918945312, 0.4496650695800781, 0.6845855712890625, 0.3433380126953125, 2.4986572265625, -1.873260498046875, 1.1107177734375, 0.2073535919189453, 1.2042312622070312, 0.8734474182128906, 2.6773605346679688, -0.10570144653320312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000258.npy"} +{"epoch": 0.3900226757369615, "step": 259, "batch_size": 64, "mean": 1.290165901184082, "std": 1.26756751537323, "min": -1.351827621459961, "p10": -0.20341053009033203, "median": 1.3156681060791016, "p90": 2.7976356506347657, "max": 4.647186279296875, "pos_frac": 0.796875, "sample": [0.5074348449707031, 2.35174560546875, -0.21026611328125, 1.4876117706298828, 2.50115966796875, 1.6728019714355469, 2.612438201904297, 3.5106201171875, 1.895050048828125, -0.48554229736328125, 2.5964431762695312, -0.6637954711914062, -0.032962799072265625, 1.685546875, 1.3932418823242188, 1.1257057189941406, -0.49684906005859375, -0.08041000366210938, 2.908660888671875, 2.610321044921875, 0.5851554870605469, 2.7587432861328125, 3.3198070526123047, 2.2170753479003906, 1.7225799560546875, 0.044467926025390625, 0.818939208984375, -0.057037353515625, 0.2814521789550781, 1.2575721740722656, 1.2182769775390625, 0.40985107421875, -1.351827621459961, 2.2858047485351562, -0.6510391235351562, 1.1919021606445312, 2.6312789916992188, 1.0902252197265625, 2.67291259765625, 1.377410888671875, 2.7976608276367188, 1.1568565368652344, -0.07666015625, 4.647186279296875, 0.3135528564453125, 1.894287109375, -0.21594810485839844, -0.18741416931152344, 0.04503631591796875, 0.12606430053710938, 2.703765869140625, 3.125019073486328, 0.9778900146484375, 0.007049560546875, 0.14978790283203125, 2.8647003173828125, 0.5467071533203125, 1.5724964141845703, -0.18716049194335938, 1.5574226379394531, 1.3737640380859375, 2.797576904296875, 1.495697021484375, 2.3727664947509766], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000259.npy"} +{"epoch": 0.3915343915343915, "step": 260, "batch_size": 64, "mean": 0.873779296875, "std": 1.2872263193130493, "min": -1.0322628021240234, "p10": -0.8197540283203125, "median": 0.9337253570556641, "p90": 2.3675411224365237, "max": 4.3275146484375, "pos_frac": 0.71875, "sample": [1.9614295959472656, -0.19500350952148438, 1.536508560180664, -0.75531005859375, -0.5274848937988281, 4.3275146484375, 0.2990264892578125, 2.1330718994140625, 1.9565963745117188, 0.1266937255859375, 2.0392074584960938, 0.4235420227050781, 1.3952407836914062, 2.2552261352539062, 0.48303985595703125, -0.8295478820800781, 2.3975830078125, 0.36701393127441406, 0.9735260009765625, 3.4300384521484375, 3.618896484375, -0.01927947998046875, 0.9452590942382812, 2.0908050537109375, -0.35808563232421875, 0.3733863830566406, 2.5464725494384766, 2.297443389892578, -0.8770599365234375, 1.4087677001953125, 1.9472732543945312, 1.45404052734375, 1.9502487182617188, 0.201202392578125, 1.0148735046386719, 0.9221916198730469, -0.708770751953125, 0.6434745788574219, -1.0322628021240234, 1.0711402893066406, 0.06105232238769531, 1.3028030395507812, 0.2927207946777344, -0.7969017028808594, 1.5325546264648438, 1.2534008026123047, 2.4382591247558594, -0.503997802734375, 2.284482955932617, -1.020782470703125, -0.07764053344726562, 0.5934925079345703, -0.0854949951171875, 0.9480743408203125, 1.0267162322998047, 0.6760711669921875, -0.7169189453125, 1.071044921875, 3.7674560546875, -1.0156402587890625, 1.0560379028320312, -1.0016937255859375, -0.9754638671875, 0.5243148803710938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000260.npy"} +{"epoch": 0.3930461073318216, "step": 261, "batch_size": 64, "mean": 0.8054369688034058, "std": 1.1788699626922607, "min": -1.655548095703125, "p10": -0.5328998565673827, "median": 0.7123508453369141, "p90": 2.4359771728515627, "max": 3.3301753997802734, "pos_frac": 0.75, "sample": [-0.13917922973632812, 1.543182373046875, 0.9747867584228516, -0.4462089538574219, -0.30001068115234375, 2.310140609741211, -0.1923980712890625, 1.6258430480957031, 1.9373092651367188, 0.24186325073242188, -1.655548095703125, 0.43682861328125, -1.0615768432617188, 2.227916717529297, 0.04839897155761719, -0.340972900390625, -0.1671905517578125, 2.0465850830078125, 0.0502166748046875, 3.1080894470214844, 0.575775146484375, 1.3853263854980469, 0.3376579284667969, -0.927703857421875, 2.470855712890625, 0.722747802734375, 3.317230224609375, 1.0525989532470703, 1.1384353637695312, 0.37830162048339844, 0.9109764099121094, 1.0691642761230469, 3.1514453887939453, 1.3844451904296875, 0.8979396820068359, -0.5700531005859375, 3.3301753997802734, 0.1495361328125, -0.3918304443359375, 0.23135757446289062, 1.6061935424804688, 0.2020587921142578, 1.1896600723266602, 0.5958633422851562, 1.1143341064453125, -0.875152587890625, 0.7908134460449219, -1.462738037109375, -0.41968536376953125, 0.35910797119140625, 0.7895660400390625, 0.5283279418945312, 2.3614654541015625, 3.228761672973633, 0.6730194091796875, -0.6272125244140625, 0.7019538879394531, 1.1763687133789062, -0.38524627685546875, 2.4679107666015625, 0.5211181640625, 0.7293891906738281, 2.1045379638671875, 1.3150901794433594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000261.npy"} +{"epoch": 0.3945578231292517, "step": 262, "batch_size": 64, "mean": 0.8231501579284668, "std": 1.239174246788025, "min": -1.2545318603515625, "p10": -0.3936408996582031, "median": 0.5426568984985352, "p90": 2.6813594818115236, "max": 4.0101776123046875, "pos_frac": 0.75, "sample": [-0.5463943481445312, 0.63775634765625, 0.03995513916015625, 0.9371509552001953, -1.2545318603515625, 0.5722198486328125, 0.0737762451171875, -0.181854248046875, 0.6472129821777344, 2.3652000427246094, -0.06640625, 1.9879379272460938, 0.7564849853515625, 0.11724853515625, 2.5982627868652344, -0.8587589263916016, 0.3436298370361328, 4.0101776123046875, 3.7447509765625, 0.6910800933837891, 0.66790771484375, 0.3962287902832031, 0.14492034912109375, -0.3609619140625, 1.1590423583984375, 3.421377182006836, -0.136474609375, 1.8868179321289062, -0.0169677734375, 0.3699321746826172, 3.0783843994140625, 0.3701591491699219, 0.3393096923828125, 0.5590076446533203, -1.1611862182617188, 1.4698257446289062, 0.7708568572998047, 2.7169723510742188, -0.29888916015625, 0.7936935424804688, -0.6404170989990234, 0.52630615234375, -0.2338581085205078, 1.3271636962890625, 3.632659912109375, 0.4203929901123047, 0.50823974609375, 0.3487567901611328, -1.0453948974609375, 0.9505329132080078, 1.067962646484375, -0.0305938720703125, 3.3212509155273438, 0.4215221405029297, 2.4463729858398438, 0.0197296142578125, 2.1202545166015625, 1.3020172119140625, 0.629669189453125, 2.048248291015625, -0.2707023620605469, 0.0212860107421875, 1.4130020141601562, -0.40764617919921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000262.npy"} +{"epoch": 0.3960695389266818, "step": 263, "batch_size": 64, "mean": 0.5800192356109619, "std": 1.2537556886672974, "min": -1.5982894897460938, "p10": -1.1561855316162108, "median": 0.6343441009521484, "p90": 2.420099639892578, "max": 4.2496490478515625, "pos_frac": 0.6875, "sample": [0.07520294189453125, 0.7950248718261719, -1.5731353759765625, -1.217987060546875, -1.229654312133789, -0.3707466125488281, -0.6866912841796875, 0.6531982421875, 1.0339717864990234, 3.4993228912353516, 0.8631591796875, -0.1289520263671875, 0.4658355712890625, 0.8294334411621094, 0.0384063720703125, -1.3270492553710938, 0.8443832397460938, 1.571533203125, 0.9382553100585938, -0.5245208740234375, 0.42960357666015625, 0.8848648071289062, 0.45592498779296875, 1.3559455871582031, -0.8123359680175781, 0.09234809875488281, 2.5134658813476562, 0.56341552734375, 0.9938316345214844, 0.9681396484375, 2.7788143157958984, 1.44696044921875, -1.0119819641113281, 0.327178955078125, -0.7530231475830078, -0.56903076171875, -1.3929519653320312, 2.4297561645507812, 0.8884315490722656, 1.1906204223632812, 1.2850341796875, -1.287384033203125, -0.1851348876953125, 4.2496490478515625, -0.2212982177734375, 0.352813720703125, 2.3975677490234375, 0.9449291229248047, 1.218048095703125, 0.6154899597167969, -0.380462646484375, 2.7366714477539062, -0.158843994140625, 0.824737548828125, 2.891326904296875, 0.5516948699951172, 1.8408222198486328, 1.3776397705078125, 0.7845420837402344, -0.9690570831298828, 0.081573486328125, -1.5982894897460938, 1.6162395477294922, 0.8239536285400391], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000263.npy"} +{"epoch": 0.3975812547241119, "step": 264, "batch_size": 64, "mean": 0.9128766059875488, "std": 1.5118409395217896, "min": -1.983612060546875, "p10": -0.7522216796875, "median": 0.6524448394775391, "p90": 3.3909446716308596, "max": 4.120168685913086, "pos_frac": 0.6875, "sample": [3.0179595947265625, -0.7161407470703125, -0.269073486328125, 0.4684276580810547, 0.41500091552734375, -0.5386295318603516, -0.15875244140625, -0.1544036865234375, 1.6427688598632812, 2.0570144653320312, -0.37140846252441406, 0.6289787292480469, -1.983612060546875, 1.6577911376953125, 0.05544281005859375, 3.6878204345703125, 3.4154739379882812, -0.35845947265625, -0.4787445068359375, 2.0308837890625, 0.6759109497070312, 0.9442977905273438, 1.2799911499023438, 2.385711669921875, 1.6985015869140625, 4.048633575439453, -0.4130382537841797, 1.165130615234375, -0.0685882568359375, 0.3220539093017578, 1.0502471923828125, 0.4611663818359375, 2.666717529296875, 0.9549179077148438, 0.3739509582519531, 0.02439117431640625, 3.4328346252441406, 0.0489044189453125, 0.2260589599609375, 1.8339309692382812, -0.6495208740234375, -1.4141464233398438, 1.721832275390625, 0.3485260009765625, 3.333709716796875, 1.542306900024414, -0.41483306884765625, 1.8303680419921875, -1.7902755737304688, 0.697723388671875, -0.7676849365234375, 1.6584854125976562, 4.120168685913086, 1.2194404602050781, -1.4446907043457031, -0.9660320281982422, 0.42615509033203125, 3.6771240234375, 2.8281097412109375, -0.18137550354003906, -0.875152587890625, 3.707672119140625, 1.003021240234375, 1.6531105041503906], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000264.npy"} +{"epoch": 0.39909297052154197, "step": 265, "batch_size": 64, "mean": 0.8688779473304749, "std": 1.2427005767822266, "min": -3.3548507690429688, "p10": -0.5816888809204099, "median": 0.8872966766357422, "p90": 2.3615942001342773, "max": 3.6213226318359375, "pos_frac": 0.78125, "sample": [0.6666717529296875, 0.9047107696533203, -0.3440055847167969, -0.955078125, 0.8824119567871094, 0.97467041015625, 0.13821792602539062, 1.8463668823242188, -1.240234375, 2.1550216674804688, -0.10413742065429688, 0.6840667724609375, 1.2391433715820312, 1.4448585510253906, -3.3548507690429688, 1.4012985229492188, 0.5239219665527344, 1.6459941864013672, 2.430482864379883, -0.9699058532714844, 0.6109600067138672, 1.66796875, 0.7984046936035156, 0.5930385589599609, -0.4041748046875, 0.4236946105957031, 3.312652587890625, 1.0871925354003906, 1.7944183349609375, -0.6577663421630859, 2.1227340698242188, 1.869476318359375, 0.5018997192382812, 2.3872737884521484, 1.443328857421875, 2.988983154296875, 0.8975982666015625, 0.13168716430664062, -0.2861480712890625, -0.13820648193359375, 0.48357391357421875, 0.9116401672363281, 0.6009769439697266, 1.8849258422851562, 3.0579071044921875, 3.6213226318359375, 2.3347511291503906, 0.6791763305664062, 0.5483016967773438, -0.9967441558837891, 1.63519287109375, 0.892181396484375, -0.16767501831054688, 1.7645950317382812, 2.12591552734375, 2.373098373413086, 1.276519775390625, -1.7767372131347656, 0.43350982666015625, -0.12619400024414062, 0.9450302124023438, 0.02089691162109375, 1.6707229614257812, 0.3006591796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000265.npy"} +{"epoch": 0.40060468631897206, "step": 266, "batch_size": 64, "mean": 0.8188612461090088, "std": 1.4309351444244385, "min": -3.0721588134765625, "p10": -0.6937225341796875, "median": 0.7351455688476562, "p90": 2.323872184753418, "max": 5.275634765625, "pos_frac": 0.71875, "sample": [0.4891166687011719, 2.2337722778320312, 4.0881500244140625, 0.6614456176757812, 1.0903854370117188, -0.4041252136230469, -0.5699844360351562, -0.5854949951171875, 0.6488895416259766, -0.4720001220703125, 1.6136970520019531, 1.4140625, -0.10114479064941406, 1.0704402923583984, 0.5162277221679688, 1.15087890625, -0.23336410522460938, 5.275634765625, -0.4105224609375, 0.7805538177490234, 2.845643997192383, 1.87432861328125, 0.5136184692382812, 2.950714111328125, 1.1435928344726562, 1.5956878662109375, -1.1913871765136719, 2.2568359375, 1.2812156677246094, 0.7146987915039062, 1.8839473724365234, 1.0511856079101562, 1.5586776733398438, -0.5238113403320312, 0.5779209136962891, -0.36763763427734375, 1.2938766479492188, 1.0666961669921875, 0.3924598693847656, 0.038936614990234375, -0.6941986083984375, -0.6926116943359375, 0.305206298828125, 1.15667724609375, -1.0094833374023438, 0.7555923461914062, 0.6949005126953125, 0.6276321411132812, 1.103546142578125, 1.4242324829101562, -1.8256912231445312, 0.24876976013183594, -3.0721588134765625, -0.8980712890625, 1.112274169921875, 0.9980316162109375, 4.967399597167969, -0.042377471923828125, 3.0205230712890625, 0.44036865234375, 2.352602005004883, 1.6776485443115234, 1.3972969055175781, -0.8548088073730469], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000266.npy"} +{"epoch": 0.4021164021164021, "step": 267, "batch_size": 64, "mean": 0.5725547075271606, "std": 1.2883800268173218, "min": -3.2768821716308594, "p10": -0.857038116455078, "median": 0.7910404205322266, "p90": 1.9379587173461927, "max": 4.305030822753906, "pos_frac": 0.65625, "sample": [0.00518035888671875, 1.1251277923583984, -0.16324996948242188, -1.6470470428466797, 1.335174560546875, -1.04132080078125, 1.0376243591308594, 0.94866943359375, 0.9757766723632812, 0.3522682189941406, -0.017263412475585938, 1.5059280395507812, 0.5646514892578125, 4.305030822753906, -0.73065185546875, -1.7334709167480469, -0.41147613525390625, -0.15178585052490234, 0.398590087890625, -2.0897903442382812, -0.7797012329101562, -1.2054176330566406, 0.335601806640625, 0.8626785278320312, 2.9273681640625, 2.082815170288086, -0.42684173583984375, -0.10829925537109375, 2.155548095703125, -0.4742584228515625, 0.7316055297851562, 1.0512218475341797, 2.1011505126953125, 1.3258857727050781, -0.32424163818359375, 0.8504753112792969, 1.5159454345703125, 1.1582183837890625, 1.3117523193359375, 3.724853515625, 1.3787612915039062, -0.2267475128173828, 2.4966278076171875, 0.08278274536132812, 1.5999603271484375, 1.0882549285888672, 1.4672775268554688, 0.6724395751953125, -3.2768821716308594, 1.3085441589355469, 1.4134521484375, 1.000396728515625, -0.2989177703857422, 0.10152053833007812, -0.8901824951171875, -0.2704277038574219, 1.1860637664794922, 0.56976318359375, 1.1946525573730469, -0.4138031005859375, 1.4474296569824219, 0.9000358581542969, 1.2833175659179688, -0.555145263671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000267.npy"} +{"epoch": 0.4036281179138322, "step": 268, "batch_size": 64, "mean": 0.8244317770004272, "std": 1.24802565574646, "min": -0.772705078125, "p10": -0.5535276412963867, "median": 0.5699682235717773, "p90": 2.4032455444335943, "max": 5.237945556640625, "pos_frac": 0.703125, "sample": [0.60137939453125, 3.296417236328125, 0.08782768249511719, -0.772705078125, 0.0017852783203125, 2.2257232666015625, 1.0550003051757812, 0.611663818359375, -0.1522808074951172, 3.3050575256347656, 0.9352798461914062, -0.2014312744140625, -0.5120487213134766, 0.90380859375, 0.0791015625, 0.6213321685791016, 0.6706600189208984, -0.316558837890625, 0.009046554565429688, -0.21875381469726562, 1.7953758239746094, 1.4514846801757812, -0.04878425598144531, 0.518341064453125, -0.7365608215332031, -0.407196044921875, 1.8238296508789062, -0.6721572875976562, 0.7030868530273438, 0.924591064453125, 1.7308197021484375, 0.3563346862792969, 0.11895370483398438, 1.5465087890625, -0.11244964599609375, 2.7850799560546875, 0.249786376953125, -0.7127799987792969, -0.662261962890625, 0.7865161895751953, 0.4988555908203125, 0.2802734375, -0.3664093017578125, 5.237945556640625, 1.4600982666015625, 1.518035888671875, 2.1058502197265625, -0.25791358947753906, 0.47217559814453125, -0.5713043212890625, -0.30051422119140625, -0.6727218627929688, 2.2903671264648438, -0.18010330200195312, 3.5386505126953125, 1.6228523254394531, 2.4516220092773438, 1.997406005859375, 2.9281368255615234, 0.5989131927490234, 0.5410232543945312, 2.1386947631835938, 1.5530929565429688, 0.20978164672851562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000268.npy"} +{"epoch": 0.4051398337112623, "step": 269, "batch_size": 64, "mean": 1.0223793983459473, "std": 1.3003785610198975, "min": -1.4603424072265625, "p10": -0.7812740325927734, "median": 1.0780839920043945, "p90": 2.761871528625489, "max": 3.7717742919921875, "pos_frac": 0.78125, "sample": [0.1586761474609375, -1.1579818725585938, 0.242523193359375, 1.8719291687011719, 1.279083251953125, -0.7902450561523438, -0.1801910400390625, 2.850889205932617, 1.9686355590820312, 0.7004776000976562, -1.4432296752929688, 2.5274276733398438, 0.5792427062988281, 1.4342689514160156, 2.3251991271972656, 2.5629043579101562, 0.471923828125, 1.4250564575195312, 2.104736328125, -1.0104598999023438, 0.9071273803710938, -0.24945068359375, 2.557291030883789, 0.0351409912109375, -0.8824462890625, 3.4713687896728516, 0.43665122985839844, 3.7717742919921875, 0.018138885498046875, 1.7895278930664062, 0.8487377166748047, 0.8122596740722656, -0.015954971313476562, 1.0792312622070312, 2.0680084228515625, 0.32219505310058594, -0.17302703857421875, 2.1277618408203125, -0.5667648315429688, 1.3587417602539062, 2.621236801147461, 1.8842754364013672, 1.4419784545898438, 1.5523090362548828, 2.925495147705078, -0.8417015075683594, 1.0769367218017578, 0.5025291442871094, -0.7603416442871094, 0.0872802734375, 3.454193115234375, 0.046352386474609375, 2.8452301025390625, 0.03094482421875, 1.1636810302734375, -1.4603424072265625, 2.8221435546875, 2.1936492919921875, -0.6004104614257812, 1.7336196899414062, 1.6896514892578125, 0.3895416259765625, 1.4540424346923828, 1.5428047180175781], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000269.npy"} +{"epoch": 0.40665154950869237, "step": 270, "batch_size": 64, "mean": 0.6104167103767395, "std": 1.1975998878479004, "min": -3.16082763671875, "p10": -0.8543174743652342, "median": 0.6565046310424805, "p90": 2.0667373657226564, "max": 3.4451141357421875, "pos_frac": 0.734375, "sample": [2.4020252227783203, 0.8268089294433594, -0.27906036376953125, 2.5818405151367188, 0.14225387573242188, 2.9651565551757812, 0.5373649597167969, -0.7362747192382812, 2.71539306640625, 0.195465087890625, 3.4451141357421875, 0.8349227905273438, 0.1531982421875, -0.047698974609375, 0.20529937744140625, -1.2516632080078125, 0.605194091796875, 0.5888786315917969, 0.7078151702880859, 1.7385406494140625, -0.171112060546875, 1.71197509765625, 0.1965789794921875, -0.163543701171875, 1.0123062133789062, 1.33270263671875, 0.9044170379638672, -3.16082763671875, 1.8222122192382812, 0.17599105834960938, 1.407440185546875, 0.070709228515625, 0.20569610595703125, 2.2119674682617188, 0.1767425537109375, 1.503631591796875, 0.7084121704101562, -0.148040771484375, 0.8878822326660156, -1.6789703369140625, 0.711334228515625, 2.0185012817382812, 2.0874099731445312, -1.4068069458007812, 0.9808864593505859, -0.23291778564453125, -0.003265380859375, 1.0383453369140625, 1.4549636840820312, 1.0292739868164062, 1.0760059356689453, -0.058254241943359375, -0.9049072265625, 1.3074722290039062, 0.29311370849609375, -1.8292999267578125, 0.20563507080078125, -1.5756111145019531, 1.4412994384765625, 0.8804149627685547, 0.4701957702636719, -0.21175765991210938, 1.0970382690429688, 1.8608551025390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000270.npy"} +{"epoch": 0.40816326530612246, "step": 271, "batch_size": 64, "mean": 0.5049762725830078, "std": 1.517310619354248, "min": -2.122100830078125, "p10": -1.422311973571777, "median": 0.36506080627441406, "p90": 2.1057418823242187, "max": 5.76666259765625, "pos_frac": 0.578125, "sample": [1.7084426879882812, 0.9960403442382812, -1.8039093017578125, 1.8650283813476562, 2.5033950805664062, 0.8566818237304688, 0.697723388671875, -0.37642860412597656, 1.5785255432128906, -1.12701416015625, 2.0994720458984375, -1.548868179321289, 1.8292694091796875, -0.12584304809570312, 1.68994140625, 2.8245773315429688, 0.2839202880859375, -0.514068603515625, 0.3312263488769531, 3.788970947265625, 0.7591018676757812, 0.44403839111328125, 1.6160449981689453, -0.6811408996582031, -0.523956298828125, -0.6319198608398438, -0.7738113403320312, 1.482177734375, -0.5084381103515625, 0.5530910491943359, -0.013105392456054688, 1.7637081146240234, -0.28647613525390625, -0.4574146270751953, 0.1366424560546875, 2.350372314453125, 1.18865966796875, -0.5203018188476562, 0.8804473876953125, 1.9854049682617188, 0.765899658203125, -1.8653373718261719, 5.76666259765625, -0.71044921875, 1.2217178344726562, -0.8281097412109375, -0.5212326049804688, -1.6671199798583984, -2.064638137817383, -1.8932723999023438, 0.11324691772460938, -0.640350341796875, 1.0857467651367188, 1.7099952697753906, 1.7549514770507812, 2.108428955078125, -2.122100830078125, 0.398895263671875, -0.697174072265625, 0.2665519714355469, -0.27398109436035156, -0.8686370849609375, 3.2824268341064453, 1.6761550903320312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000271.npy"} +{"epoch": 0.40967498110355255, "step": 272, "batch_size": 64, "mean": 1.0848881006240845, "std": 1.2788180112838745, "min": -1.079010009765625, "p10": -0.1900985717773437, "median": 0.7440853118896484, "p90": 2.9060127258300783, "max": 4.6676025390625, "pos_frac": 0.828125, "sample": [0.4608020782470703, -0.20165252685546875, -0.6805953979492188, 1.2539253234863281, 0.3138885498046875, 2.9753494262695312, 0.1270599365234375, 2.858123779296875, 2.3066482543945312, 0.8320541381835938, 1.6143417358398438, 1.7454872131347656, -0.037876129150390625, -0.33818817138671875, 1.5197868347167969, 0.5417366027832031, 1.9080810546875, 0.15253448486328125, 2.3771705627441406, 3.6112098693847656, 1.1128425598144531, 0.7957763671875, 1.3895645141601562, 1.3563175201416016, 3.374898910522461, 0.21317291259765625, 0.6199798583984375, -0.07773590087890625, 1.5216445922851562, 2.9265365600585938, 0.11962127685546875, 4.641441345214844, 1.4166488647460938, -0.39421653747558594, 1.7015552520751953, 0.027681350708007812, -0.16313934326171875, 0.30536842346191406, 1.6311149597167969, 0.7658576965332031, 2.6448745727539062, 4.058937072753906, 2.031036376953125, 0.5825271606445312, 0.06041717529296875, -0.4300060272216797, 4.6676025390625, -0.4065704345703125, 1.1543693542480469, 1.5737228393554688, 0.15495681762695312, 2.2649574279785156, -1.079010009765625, 0.5023193359375, 0.5850372314453125, 0.195770263671875, 1.2920684814453125, 1.4118194580078125, 0.65118408203125, 0.1571025848388672, 0.06276702880859375, 0.07749557495117188, 0.7223129272460938, -0.1276702880859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000272.npy"} +{"epoch": 0.41118669690098264, "step": 273, "batch_size": 64, "mean": 0.9696696996688843, "std": 1.3231147527694702, "min": -2.003528594970703, "p10": -0.4246303558349609, "median": 0.847381591796875, "p90": 2.243489837646485, "max": 5.914642333984375, "pos_frac": 0.8125, "sample": [-0.71209716796875, 1.0938796997070312, 1.743682861328125, -1.2282791137695312, 2.28558349609375, 0.925201416015625, 0.944732666015625, 0.5436744689941406, -0.4325370788574219, 0.40474700927734375, -0.40618133544921875, 0.4787139892578125, 1.0966110229492188, 0.39525604248046875, 1.4938888549804688, 1.6190605163574219, 1.2425880432128906, 0.5696029663085938, 1.9359779357910156, 0.5415802001953125, 1.93060302734375, 0.6419448852539062, -0.4377861022949219, 0.22533798217773438, -1.00531005859375, 1.5653457641601562, 2.3185348510742188, 0.9298553466796875, -0.10562324523925781, 0.4925804138183594, 0.745574951171875, 5.914642333984375, 0.32318878173828125, -2.003528594970703, 0.4750823974609375, 0.790374755859375, 1.7949943542480469, 1.0633010864257812, 0.2427520751953125, 0.8190383911132812, -0.6173095703125, 0.5579986572265625, 1.3214950561523438, 0.4713592529296875, 0.1109466552734375, 4.6936492919921875, 4.224714279174805, 1.8122615814208984, 1.7807388305664062, 0.9027366638183594, 0.47479248046875, -0.127593994140625, -0.205291748046875, 0.9748916625976562, 0.8757247924804688, 2.1452713012695312, 1.0662918090820312, 1.935638427734375, 0.09533119201660156, 0.8987274169921875, 2.7551498413085938, 3.824533462524414, -0.3640785217285156, 1.1942901611328125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000273.npy"} +{"epoch": 0.4126984126984127, "step": 274, "batch_size": 64, "mean": 0.8681962490081787, "std": 1.2716144323349, "min": -2.80926513671875, "p10": -0.59119873046875, "median": 0.8601922988891602, "p90": 2.327290344238281, "max": 4.123443603515625, "pos_frac": 0.734375, "sample": [1.7770309448242188, -0.81134033203125, 0.23523521423339844, 0.9877471923828125, -0.1075286865234375, -2.80926513671875, 1.6542892456054688, 2.1746292114257812, 0.3276405334472656, 0.43703460693359375, 1.9119720458984375, 0.447235107421875, 0.37284088134765625, -0.589019775390625, 0.6888465881347656, 2.8692092895507812, 3.0284671783447266, -1.5358943939208984, 0.8277759552001953, 1.0889739990234375, 2.336395263671875, 1.90093994140625, 3.061931610107422, 1.1678485870361328, 0.7235107421875, -0.08591842651367188, 1.635162353515625, 4.123443603515625, 0.9179477691650391, -0.592132568359375, 0.17592239379882812, -0.327728271484375, 1.344970703125, 0.099517822265625, -0.4061412811279297, 2.3235092163085938, -0.7509078979492188, -1.048614501953125, 1.1844863891601562, 0.8247146606445312, -0.019062042236328125, 2.0738525390625, 0.511474609375, 0.892608642578125, -0.15679550170898438, 1.0511627197265625, 0.6025180816650391, -1.7182693481445312, 1.4751510620117188, 0.3934516906738281, 2.3289108276367188, 1.9062576293945312, 1.300140380859375, 0.5044708251953125, 1.9120712280273438, -0.0441131591796875, 1.2877540588378906, -0.008535385131835938, 3.573211669921875, 2.0459976196289062, -0.3946952819824219, 1.2535324096679688, 1.5791549682617188, 1.6295719146728516], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000274.npy"} +{"epoch": 0.41421012849584277, "step": 275, "batch_size": 64, "mean": 0.9453713893890381, "std": 1.3883007764816284, "min": -1.8126068115234375, "p10": -0.6518165588378906, "median": 0.6960830688476562, "p90": 2.906297302246095, "max": 4.278564453125, "pos_frac": 0.71875, "sample": [1.355743408203125, 0.6926116943359375, 0.12427520751953125, 2.4711265563964844, 2.490957260131836, 0.5352878570556641, 0.36241912841796875, -1.7411251068115234, 3.7589797973632812, 1.798736572265625, -0.1150970458984375, 1.1959228515625, 0.28955841064453125, 4.2271881103515625, -0.15549850463867188, 2.3439197540283203, 0.5593185424804688, 1.076375961303711, 4.278564453125, -0.7356109619140625, -1.8126068115234375, 3.760578155517578, 0.7139129638671875, 0.823211669921875, 1.2983627319335938, -0.091064453125, -0.19060134887695312, -0.20629119873046875, -0.9792518615722656, 0.5759201049804688, 1.145721435546875, 3.6152572631835938, -0.5981063842773438, 3.0681533813476562, 2.5286331176757812, 1.17138671875, -0.03854179382324219, 0.7760581970214844, -0.471771240234375, 0.274322509765625, 1.7017898559570312, 0.4066734313964844, -0.4233589172363281, -0.2288036346435547, 0.5808048248291016, 1.6436538696289062, 2.0347366333007812, 0.699554443359375, -0.853057861328125, 0.6353416442871094, 1.0010566711425781, 0.6594581604003906, -0.24649429321289062, 1.2785873413085938, 2.036670684814453, 3.5562667846679688, -0.6931419372558594, 1.0559768676757812, 0.6156387329101562, 1.9461803436279297, 2.4083328247070312, -0.674835205078125, 0.037517547607421875, 1.1482810974121094], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000275.npy"} +{"epoch": 0.41572184429327286, "step": 276, "batch_size": 64, "mean": 1.0291404724121094, "std": 1.2617276906967163, "min": -1.9732208251953125, "p10": -0.5046981811523436, "median": 1.0148296356201172, "p90": 2.5608680725097654, "max": 4.47950553894043, "pos_frac": 0.796875, "sample": [1.4996376037597656, 0.00275421142578125, -0.71856689453125, 2.0913848876953125, 1.1767387390136719, 0.7539749145507812, 3.5613880157470703, -0.6036300659179688, 0.9471836090087891, 0.7125396728515625, 1.1770744323730469, -0.09293937683105469, 2.9759597778320312, 1.2125473022460938, 3.7837905883789062, 1.3829536437988281, 0.3264808654785156, -1.0127906799316406, 0.7431793212890625, 0.7170028686523438, -0.1588134765625, 1.9744186401367188, 1.9177093505859375, 1.0062065124511719, -0.15012359619140625, 0.12054252624511719, 1.9358444213867188, -0.1303863525390625, 2.541095733642578, 1.8930587768554688, 1.2224769592285156, 1.192047119140625, 0.6858749389648438, -0.6699886322021484, 2.0511550903320312, -1.259124755859375, 4.47950553894043, 0.17278289794921875, 0.769866943359375, 1.6999473571777344, -1.5457839965820312, 0.5242233276367188, 2.5590362548828125, 0.4515857696533203, 0.1026458740234375, -1.9732208251953125, 2.134197235107422, 1.5543670654296875, 2.6212921142578125, 0.9979934692382812, -0.03798675537109375, 2.0399322509765625, 2.5616531372070312, 0.15634536743164062, 2.2444839477539062, 1.815948486328125, 1.2597026824951172, 1.0871505737304688, 0.1605072021484375, 2.7106246948242188, 0.5216007232666016, 1.2383346557617188, 1.0234527587890625, -0.27385711669921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000276.npy"} +{"epoch": 0.41723356009070295, "step": 277, "batch_size": 64, "mean": 0.7118738293647766, "std": 1.4732054471969604, "min": -2.52813720703125, "p10": -0.7615371704101561, "median": 0.5048789978027344, "p90": 2.3636367797851574, "max": 6.164268493652344, "pos_frac": 0.6875, "sample": [0.9595489501953125, -0.8080596923828125, 1.7455368041992188, 0.30699920654296875, 1.05877685546875, 0.47934722900390625, 0.7629051208496094, -0.4905548095703125, 1.6792449951171875, 1.5308570861816406, -0.8453636169433594, -0.924102783203125, 0.4090728759765625, 0.00598907470703125, 0.01438140869140625, 1.0818367004394531, 0.40396881103515625, -1.231597900390625, 0.5304107666015625, -0.17086410522460938, 0.9677467346191406, 1.3490524291992188, 0.199310302734375, -0.044769287109375, 0.7786178588867188, 3.02142333984375, 0.3224067687988281, -0.3283538818359375, -0.3134918212890625, 1.3052291870117188, -0.09910011291503906, 0.9916915893554688, -2.52813720703125, 2.0884742736816406, 0.6300773620605469, 1.0163040161132812, -0.652984619140625, -0.48743438720703125, 0.5538558959960938, 0.2785968780517578, 0.9521484375, -0.1597747802734375, 6.164268493652344, 3.334564208984375, 1.10205078125, 0.6952915191650391, 1.2889633178710938, -0.05156707763671875, -1.8968887329101562, 3.4646072387695312, 0.3531951904296875, -0.1784343719482422, 3.623992919921875, 1.3920440673828125, -0.5250473022460938, 0.2380695343017578, 2.4815635681152344, -1.8705482482910156, 1.934356689453125, 4.826656341552734, 1.0805435180664062, 0.12728118896484375, -0.1369190216064453, 1.7726593017578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000277.npy"} +{"epoch": 0.41874527588813304, "step": 278, "batch_size": 64, "mean": 0.872410237789154, "std": 1.2882297039031982, "min": -2.3723068237304688, "p10": -0.5452056884765625, "median": 0.7374105453491211, "p90": 2.5400177001953126, "max": 3.7712326049804688, "pos_frac": 0.75, "sample": [-0.17976760864257812, 0.39727783203125, 1.4208755493164062, 1.3112716674804688, 0.5145034790039062, -0.6166458129882812, -1.8879241943359375, -0.01572418212890625, 2.3443984985351562, 2.9441909790039062, 1.193603515625, -0.1385650634765625, 2.5535125732421875, 3.7712326049804688, 3.0427284240722656, 0.0040130615234375, 0.3135185241699219, 0.4288215637207031, -0.3626289367675781, 2.1495323181152344, 0.1878662109375, 1.511871337890625, 0.9778213500976562, 1.1807479858398438, -2.3723068237304688, 3.0949249267578125, 0.5277862548828125, 2.239194869995117, 2.2579345703125, 0.5712051391601562, 1.1891021728515625, 0.2010498046875, -0.50384521484375, -0.14615631103515625, 2.8967132568359375, 0.9359893798828125, 0.8221435546875, -0.5476226806640625, 0.6526775360107422, 1.7943801879882812, 3.3873062133789062, 2.5085296630859375, -0.7568016052246094, -0.03078460693359375, 0.4892444610595703, 1.8248519897460938, 1.2874984741210938, 0.2747764587402344, -1.4632186889648438, 0.5275039672851562, 1.360870361328125, 1.7115478515625, -1.5269012451171875, -0.5395660400390625, 0.34455108642578125, 2.48175048828125, 1.9854736328125, 0.9352874755859375, 0.5156364440917969, 1.0450668334960938, 0.17927932739257812, -0.1643505096435547, 1.5340652465820312, 1.2629356384277344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000278.npy"} +{"epoch": 0.42025699168556313, "step": 279, "batch_size": 64, "mean": 1.1091272830963135, "std": 1.1529980897903442, "min": -1.8626289367675781, "p10": -0.12578277587890616, "median": 1.1712188720703125, "p90": 2.5635044097900397, "max": 4.129142761230469, "pos_frac": 0.875, "sample": [0.18264007568359375, 1.8518791198730469, 3.8357410430908203, 1.0876083374023438, -1.8626289367675781, 1.8516693115234375, 0.16839218139648438, 1.3481369018554688, 0.04559326171875, 0.7034378051757812, 2.0903549194335938, 1.8464088439941406, 0.29296875, 1.7576904296875, 1.824310302734375, 4.129142761230469, 1.2491912841796875, 1.9934043884277344, -0.161895751953125, -0.9069862365722656, -0.49372291564941406, 1.4107666015625, 0.8112335205078125, 0.161590576171875, 1.2010498046875, 1.0335540771484375, 0.6660785675048828, 0.2711601257324219, 0.10973358154296875, 2.6281051635742188, 0.6770515441894531, 1.3675270080566406, 1.141387939453125, 0.07367706298828125, 0.06266021728515625, 1.5180511474609375, 2.255390167236328, 1.0909423828125, 0.22193145751953125, 3.338947296142578, 2.79473876953125, 2.412769317626953, 3.14532470703125, 1.3422470092773438, -0.7610015869140625, 1.1280326843261719, 0.7457656860351562, 1.3001899719238281, 2.244842529296875, 1.8155746459960938, 1.240530014038086, -0.380645751953125, 0.8399162292480469, 1.4192543029785156, 0.24614906311035156, -0.0415191650390625, 1.2244873046875, 0.9035491943359375, 0.4845466613769531, 1.2713871002197266, 1.2999744415283203, -1.1222152709960938, 3.141529083251953, 1.4145469665527344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000279.npy"} +{"epoch": 0.4217687074829932, "step": 280, "batch_size": 64, "mean": 1.1231298446655273, "std": 1.1215238571166992, "min": -1.0098609924316406, "p10": -0.12383804321289062, "median": 0.9866065979003906, "p90": 2.5166885375976564, "max": 5.382465362548828, "pos_frac": 0.875, "sample": [1.9137535095214844, 5.382465362548828, 1.500143051147461, 1.2756195068359375, 1.2194137573242188, 2.960725784301758, 0.16681861877441406, -0.77789306640625, 0.5447978973388672, 1.421905517578125, 1.0996780395507812, 1.574554443359375, 0.585205078125, 1.6495819091796875, 1.6088180541992188, 1.9164962768554688, 0.6367721557617188, 0.39522552490234375, 2.0493927001953125, 0.32454681396484375, 1.0421371459960938, 0.9013385772705078, -0.4346466064453125, 0.8083152770996094, 2.1853485107421875, -0.142059326171875, 1.242919921875, 1.2587432861328125, 2.50653076171875, 2.658203125, 0.2881908416748047, -0.58233642578125, 1.187347412109375, -0.4044647216796875, 0.3373680114746094, 1.966827392578125, 3.5576400756835938, 0.3761749267578125, 2.4945755004882812, 0.9379806518554688, 0.215240478515625, -1.0098609924316406, -0.12084197998046875, 0.3274803161621094, 0.11621475219726562, 2.1651954650878906, 0.46521949768066406, -0.1251220703125, 0.32405853271484375, 0.9376430511474609, 2.7155284881591797, 0.3943061828613281, 0.7494621276855469, 2.198505401611328, 1.5452957153320312, 1.0352325439453125, 1.22216796875, 0.9146728515625, 0.29524993896484375, 1.4926223754882812, 0.5657119750976562, 0.33113861083984375, 2.9699935913085938, 2.5210418701171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000280.npy"} +{"epoch": 0.42328042328042326, "step": 281, "batch_size": 64, "mean": 1.0629009008407593, "std": 1.3931818008422852, "min": -1.6785755157470703, "p10": -0.930542755126953, "median": 1.045888900756836, "p90": 3.04851303100586, "max": 4.179899215698242, "pos_frac": 0.78125, "sample": [-1.2208232879638672, 1.8944206237792969, 0.7168540954589844, -0.1448822021484375, 4.179899215698242, 2.684234619140625, 0.5748500823974609, 2.0226898193359375, 1.1420440673828125, 3.64385986328125, 0.29412078857421875, 1.3526763916015625, 1.2349472045898438, 0.9862442016601562, 1.2695541381835938, 0.8543052673339844, 0.8078517913818359, 3.5503158569335938, -0.34563636779785156, 1.2661209106445312, 1.9342002868652344, 1.0568504333496094, 2.4449310302734375, 3.0828933715820312, 1.0349273681640625, 0.5834693908691406, 0.9235115051269531, 0.2712211608886719, 0.678009033203125, -1.6785755157470703, 1.9181556701660156, -0.1310100555419922, -1.2338714599609375, 1.7274818420410156, 2.04962158203125, -1.4160175323486328, 3.560272216796875, 3.417327880859375, 0.018978118896484375, 0.29829978942871094, 2.968292236328125, 0.7635498046875, 3.5485973358154297, 1.465780258178711, 0.8507823944091797, 2.285797119140625, 0.7159309387207031, 0.4920845031738281, 1.2931938171386719, 1.2114849090576172, -0.7369842529296875, 0.04912567138671875, 1.9344444274902344, -1.3042964935302734, 1.5550498962402344, 2.9486122131347656, 1.2934074401855469, -1.1754150390625, 1.7388572692871094, 1.2651214599609375, -0.6399421691894531, -0.6962451934814453, -0.09239959716796875, -1.0134963989257812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000281.npy"} +{"epoch": 0.42479213907785335, "step": 282, "batch_size": 64, "mean": 0.8964922428131104, "std": 1.335062026977539, "min": -1.4930267333984375, "p10": -0.8737140655517578, "median": 0.7467498779296875, "p90": 2.8608366012573243, "max": 4.935146331787109, "pos_frac": 0.78125, "sample": [1.5687789916992188, -1.4930267333984375, -1.079092025756836, 0.8619537353515625, -0.1287841796875, 1.213266372680664, 1.0381813049316406, 0.6122093200683594, 0.16902923583984375, 2.9468536376953125, 2.851104736328125, 2.8650074005126953, 0.7736072540283203, -0.98468017578125, 2.455108642578125, 0.8214111328125, 2.879526138305664, 0.3105735778808594, 0.2641143798828125, 1.5076618194580078, -0.6248550415039062, -0.680877685546875, 1.0562152862548828, 1.7257843017578125, 0.46944427490234375, 0.538177490234375, 0.958984375, 1.69610595703125, 4.935146331787109, 1.3059120178222656, -0.3736457824707031, -0.8151359558105469, -1.3147964477539062, 1.8398284912109375, 0.12647056579589844, -0.3849372863769531, 2.0613021850585938, 0.37244224548339844, -1.1225509643554688, 0.636199951171875, 0.7194671630859375, 2.094951629638672, 0.6160049438476562, 0.9394912719726562, 3.3512821197509766, 3.4030914306640625, 1.0856056213378906, 0.4288444519042969, 0.3303718566894531, 1.4050445556640625, -1.33154296875, 0.27632904052734375, 0.404266357421875, 0.21756935119628906, -0.8988189697265625, 3.475923538208008, 2.3399276733398438, 0.732177734375, -0.7373199462890625, 0.761322021484375, 1.529092788696289, 1.49298095703125, 2.2283058166503906, 0.6531181335449219], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000282.npy"} +{"epoch": 0.42630385487528344, "step": 283, "batch_size": 64, "mean": 0.9660578370094299, "std": 1.5339956283569336, "min": -3.1136322021484375, "p10": -0.7084650039672852, "median": 0.7535648345947266, "p90": 3.0725168228149413, "max": 4.8507232666015625, "pos_frac": 0.71875, "sample": [1.5780487060546875, -1.4962787628173828, -1.33392333984375, 0.1881256103515625, 0.784088134765625, 0.7406654357910156, 0.466522216796875, 0.6267471313476562, 3.847484588623047, -0.6781291961669922, -0.136688232421875, -0.3654937744140625, 0.8805084228515625, 0.3932609558105469, 1.0568389892578125, 0.9047775268554688, 0.3927001953125, 2.0257720947265625, -3.1136322021484375, -0.08526611328125, 2.782196044921875, 1.8800430297851562, 3.3894729614257812, 0.3869781494140625, 3.0003204345703125, 3.0453319549560547, 4.8507232666015625, 1.802276611328125, 3.1734848022460938, 0.33826446533203125, 1.7574005126953125, 2.5074119567871094, -0.22478485107421875, 0.7664642333984375, -0.17664337158203125, 3.41778564453125, 0.7906112670898438, 0.201690673828125, 1.6252288818359375, 2.5609092712402344, 2.9616260528564453, -1.1376094818115234, 1.9086761474609375, 3.300394058227539, -0.1488494873046875, -0.17008590698242188, -0.11591339111328125, -0.721466064453125, 0.36971092224121094, 0.453460693359375, 0.9301662445068359, 0.9115486145019531, 2.4766845703125, 0.5249843597412109, 3.08416748046875, 1.51300048828125, 0.22870635986328125, -0.12511825561523438, -0.13475799560546875, 0.19084930419921875, -2.17510986328125, 2.3992137908935547, -0.9840621948242188, 1.7361888885498047], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000283.npy"} +{"epoch": 0.42781557067271353, "step": 284, "batch_size": 64, "mean": 0.9647125005722046, "std": 1.4474917650222778, "min": -2.3667449951171875, "p10": -0.5965522766113281, "median": 0.8179092407226562, "p90": 2.906195068359375, "max": 5.124353408813477, "pos_frac": 0.765625, "sample": [1.544219970703125, 0.0113525390625, 1.33209228515625, 2.0986328125, 2.879791259765625, 0.8549041748046875, -0.08305931091308594, -2.3667449951171875, 2.139312744140625, 0.3099365234375, 0.7481594085693359, 2.265758514404297, 0.28434181213378906, 1.6910400390625, -0.08109474182128906, -0.5935134887695312, 0.6513347625732422, 1.0471782684326172, 0.6174774169921875, 4.83319091796875, 0.39693450927734375, 1.672943115234375, 1.4875030517578125, -0.5813865661621094, -0.26114654541015625, 1.8097152709960938, 0.5769119262695312, 1.7472686767578125, 1.1672286987304688, 0.5543594360351562, 0.7339668273925781, 0.8632392883300781, 1.2596435546875, 0.182159423828125, 0.8767032623291016, -0.08016586303710938, 1.5634269714355469, 5.124353408813477, 1.08740234375, 1.02508544921875, 0.49756622314453125, 3.7265167236328125, 1.9624786376953125, 1.5594902038574219, 0.4941978454589844, -0.1326923370361328, 0.780914306640625, 3.0440006256103516, 0.9610824584960938, 2.917510986328125, 0.10891914367675781, -0.7555885314941406, 2.0589370727539062, -1.4050140380859375, 0.37277984619140625, -0.4505767822265625, 3.969024658203125, -0.5978546142578125, 1.3037948608398438, -1.1169891357421875, 3.4834442138671875, -1.6983795166015625, -1.2434844970703125, 0.5110645294189453], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000284.npy"} +{"epoch": 0.4293272864701436, "step": 285, "batch_size": 64, "mean": 0.7905269861221313, "std": 1.0934540033340454, "min": -1.3662738800048828, "p10": -0.5451103210449217, "median": 0.7214012145996094, "p90": 2.105841064453125, "max": 3.6724395751953125, "pos_frac": 0.796875, "sample": [1.188873291015625, 0.5149192810058594, 0.343475341796875, 2.0226497650146484, 0.1005706787109375, 0.02996826171875, -0.04271697998046875, 1.203948974609375, 0.075531005859375, -1.3662738800048828, 1.5130748748779297, 3.4197769165039062, 2.1048583984375, 0.7155609130859375, 2.325653076171875, -0.8365077972412109, 1.5861282348632812, 3.6724395751953125, 0.019557952880859375, 0.7272415161132812, -0.703704833984375, 0.027170181274414062, -1.014963150024414, -0.41114234924316406, 0.8505592346191406, 0.7542800903320312, 0.18218994140625, 1.2039642333984375, 2.10626220703125, -0.5767440795898438, 0.07342529296875, 1.2020721435546875, -0.6043853759765625, 2.5930709838867188, 1.8722076416015625, 0.8690452575683594, 0.40988922119140625, 1.1115264892578125, 1.4587783813476562, 0.7008495330810547, 1.525970458984375, 1.1960983276367188, -1.2728118896484375, 2.7392196655273438, 1.2799148559570312, 0.37871551513671875, 0.23905181884765625, 2.57342529296875, 0.3437957763671875, 0.9513397216796875, -0.4059104919433594, 1.3923110961914062, 0.575927734375, 1.1428451538085938, 0.2086029052734375, 0.12058258056640625, 1.8853607177734375, 1.7803955078125, -0.34607696533203125, -0.3733024597167969, 0.1681060791015625, 1.819854736328125, -0.4712982177734375, 1.7185287475585938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000285.npy"} +{"epoch": 0.4308390022675737, "step": 286, "batch_size": 64, "mean": 1.2858755588531494, "std": 1.5366671085357666, "min": -3.9573974609375, "p10": -0.1074682235717773, "median": 1.17266845703125, "p90": 2.930333518981934, "max": 6.8729705810546875, "pos_frac": 0.859375, "sample": [0.8211822509765625, 0.7224769592285156, 0.33983421325683594, 1.519866943359375, 1.5768890380859375, 2.0740127563476562, 0.4557952880859375, 2.962726593017578, 1.6260948181152344, 0.02301025390625, 2.7938156127929688, 0.857513427734375, 0.39708709716796875, 1.6670722961425781, 1.3265647888183594, 3.7736663818359375, 1.6429862976074219, 0.5560703277587891, -1.7840423583984375, 0.36087799072265625, 1.324951171875, -0.13812637329101562, -0.1264190673828125, 3.1403236389160156, -0.22278976440429688, -1.98101806640625, 2.4742298126220703, 1.4255828857421875, 0.7114410400390625, 1.332061767578125, 0.9753570556640625, 2.6213111877441406, 2.6819725036621094, 2.6125869750976562, 1.5848846435546875, 2.2696380615234375, 0.9138069152832031, 2.8547496795654297, 0.15475082397460938, 6.8729705810546875, 3.0348129272460938, 2.662353515625, 0.3144073486328125, 0.7516632080078125, 1.1516189575195312, 0.7270278930664062, 0.016210556030273438, 1.5141448974609375, 3.5053176879882812, 2.2899627685546875, 2.41943359375, 0.7325439453125, 0.5128402709960938, -0.06324958801269531, -0.03527069091796875, 1.1399993896484375, 4.173957824707031, 1.0656852722167969, -3.9573974609375, 2.0305023193359375, 1.1937179565429688, -0.3772087097167969, 1.4233932495117188, 0.8738021850585938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000286.npy"} +{"epoch": 0.4323507180650038, "step": 287, "batch_size": 64, "mean": 1.067583441734314, "std": 1.3035379648208618, "min": -2.0394210815429688, "p10": -0.3221548080444335, "median": 0.8763952255249023, "p90": 3.1038341522216797, "max": 4.467460632324219, "pos_frac": 0.828125, "sample": [1.7067604064941406, 0.7090606689453125, 0.430938720703125, 0.9333534240722656, 3.565704345703125, 3.1902008056640625, 0.4901123046875, 0.2700042724609375, 0.4348602294921875, -0.35626220703125, 0.982757568359375, 1.306142807006836, 0.8371753692626953, 0.7751312255859375, 3.6252593994140625, 3.4684829711914062, 3.063018798828125, 0.5349235534667969, 1.386159896850586, 0.9156150817871094, 1.04034423828125, 0.02880859375, 3.121326446533203, 1.02703857421875, 1.3590278625488281, 1.8816947937011719, 0.2910137176513672, -1.4542770385742188, 0.8096275329589844, 0.5652732849121094, 1.4791030883789062, 0.5987319946289062, -2.0394210815429688, 0.31105804443359375, -0.10734176635742188, 2.0165252685546875, 1.0118865966796875, 0.14209365844726562, 0.31267547607421875, 1.6403388977050781, 2.5528335571289062, 0.0679779052734375, 1.1586227416992188, 1.9308795928955078, -1.1630783081054688, -0.4220123291015625, 4.467460632324219, 1.1920623779296875, 1.997201919555664, 1.9877700805664062, -0.2425708770751953, -0.0087432861328125, 0.7597026824951172, 0.7656936645507812, -0.6484642028808594, 1.2943572998046875, 3.5676841735839844, 2.15802001953125, -0.14000320434570312, 0.21443748474121094, -0.6762466430664062, 2.3015594482421875, 2.8278770446777344, 0.077392578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000287.npy"} +{"epoch": 0.43386243386243384, "step": 288, "batch_size": 64, "mean": 0.8767852187156677, "std": 1.4809300899505615, "min": -2.3342208862304688, "p10": -1.0033607482910152, "median": 0.8185710906982422, "p90": 2.7847846984863294, "max": 5.188812255859375, "pos_frac": 0.734375, "sample": [1.2355098724365234, 0.397857666015625, -0.2689971923828125, -0.3508148193359375, 2.4166183471679688, 0.5557842254638672, 1.6031417846679688, -0.09234619140625, 0.8488082885742188, 0.9394989013671875, 0.9465065002441406, 0.6404037475585938, -0.1691436767578125, 2.056650161743164, -0.1317901611328125, 3.240631103515625, -1.5077667236328125, 0.6011543273925781, 0.9899444580078125, 1.1919879913330078, 0.1509246826171875, 3.253192901611328, 5.188812255859375, 4.424842834472656, -0.61822509765625, 3.2110595703125, 0.42474365234375, 1.8322181701660156, -2.3342208862304688, 0.31388092041015625, 3.6605682373046875, 1.2661590576171875, 0.4368896484375, 0.7883338928222656, 1.5616912841796875, 1.1245689392089844, 1.704010009765625, 0.0189361572265625, 1.859466552734375, -1.9844207763671875, -1.1684188842773438, -1.589569091796875, 2.4301223754882812, 0.40401458740234375, 0.6427364349365234, -1.7213611602783203, 1.4186477661132812, -1.3387451171875, 1.0403728485107422, 2.37896728515625, 1.3590469360351562, 0.5172653198242188, -0.27813720703125, 2.24798583984375, -0.24916839599609375, -0.242950439453125, 1.2523117065429688, 2.2809677124023438, -0.30974769592285156, 1.3611984252929688, 0.4507255554199219, 0.8539314270019531, 2.9367828369140625, 0.010204315185546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000288.npy"} +{"epoch": 0.43537414965986393, "step": 289, "batch_size": 64, "mean": 0.9776356220245361, "std": 1.6044565439224243, "min": -1.777099609375, "p10": -0.8899024963378906, "median": 0.9087791442871094, "p90": 2.7792638778686527, "max": 5.5103759765625, "pos_frac": 0.75, "sample": [0.6610851287841797, 1.1748046875, 5.5103759765625, -0.9674091339111328, -0.6788253784179688, -1.6340179443359375, 0.03548622131347656, 1.0060310363769531, 2.7336807250976562, 1.4524383544921875, 0.9033432006835938, 0.914215087890625, 0.5586872100830078, 1.6941032409667969, 1.8502044677734375, -0.11245155334472656, 1.178375244140625, -1.777099609375, -0.2618865966796875, 2.9483489990234375, 1.01202392578125, 4.559144973754883, -0.6861991882324219, 1.0750656127929688, 1.5661087036132812, 0.01018524169921875, 1.9441871643066406, -0.7465896606445312, 0.10141754150390625, 4.012977600097656, 0.45986175537109375, 0.9206695556640625, -0.6490859985351562, -1.1817245483398438, 5.2970123291015625, -0.8892135620117188, 0.07359504699707031, 4.9880218505859375, 1.0917129516601562, 1.5712814331054688, 2.798799514770508, 2.4961776733398438, -0.3228302001953125, 0.42738914489746094, 0.6447315216064453, -1.303079605102539, 1.0483989715576172, 0.4016590118408203, 0.6532821655273438, -0.89019775390625, 1.8174152374267578, 2.593719482421875, 0.7648811340332031, 1.1861114501953125, 1.8992538452148438, 2.4250411987304688, 1.8883132934570312, 1.261322021484375, 0.705108642578125, 0.8787937164306641, 0.4680652618408203, -1.4467430114746094, 1.1188411712646484, -0.6657142639160156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000289.npy"} +{"epoch": 0.436885865457294, "step": 290, "batch_size": 64, "mean": 1.0387189388275146, "std": 1.5021342039108276, "min": -2.6789169311523438, "p10": -0.5795808792114258, "median": 0.6213340759277344, "p90": 3.4487972259521484, "max": 4.709197998046875, "pos_frac": 0.765625, "sample": [2.3202743530273438, 3.435779571533203, 1.2968730926513672, 3.731109619140625, 0.0757598876953125, 1.1741695404052734, -0.25482940673828125, 0.20539093017578125, 1.2384490966796875, 2.4569320678710938, 0.45429420471191406, -0.5462169647216797, 0.27564239501953125, 0.06539154052734375, 1.9117431640625, 0.5397872924804688, -0.35065460205078125, -0.7038764953613281, 0.4015007019042969, -0.6090164184570312, 0.8933486938476562, 4.182304382324219, 2.805706024169922, 2.0117721557617188, 0.4931640625, 0.06067085266113281, 1.2104969024658203, -0.87469482421875, 1.669769287109375, -0.7116928100585938, 3.0017852783203125, 2.3038063049316406, 1.2384624481201172, -0.18149948120117188, 0.7810211181640625, 1.1989517211914062, 3.454376220703125, -0.2458477020263672, 0.657318115234375, 0.5885009765625, 4.709197998046875, 3.763338088989258, -0.5938796997070312, -2.6789169311523438, 0.3234214782714844, 0.2115936279296875, -0.06565475463867188, 3.5125198364257812, 1.2493209838867188, 1.9122543334960938, 0.038547515869140625, 0.025482177734375, -0.5049667358398438, 0.0630645751953125, 0.11571121215820312, 3.983062744140625, 2.0582351684570312, 2.4347763061523438, 2.9938201904296875, -0.2664146423339844, 0.45719146728515625, 1.10723876953125, 0.6541671752929688, -0.6813278198242188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000290.npy"} +{"epoch": 0.4383975812547241, "step": 291, "batch_size": 64, "mean": 1.085837960243225, "std": 1.3725872039794922, "min": -1.9972457885742188, "p10": -0.3779403686523436, "median": 0.906219482421875, "p90": 2.9224758148193364, "max": 4.473876953125, "pos_frac": 0.796875, "sample": [1.8494873046875, 2.2156524658203125, 0.5434455871582031, -1.1939239501953125, 1.3083038330078125, 3.3398361206054688, -0.4541740417480469, 0.460418701171875, 2.9682464599609375, 0.0023345947265625, 1.058807373046875, 3.126230239868164, -0.7639808654785156, -0.7995681762695312, 1.7983455657958984, 2.181058883666992, -0.13033294677734375, 0.40726470947265625, 1.9508209228515625, 0.8424949645996094, -0.15743255615234375, -0.20006179809570312, 1.6646499633789062, 1.998382568359375, 1.8566398620605469, 1.3243331909179688, 4.4098052978515625, 0.19774818420410156, 2.8156776428222656, 0.6842498779296875, 0.9290237426757812, 3.9515609741210938, 0.13605308532714844, 0.7339859008789062, -1.491729736328125, 0.8236598968505859, 2.015766143798828, 0.990814208984375, 1.7144012451171875, 0.32529449462890625, 1.6969528198242188, 3.4181594848632812, -1.9972457885742188, 0.8511581420898438, 1.0881805419921875, 0.574127197265625, 2.184417724609375, 2.695018768310547, 0.6846961975097656, 4.473876953125, 0.3447303771972656, 0.87310791015625, -1.5886268615722656, 1.4926223754882812, -0.15976905822753906, 1.0299644470214844, -0.1506938934326172, 2.612457275390625, 0.9129180908203125, 1.3729419708251953, 0.8995208740234375, 0.8430061340332031, -0.1513824462890625, 0.05989837646484375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000291.npy"} +{"epoch": 0.4399092970521542, "step": 292, "batch_size": 64, "mean": 0.7545456886291504, "std": 1.2581899166107178, "min": -2.5391387939453125, "p10": -0.8346889495849608, "median": 0.8084850311279297, "p90": 2.4345832824707037, "max": 3.975025177001953, "pos_frac": 0.703125, "sample": [-2.5391387939453125, -1.208160400390625, 1.3446388244628906, -0.23967552185058594, 2.671567916870117, 3.089448928833008, -0.023473739624023438, 3.5037841796875, 1.4621047973632812, 1.4383544921875, 2.3142547607421875, 0.8193931579589844, 2.609264373779297, 0.9052200317382812, -0.6044082641601562, 0.3394584655761719, 1.4251937866210938, 0.7032623291015625, 1.273345947265625, -0.393768310546875, 0.828521728515625, -0.7004585266113281, 0.41962432861328125, -1.073995590209961, 1.02978515625, -0.0358734130859375, -0.8876991271972656, 1.98297119140625, -1.5369873046875, 2.281942367553711, 0.6021347045898438, 1.3332080841064453, 0.34389305114746094, 1.9548187255859375, -0.07660865783691406, -0.38263702392578125, 1.2083320617675781, -0.19281005859375, 0.5888137817382812, 1.1570205688476562, 0.797576904296875, 0.6794052124023438, 1.2785663604736328, -1.016092300415039, 3.975025177001953, 1.4854240417480469, 0.9818267822265625, 0.25972747802734375, 0.8987846374511719, 0.789093017578125, 0.825531005859375, 1.7700576782226562, 0.44846534729003906, 1.5668296813964844, 1.1447505950927734, 0.25241851806640625, 1.1187114715576172, 0.418975830078125, 3.0129737854003906, -0.07244110107421875, -0.647430419921875, -0.71099853515625, -1.18707275390625, 2.4861526489257812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000292.npy"} +{"epoch": 0.4414210128495843, "step": 293, "batch_size": 64, "mean": 1.1138887405395508, "std": 1.390509009361267, "min": -2.790355682373047, "p10": -0.5840518951416015, "median": 1.1563224792480469, "p90": 2.8648834228515643, "max": 5.84033203125, "pos_frac": 0.828125, "sample": [0.24357223510742188, 1.0570487976074219, 1.29156494140625, 1.9603824615478516, 0.7163848876953125, 0.6497039794921875, -1.0702896118164062, 1.1701984405517578, 0.32529449462890625, 3.1660614013671875, 1.2944183349609375, -0.36580657958984375, 2.0180892944335938, 0.910369873046875, 1.4786834716796875, 0.5638198852539062, 1.6046104431152344, 1.9289627075195312, 1.5564155578613281, 1.206787109375, 2.3536643981933594, -0.4736061096191406, -0.4702911376953125, -0.6313858032226562, 2.4321365356445312, 0.10884284973144531, 1.142446517944336, 0.05778694152832031, 1.0019340515136719, 3.9481964111328125, 0.8100719451904297, 3.0503463745117188, 3.109100341796875, -0.6996231079101562, 1.8203887939453125, -1.0554237365722656, 2.166492462158203, -2.790355682373047, 1.2175331115722656, 0.1638946533203125, 2.3491668701171875, 3.332794189453125, -1.23065185546875, 0.6519355773925781, -0.2837371826171875, -0.6733245849609375, 0.725799560546875, 1.0595703125, 5.84033203125, 3.7749252319335938, 1.6427955627441406, 0.546417236328125, 1.5182514190673828, 0.9578704833984375, 1.3553924560546875, 0.09385299682617188, 2.02252197265625, 0.360198974609375, 1.292999267578125, 1.9401779174804688, 1.4898681640625, 1.592966079711914, 0.19257354736328125, 1.76776123046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000293.npy"} +{"epoch": 0.4429327286470144, "step": 294, "batch_size": 64, "mean": 1.0972115993499756, "std": 1.294878363609314, "min": -1.3507270812988281, "p10": -0.4218410491943359, "median": 0.9136714935302734, "p90": 2.8364200592041025, "max": 4.816749572753906, "pos_frac": 0.796875, "sample": [3.3216400146484375, 1.384552001953125, 3.00262451171875, 1.5749359130859375, 2.3169002532958984, 0.9288101196289062, 2.555034637451172, 1.665374755859375, -0.08774948120117188, 1.3116798400878906, 1.1473197937011719, 0.7291641235351562, -0.6084327697753906, 1.9738693237304688, 0.5141353607177734, 0.9319248199462891, 2.9086952209472656, 1.6378250122070312, 1.2608528137207031, 1.4487724304199219, 2.07098388671875, -0.13316726684570312, 0.5894184112548828, 1.7432098388671875, 0.2313079833984375, 4.500801086425781, -1.3507270812988281, 0.6915168762207031, -0.5479316711425781, 4.816749572753906, 3.181488037109375, 0.26450538635253906, -0.010705947875976562, 4.415069580078125, -0.7126617431640625, 0.019622802734375, 0.7835502624511719, -0.2590007781982422, 0.5906982421875, 2.178577423095703, 0.04573822021484375, -0.1356658935546875, 1.4544792175292969, 0.11071586608886719, 1.259225845336914, 2.1304550170898438, 0.6749553680419922, 1.0000438690185547, 0.40725135803222656, 2.6677780151367188, -0.4154243469238281, 1.8383407592773438, 1.35784912109375, 0.4500999450683594, 0.46356201171875, 0.8985328674316406, 0.6332244873046875, -0.6713218688964844, 0.11893844604492188, 0.5602989196777344, 1.7075347900390625, -0.5118827819824219, 1.6201744079589844, -0.424591064453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000294.npy"} +{"epoch": 0.4444444444444444, "step": 295, "batch_size": 64, "mean": 0.918948233127594, "std": 1.48408842086792, "min": -1.6765022277832031, "p10": -0.7238803863525389, "median": 0.871607780456543, "p90": 2.7446624755859377, "max": 5.593059539794922, "pos_frac": 0.703125, "sample": [2.175384521484375, -0.3895416259765625, 0.1552581787109375, 1.2414436340332031, 0.15936660766601562, 1.6042633056640625, 2.124908447265625, -0.06574058532714844, 1.2540817260742188, 0.0336456298828125, 5.593059539794922, 0.1743488311767578, 4.6412353515625, -0.8121242523193359, 2.463003158569336, 1.2505569458007812, -0.0868988037109375, 0.8532562255859375, -1.233367919921875, -0.36527252197265625, 2.3067245483398438, -0.5221595764160156, 0.5369243621826172, 0.5566291809082031, 2.444671630859375, -0.245208740234375, 0.7012271881103516, -1.6765022277832031, 2.1455001831054688, 0.7643051147460938, -1.3377685546875, -0.5713539123535156, 2.6785888671875, 1.022064208984375, 1.3995208740234375, -0.5340709686279297, 1.5860748291015625, 1.0652294158935547, 2.276409149169922, 1.9252243041992188, 1.069366455078125, -0.51324462890625, 0.169403076171875, 1.29083251953125, 3.504486083984375, 2.8839263916015625, -0.7652816772460938, -0.5949325561523438, -1.2024345397949219, 1.292266845703125, 0.09166336059570312, 1.1466102600097656, 1.6650543212890625, 0.1132354736328125, 0.8899593353271484, 0.8976459503173828, 2.798208236694336, -0.09334564208984375, 0.6986465454101562, -1.2407913208007812, 1.0266494750976562, 2.772979736328125, 4.246164321899414, -0.6272773742675781], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000295.npy"} +{"epoch": 0.4459561602418745, "step": 296, "batch_size": 64, "mean": 1.3085522651672363, "std": 1.4987280368804932, "min": -1.8045806884765625, "p10": -0.6220838546752929, "median": 1.220545768737793, "p90": 3.234603309631348, "max": 4.5176239013671875, "pos_frac": 0.78125, "sample": [3.874542236328125, 2.2643661499023438, 2.3775253295898438, 2.062898635864258, -0.8686981201171875, 0.1373291015625, 0.4462566375732422, 4.16241455078125, 0.8350753784179688, 1.0310935974121094, 2.4124374389648438, 3.121471405029297, -0.6324481964111328, 3.2666759490966797, 0.8757133483886719, 2.3962554931640625, 2.7347030639648438, 1.8087635040283203, 0.4405708312988281, 2.3306884765625, 1.123361587524414, -0.5914115905761719, 2.892364501953125, -0.97393798828125, -0.597900390625, 1.2168731689453125, 0.4442901611328125, 1.1126747131347656, 1.5050430297851562, 2.0322723388671875, 0.011932373046875, -0.15401268005371094, 0.5238037109375, -0.224700927734375, 4.5176239013671875, -1.285400390625, -0.26381683349609375, 0.03394889831542969, 2.0470829010009766, -0.6947097778320312, 0.7022552490234375, -1.8045806884765625, 4.248996734619141, 1.22210693359375, 1.9581146240234375, 2.1005859375, 0.7791481018066406, 1.2941951751708984, 1.5601005554199219, 2.9535255432128906, 1.721588134765625, 0.5665359497070312, 1.822113037109375, 1.218984603881836, 3.6445083618164062, 1.79498291015625, 0.7284164428710938, 3.1597671508789062, -0.2527923583984375, -1.1367263793945312, -0.4234352111816406, 3.52020263671875, 3.1127471923828125, 1.50299072265625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000296.npy"} +{"epoch": 0.4474678760393046, "step": 297, "batch_size": 64, "mean": 1.1771044731140137, "std": 1.296628475189209, "min": -1.4688568115234375, "p10": -0.13156261444091796, "median": 0.8502349853515625, "p90": 2.735970115661621, "max": 5.1820220947265625, "pos_frac": 0.8125, "sample": [0.3157768249511719, -0.13202667236328125, 1.5497665405273438, 2.15936279296875, 1.8446044921875, 2.799224853515625, 1.0453224182128906, 1.946401596069336, -0.05926513671875, -0.42203712463378906, 0.6899909973144531, 1.11236572265625, 0.5118808746337891, 1.6368064880371094, 0.5947685241699219, 2.7458629608154297, 2.560626983642578, 2.5710716247558594, -0.1304798126220703, 0.2281646728515625, 0.9418563842773438, 0.0371551513671875, 0.5669097900390625, -0.2451629638671875, -0.014959335327148438, 1.7836380004882812, 0.6489028930664062, 1.228851318359375, 2.9137496948242188, 0.7205123901367188, 1.887216567993164, 1.8165607452392578, 0.6303253173828125, -1.0283985137939453, -0.4270439147949219, 1.0550899505615234, 0.5085678100585938, 1.4465484619140625, 4.528388977050781, 0.05861663818359375, -1.4688568115234375, -1.1168403625488281, 2.0099411010742188, 2.7675819396972656, 0.6537094116210938, 1.486083984375, 0.03032684326171875, -0.06109046936035156, 0.5428314208984375, 2.59783935546875, 0.6607627868652344, 2.4307117462158203, 0.6439552307128906, 1.7697906494140625, -0.08829116821289062, 0.2518634796142578, 3.3808746337890625, 5.1820220947265625, 0.23046875, 2.7128868103027344, 0.7586135864257812, 2.0879478454589844, 2.595916748046875, 2.6501235961914062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000297.npy"} +{"epoch": 0.4489795918367347, "step": 298, "batch_size": 64, "mean": 1.0229599475860596, "std": 1.5030875205993652, "min": -2.425729751586914, "p10": -0.947149658203125, "median": 0.9998683929443359, "p90": 3.0387491226196293, "max": 4.5500030517578125, "pos_frac": 0.765625, "sample": [0.5842208862304688, -2.425729751586914, 4.5500030517578125, 1.75543212890625, 2.048583984375, 0.5203094482421875, 1.2581138610839844, -0.9328155517578125, 0.512664794921875, 1.1685028076171875, 3.1532669067382812, 3.06829833984375, -0.9532928466796875, 2.575042724609375, -1.39501953125, 1.6923332214355469, 0.942474365234375, -0.9924163818359375, 0.39733314514160156, 1.256927490234375, 0.4473304748535156, -0.8463459014892578, 2.3816986083984375, 2.267333984375, 0.6942901611328125, 1.4640045166015625, -1.1109447479248047, 1.0572624206542969, 2.55487060546875, 2.443704605102539, 0.7719593048095703, 2.590473175048828, 0.26999664306640625, 3.0712852478027344, 0.21471786499023438, 0.2989616394042969, 0.3254661560058594, 2.9698009490966797, 1.4121532440185547, 0.308349609375, -0.8568344116210938, -0.4552631378173828, -0.2723865509033203, 1.5092620849609375, 0.22052383422851562, 4.111328125, 3.5481338500976562, 2.1456756591796875, 1.2054176330566406, 2.248565673828125, 1.4296646118164062, 0.14300537109375, 1.1623764038085938, -0.9777240753173828, 0.11247634887695312, 1.2155227661132812, -0.4069366455078125, -0.5788421630859375, -0.537933349609375, -1.426504135131836, 2.632049560546875, 2.1241111755371094, 0.8343296051025391, 3.968812942504883], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000298.npy"} +{"epoch": 0.4504913076341648, "step": 299, "batch_size": 64, "mean": 0.7673170566558838, "std": 1.4802305698394775, "min": -2.5735702514648438, "p10": -0.9054239273071287, "median": 0.6618375778198242, "p90": 2.456406402587891, "max": 5.584136962890625, "pos_frac": 0.703125, "sample": [1.2427253723144531, 1.7868843078613281, 0.09876441955566406, -2.5735702514648438, -0.9753170013427734, 1.8499946594238281, 1.3174285888671875, 0.7738704681396484, 3.2669677734375, -0.12364578247070312, -1.3410263061523438, 1.2217559814453125, -1.8401260375976562, 0.5498046875, -1.6222076416015625, 0.1573486328125, -0.06472015380859375, 2.490436553955078, 2.3349838256835938, -0.6364898681640625, 2.0864486694335938, 0.044864654541015625, 1.1309814453125, 1.5593032836914062, -0.523956298828125, -0.48686981201171875, 1.6227951049804688, 0.4930686950683594, 5.584136962890625, -0.450927734375, 1.245880126953125, 0.42711639404296875, 0.8954544067382812, -1.5902137756347656, 0.1257476806640625, -0.3782958984375, 0.811920166015625, 2.377002716064453, 0.5208358764648438, 1.5100555419921875, 2.2592926025390625, 0.5240097045898438, 1.2957916259765625, 2.5659046173095703, 3.7085418701171875, -0.6219558715820312, 2.1697559356689453, 3.31378173828125, 0.1390838623046875, 1.7835845947265625, 0.29593849182128906, -0.4150524139404297, 1.3030204772949219, -0.742340087890625, 1.0932159423828125, 1.2861900329589844, -1.99151611328125, 2.3055343627929688, -0.10509300231933594, 0.4213409423828125, 0.22085952758789062, 0.8503379821777344, -0.3246345520019531, 2.8534889221191406], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000299.npy"} +{"epoch": 0.4520030234315949, "step": 300, "batch_size": 64, "mean": 1.0325164794921875, "std": 1.4075207710266113, "min": -2.46466064453125, "p10": -0.688136863708496, "median": 0.9867038726806641, "p90": 2.5495780944824222, "max": 4.634731292724609, "pos_frac": 0.78125, "sample": [0.8021316528320312, 2.5817222595214844, -0.0205078125, 2.096240997314453, 1.8037910461425781, 0.00782012939453125, 1.8138961791992188, 1.602874755859375, -0.4657001495361328, -1.5371513366699219, 3.022308349609375, 4.634731292724609, 1.5902023315429688, 2.1521453857421875, 3.6697311401367188, 2.1945152282714844, 1.893585205078125, 0.30010223388671875, 0.0766448974609375, 0.4329032897949219, 0.6926383972167969, 1.940582275390625, 2.450836181640625, -0.09025764465332031, -0.6986007690429688, 0.73529052734375, -0.2734260559082031, -0.2748603820800781, 1.8821945190429688, 1.5056133270263672, 0.030992507934570312, -0.7532958984375, 0.6400070190429688, 0.39154624938964844, -2.46466064453125, 0.9781684875488281, 0.3834972381591797, 4.07208251953125, 1.3032073974609375, 2.4738845825195312, 1.897430419921875, 1.303924560546875, -0.9304351806640625, 1.5829010009765625, 1.2718391418457031, -0.055389404296875, 0.3358573913574219, 1.0323295593261719, 0.46307373046875, 2.4745750427246094, 1.8158416748046875, 0.4536266326904297, 0.9952392578125, 0.6090164184570312, 0.48358154296875, -0.6637210845947266, 2.6608657836914062, 1.6859664916992188, 2.327899932861328, -1.8394832611083984, 0.027103424072265625, 4.077003479003906, 1.4556732177734375, -0.961090087890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000300.npy"} +{"epoch": 0.45351473922902497, "step": 301, "batch_size": 64, "mean": 0.9239292144775391, "std": 1.7093425989151, "min": -2.1144485473632812, "p10": -1.2174325942993163, "median": 0.6901569366455078, "p90": 3.054500389099122, "max": 6.4470367431640625, "pos_frac": 0.703125, "sample": [1.6554126739501953, 1.4812774658203125, 1.3316268920898438, 0.7813243865966797, 2.21875, -0.170684814453125, 2.9107894897460938, 1.9492645263671875, 1.9440441131591797, 0.43883514404296875, 0.1998310089111328, 0.11689949035644531, 2.0452537536621094, -1.2317543029785156, 0.5933380126953125, 0.112335205078125, -0.0384979248046875, 0.0785064697265625, 0.9047775268554688, 0.45703125, -0.7360897064208984, 0.7041969299316406, 4.224201202392578, -1.8293075561523438, 2.8323917388916016, 0.2819557189941406, 0.8669376373291016, 2.8664379119873047, 2.375274658203125, -1.75927734375, -0.17844390869140625, -1.5294532775878906, 3.116090774536133, 4.428436279296875, -1.6513690948486328, -0.22706985473632812, -1.92181396484375, 6.4470367431640625, 0.2329845428466797, 0.23807144165039062, -0.15964317321777344, 0.05367469787597656, 1.8807411193847656, -1.1840152740478516, 0.8727169036865234, 2.590719223022461, 1.235097885131836, 1.5907135009765625, 0.676116943359375, 3.1827926635742188, 0.8296089172363281, 4.0415802001953125, 0.7958450317382812, 0.33064842224121094, -1.0186004638671875, 1.6916217803955078, 1.916656494140625, 3.5812339782714844, -0.0236663818359375, -0.0355987548828125, -0.20400238037109375, -2.1144485473632812, 2.7877883911132812, -0.7456588745117188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000301.npy"} +{"epoch": 0.455026455026455, "step": 302, "batch_size": 64, "mean": 0.6078989505767822, "std": 1.455957293510437, "min": -2.8555984497070312, "p10": -1.2363723754882812, "median": 0.6169414520263672, "p90": 2.5909183502197273, "max": 4.00732421875, "pos_frac": 0.703125, "sample": [0.017765045166015625, 2.657238006591797, -1.0843048095703125, 4.00732421875, 1.6246757507324219, 0.772216796875, 2.7806739807128906, -1.0937728881835938, 1.3228874206542969, 1.5887451171875, 0.802215576171875, 0.4200248718261719, 0.37149620056152344, 0.7972564697265625, 3.9499664306640625, 1.7362308502197266, 0.6341476440429688, 0.0428466796875, 1.0326385498046875, -1.2847900390625, 0.5672225952148438, -1.3389663696289062, -1.6129302978515625, 1.9339752197265625, 3.7460174560546875, -1.746866226196289, -0.0791778564453125, 0.9782333374023438, -0.3008766174316406, 0.10169219970703125, 0.2847900390625, 1.2174835205078125, 2.349700927734375, 0.1713733673095703, -0.8705997467041016, 1.34893798828125, 2.696807861328125, 0.6287651062011719, 0.286041259765625, 0.8913650512695312, 1.4119281768798828, 2.4361724853515625, -0.19000816345214844, 1.0255279541015625, -0.943603515625, 0.9390106201171875, -1.9740066528320312, -1.1233978271484375, 1.67474365234375, 0.8848419189453125, 2.0350704193115234, -1.3048171997070312, 2.35870361328125, -0.6731986999511719, 0.22761917114257812, -0.9194221496582031, 0.17303466796875, 2.8124237060546875, -0.704681396484375, 0.6051177978515625, -2.8555984497070312, -0.5853271484375, 0.47687530517578125, 0.7700557708740234], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000302.npy"} +{"epoch": 0.4565381708238851, "step": 303, "batch_size": 64, "mean": 1.4789727926254272, "std": 1.7662547826766968, "min": -1.8746261596679688, "p10": -0.5686225891113281, "median": 1.3901691436767578, "p90": 3.6566413879394535, "max": 6.72552490234375, "pos_frac": 0.78125, "sample": [0.18120574951171875, 0.7295017242431641, 2.04632568359375, 1.3855171203613281, 5.82818603515625, 3.7684478759765625, 1.549041748046875, -0.37462425231933594, 1.8869438171386719, 0.522003173828125, 3.219972610473633, -0.5713348388671875, 1.2527885437011719, 0.46996116638183594, 0.20748138427734375, -0.4000568389892578, -0.1939239501953125, 3.250732421875, -0.5622940063476562, -0.6873626708984375, -0.37099266052246094, 2.198537826538086, 3.106689453125, 3.0662460327148438, -1.0780715942382812, 0.7224502563476562, 2.322986602783203, -1.3508834838867188, -0.8525505065917969, 3.15936279296875, 1.339223861694336, 1.677825927734375, 1.7339649200439453, -0.4848213195800781, 0.3021087646484375, 0.5778160095214844, 2.3623809814453125, 1.5486602783203125, 1.8145523071289062, 1.3948211669921875, 0.37325286865234375, 2.063976287841797, 1.6476593017578125, 2.221221923828125, 0.713623046875, 1.9157333374023438, 5.726005554199219, 0.9742965698242188, 2.500579833984375, 3.5386734008789062, -1.8746261596679688, 0.14513397216796875, -0.4398918151855469, 4.0714111328125, 3.4193801879882812, 1.029083251953125, 0.9797916412353516, 1.0999908447265625, 2.1479835510253906, 3.996824264526367, 6.72552490234375, -0.9262409210205078, 2.1988525390625, 3.7071990966796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000303.npy"} +{"epoch": 0.4580498866213152, "step": 304, "batch_size": 64, "mean": 1.2210009098052979, "std": 1.254754900932312, "min": -2.1882781982421875, "p10": -0.16510391235351562, "median": 1.1742134094238281, "p90": 2.9964622497558606, "max": 3.8017578125, "pos_frac": 0.84375, "sample": [1.3471412658691406, -0.5615043640136719, 2.46844482421875, 2.31494140625, 0.8332901000976562, 0.28632164001464844, 1.60302734375, 2.4327011108398438, 0.5107402801513672, 0.4385547637939453, -0.166168212890625, 3.41607666015625, 1.9683685302734375, 1.0967864990234375, 1.350198745727539, 1.23046875, 0.04337310791015625, 0.9325218200683594, 3.122528076171875, -2.1882781982421875, 1.0171127319335938, 2.135650634765625, 1.4563827514648438, 0.7552852630615234, 1.54949951171875, -0.16262054443359375, 0.35434913635253906, 2.5701637268066406, 1.6831588745117188, 2.505807876586914, 1.7750015258789062, 0.20842742919921875, 3.671142578125, 1.9403533935546875, -0.041290283203125, -1.04766845703125, 0.9514656066894531, 1.1030693054199219, 0.45511627197265625, 2.123870849609375, -0.23672199249267578, -0.6424407958984375, 3.67327880859375, 2.7023086547851562, 0.8499755859375, 0.4421844482421875, 1.2703094482421875, 3.6223716735839844, 1.6730499267578125, 1.8635177612304688, 0.34577178955078125, 0.35201263427734375, 3.3355712890625, 0.4129829406738281, 1.1179580688476562, -1.0655632019042969, 0.166717529296875, 3.8017578125, 1.45751953125, 0.6056137084960938, 1.7161407470703125, -0.05635643005371094, 1.405426025390625, 1.8468589782714844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000304.npy"} +{"epoch": 0.4595616024187453, "step": 305, "batch_size": 64, "mean": 1.0611159801483154, "std": 1.6485947370529175, "min": -2.647216796875, "p10": -0.9966064453124996, "median": 0.8649673461914062, "p90": 3.172847366333009, "max": 5.287624359130859, "pos_frac": 0.75, "sample": [2.3400650024414062, 0.6022262573242188, -1.2984275817871094, -2.647216796875, -0.251800537109375, -0.15176963806152344, 5.287624359130859, -1.1652088165283203, 1.697235107421875, 2.2189254760742188, 0.6178092956542969, 3.2730369567871094, 0.935699462890625, 1.1505012512207031, 0.8518218994140625, -0.2175464630126953, -0.2304840087890625, 0.3513069152832031, -1.943634033203125, 2.9390716552734375, 0.5762672424316406, 2.6477088928222656, 0.8061904907226562, 0.87811279296875, 0.5055961608886719, 0.36560821533203125, 1.8339576721191406, 0.5988922119140625, 1.3671875, -2.4690017700195312, 1.8941650390625, 1.817178726196289, 3.3425636291503906, 2.1883544921875, 0.49884033203125, 0.9217529296875, 4.6174163818359375, 1.685150146484375, 0.4261016845703125, -0.161834716796875, 0.5302886962890625, -0.07305908203125, 2.530597686767578, 3.713226318359375, 4.6736297607421875, 0.4456596374511719, 0.0104827880859375, -1.1395111083984375, 2.3326950073242188, 1.1630363464355469, 1.6540679931640625, 1.5098495483398438, 2.1588172912597656, 4.4736328125, 2.0268096923828125, 2.370147705078125, -0.6631622314453125, 0.38823699951171875, -1.1912498474121094, 1.9902057647705078, -0.6033782958984375, 0.3027229309082031, 1.255340576171875, -0.6471099853515625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000305.npy"} +{"epoch": 0.46107331821617537, "step": 306, "batch_size": 64, "mean": 0.9585160613059998, "std": 1.6464711427688599, "min": -3.92510986328125, "p10": -0.7069725036621093, "median": 0.6530685424804688, "p90": 2.79732780456543, "max": 5.61981201171875, "pos_frac": 0.765625, "sample": [3.1556930541992188, 2.8207321166992188, 0.6039638519287109, 2.742717742919922, 2.4545440673828125, 1.7214698791503906, -0.020687103271484375, 0.685943603515625, 0.48746299743652344, 1.6222801208496094, 1.935302734375, 1.011322021484375, -2.1764450073242188, 0.21174240112304688, 0.2195587158203125, -1.2409286499023438, 2.431060791015625, 0.592071533203125, 0.22825241088867188, 0.27311134338378906, 1.7572021484375, 5.1260223388671875, 1.2680320739746094, -0.3295097351074219, 2.0425262451171875, 0.4095573425292969, 0.22066497802734375, 1.54718017578125, 0.08460235595703125, 0.61810302734375, -3.92510986328125, 2.61370849609375, -0.2515430450439453, 0.9628753662109375, 0.39714813232421875, -0.5646514892578125, 2.4303245544433594, 5.5133209228515625, -0.34947967529296875, 0.5889663696289062, -0.4694347381591797, 0.9864597320556641, -0.7679672241210938, 0.6198577880859375, -0.8940391540527344, -0.10688018798828125, 0.2712745666503906, 2.9887237548828125, 0.9904708862304688, 3.279399871826172, 1.4787445068359375, 0.23426055908203125, 0.6201934814453125, 1.2110366821289062, 2.3854827880859375, 0.75115966796875, 2.2767105102539062, -0.34264373779296875, 0.9361648559570312, 5.61981201171875, -1.724874496459961, 1.3198089599609375, 0.6992034912109375, -0.9370059967041016], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000306.npy"} +{"epoch": 0.46258503401360546, "step": 307, "batch_size": 64, "mean": 0.9371170401573181, "std": 1.6174988746643066, "min": -3.151458740234375, "p10": -1.0650150299072265, "median": 0.7473077774047852, "p90": 2.9638713836669925, "max": 5.5703125, "pos_frac": 0.671875, "sample": [1.7748947143554688, -0.6113929748535156, 1.1576995849609375, 0.3213233947753906, -3.151458740234375, 1.4172821044921875, 5.5703125, 0.6261520385742188, 2.47735595703125, 2.1858901977539062, -0.0977325439453125, 2.129772186279297, 0.7497520446777344, 3.4877395629882812, -1.0032730102539062, 3.0584259033203125, -0.2918243408203125, -1.0637893676757812, 1.5987434387207031, -0.38085174560546875, 2.0990142822265625, -1.3087139129638672, -1.0655403137207031, 0.2586669921875, 1.2740745544433594, -1.0545730590820312, -1.4138870239257812, 0.9307327270507812, 2.90850830078125, -0.48868751525878906, 0.67437744140625, 2.760425567626953, -0.14996719360351562, -0.15323257446289062, -1.638824462890625, 1.046112060546875, -0.09506988525390625, 2.4176025390625, 1.984334945678711, 0.92816162109375, 0.6987266540527344, -0.10584259033203125, 0.21016693115234375, 3.20721435546875, 2.978900909423828, 0.29742431640625, 0.7448635101318359, 2.29638671875, 2.6911697387695312, 1.5900039672851562, -0.48309326171875, 2.928802490234375, 2.4349212646484375, 0.03738594055175781, 3.164337158203125, 2.7165985107421875, 0.12860870361328125, 3.062488555908203, 0.07110595703125, -0.16704559326171875, -1.1553115844726562, 1.7272796630859375, 2.3700485229492188, -1.3381881713867188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000307.npy"} +{"epoch": 0.46409674981103555, "step": 308, "batch_size": 64, "mean": 1.1912176609039307, "std": 1.5135875940322876, "min": -2.2870101928710938, "p10": -0.36869602203369134, "median": 1.0714397430419922, "p90": 2.7941650390625, "max": 5.614112854003906, "pos_frac": 0.796875, "sample": [2.153993606567383, 1.0994377136230469, 1.0329742431640625, -0.6418952941894531, 2.7047042846679688, 1.6311874389648438, -0.23284339904785156, 1.50518798828125, -1.2160491943359375, 0.7203903198242188, 5.614112854003906, 2.288848876953125, 4.744132995605469, 1.9480094909667969, 1.854400634765625, -0.40224266052246094, 1.2219772338867188, 0.28388214111328125, 2.5302066802978516, 0.7516441345214844, 1.3034210205078125, -0.7573509216308594, 3.7776565551757812, 1.6567535400390625, 0.9957199096679688, 2.8050689697265625, -0.2904205322265625, 1.2047805786132812, 0.95745849609375, -0.1514739990234375, 2.3048629760742188, -1.3019027709960938, 0.6958732604980469, -0.22177696228027344, 2.6470870971679688, 0.8638191223144531, 1.8049087524414062, 1.1830062866210938, 0.0591278076171875, -2.2870101928710938, 1.0434417724609375, 0.8584365844726562, 2.7687225341796875, 3.1548118591308594, -0.0927276611328125, 0.2949066162109375, 0.27541351318359375, 1.208892822265625, 1.5465412139892578, 0.4529914855957031, 2.604581832885742, 1.5931549072265625, 0.4889678955078125, 2.3861465454101562, 5.35211181640625, 0.3118133544921875, 3.0302734375, -1.8379783630371094, 0.23210525512695312, 1.3425159454345703, 0.1665821075439453, -0.0749053955078125, 0.9652252197265625, 1.3242301940917969], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000308.npy"} +{"epoch": 0.4656084656084656, "step": 309, "batch_size": 64, "mean": 1.106238842010498, "std": 1.4884768724441528, "min": -2.5413818359375, "p10": -0.4823648452758788, "median": 0.9228019714355469, "p90": 2.779443359375001, "max": 5.526180267333984, "pos_frac": 0.75, "sample": [1.0044097900390625, 5.323146820068359, -0.038661956787109375, 2.863262176513672, 1.9945602416992188, 0.6507949829101562, 2.5838661193847656, -0.5348625183105469, -0.38494873046875, 0.7213687896728516, 2.2221221923828125, 0.14454269409179688, -0.5241146087646484, 0.4903221130371094, 1.515350341796875, -1.1132621765136719, 1.3885250091552734, 2.1994895935058594, -0.0082550048828125, 0.3401947021484375, 0.4233112335205078, 1.7793083190917969, 2.326152801513672, -0.18840599060058594, 1.28436279296875, 1.3667488098144531, 0.20368576049804688, 1.8306293487548828, -0.6150741577148438, 0.07775497436523438, 0.2761726379394531, -1.4240264892578125, 0.3730316162109375, 1.323150634765625, -0.238616943359375, 1.2697944641113281, 1.7222824096679688, 1.8166236877441406, -0.21140098571777344, 3.6625289916992188, 0.4838409423828125, 3.0220947265625, 2.4913692474365234, 1.6540679931640625, 3.2995834350585938, -0.1083984375, 2.5555038452148438, 1.8130569458007812, 0.19029808044433594, 2.1102676391601562, -1.0185317993164062, 0.8411941528320312, 0.09786605834960938, 2.463186264038086, 5.526180267333984, 0.6458930969238281, 1.0085277557373047, 0.7332305908203125, -0.0235748291015625, -2.5413818359375, 2.2041015625, -0.057342529296875, 1.5705757141113281, 3.9418182373046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000309.npy"} +{"epoch": 0.4671201814058957, "step": 310, "batch_size": 64, "mean": 0.709973931312561, "std": 1.486603856086731, "min": -1.7402725219726562, "p10": -1.0881328582763672, "median": 0.5387420654296875, "p90": 2.5107740402221683, "max": 5.0885467529296875, "pos_frac": 0.65625, "sample": [1.350921630859375, 0.40222930908203125, 1.7165031433105469, 0.382415771484375, 2.537708282470703, -0.257904052734375, -0.08413314819335938, 1.582763671875, 1.4951705932617188, -0.35723304748535156, 0.6173343658447266, -1.6281147003173828, 2.0568008422851562, 5.0885467529296875, 2.833404541015625, -0.49384307861328125, -0.3655376434326172, 0.449249267578125, 3.4541854858398438, -1.1008186340332031, 0.5192146301269531, 1.9914112091064453, -0.8953437805175781, 4.219757080078125, -1.05853271484375, -0.8111896514892578, 1.6957244873046875, 0.0052013397216796875, 0.5353240966796875, 1.9165172576904297, -0.9478759765625, 1.7477951049804688, 1.037078857421875, 0.2091217041015625, -0.9037322998046875, 1.86126708984375, -0.4905662536621094, 0.6745147705078125, 1.284759521484375, 0.6768417358398438, -0.7269821166992188, 2.1362075805664062, -1.4029006958007812, -1.1214599609375, 1.0956497192382812, 0.302490234375, 0.5421600341796875, 2.0530529022216797, -1.7402725219726562, -0.1873779296875, -0.5184249877929688, 2.943387985229492, 0.7803173065185547, -1.361236572265625, 0.375732421875, 0.9140377044677734, 0.43603515625, 2.176088333129883, 2.447927474975586, -1.5305862426757812, -0.8787384033203125, 3.5235519409179688, 0.7822284698486328, 1.4505081176757812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000310.npy"} +{"epoch": 0.46863189720332576, "step": 311, "batch_size": 64, "mean": 1.0156208276748657, "std": 1.766611099243164, "min": -3.7061614990234375, "p10": -1.1139503479003905, "median": 1.0677013397216797, "p90": 2.9214172363281263, "max": 7.050628662109375, "pos_frac": 0.71875, "sample": [2.3241119384765625, 0.19867706298828125, -0.37683868408203125, -1.80364990234375, -0.963287353515625, -0.024335861206054688, 0.09130477905273438, 2.2345733642578125, -0.9399051666259766, 3.8475723266601562, 1.6747283935546875, -3.7061614990234375, 1.5937423706054688, 1.4315567016601562, 2.106048583984375, -0.009550094604492188, 1.094635009765625, 1.1845169067382812, 2.585174560546875, 0.9781036376953125, 0.9267044067382812, -0.1488494873046875, 1.0407676696777344, 0.5706939697265625, 1.6260147094726562, 0.16875076293945312, -0.7453422546386719, -0.6403942108154297, 1.3150634765625, 2.5698394775390625, -1.233123779296875, 2.086029052734375, 5.408843994140625, 0.4218292236328125, 1.3769798278808594, 1.5265522003173828, 0.9463157653808594, 0.05816650390625, 1.3928909301757812, 1.3645477294921875, 7.050628662109375, -0.6671295166015625, -1.1848068237304688, 3.474029541015625, -1.2934513092041016, 1.1220703125, 0.49432373046875, -1.1785202026367188, 1.8684921264648438, 0.9280548095703125, 4.100410461425781, 3.065521240234375, 0.41162109375, -2.0129928588867188, 1.6341590881347656, 1.4445648193359375, 0.26033592224121094, -0.052684783935546875, -0.4392547607421875, 2.4057159423828125, 2.2258148193359375, 3.6815643310546875, 2.418609619140625, 1.689361572265625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000311.npy"} +{"epoch": 0.47014361300075586, "step": 312, "batch_size": 64, "mean": 1.0361424684524536, "std": 1.6474061012268066, "min": -4.7663726806640625, "p10": -0.6647735595703125, "median": 0.8673019409179688, "p90": 2.9422325134277343, "max": 5.0339813232421875, "pos_frac": 0.828125, "sample": [0.38215065002441406, 0.597198486328125, 0.34881591796875, 1.4625244140625, 1.838134765625, 1.6504592895507812, 1.5186271667480469, 0.3130664825439453, 1.9983291625976562, -0.9312934875488281, 3.4681034088134766, 1.94805908203125, 1.5132980346679688, 2.4800033569335938, 0.3158760070800781, 2.5229568481445312, 0.2585906982421875, 0.021940231323242188, 0.7156620025634766, 4.944671630859375, 2.9371871948242188, 0.27527618408203125, -1.0665206909179688, 1.1902389526367188, 0.2650146484375, 2.939380645751953, 0.9643783569335938, -0.6337127685546875, -0.194976806640625, 2.3759002685546875, 3.8710269927978516, -2.1141586303710938, 2.1045150756835938, 1.2654571533203125, -2.2436370849609375, 1.4134788513183594, 3.276988983154297, 0.7702255249023438, -4.7663726806640625, 2.748931884765625, 5.0339813232421875, 0.7057685852050781, 0.1427001953125, 1.8863677978515625, 1.9516067504882812, 0.6166839599609375, 0.46790313720703125, 2.9569664001464844, 1.5914115905761719, -0.5282402038574219, -0.6780853271484375, 1.0427970886230469, 2.9434547424316406, 0.5279312133789062, 0.439788818359375, 0.1324615478515625, 1.5319900512695312, 2.309389114379883, -1.7323474884033203, 0.5265274047851562, 0.29840850830078125, -0.3906898498535156, 0.3836536407470703, 1.406890869140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000312.npy"} +{"epoch": 0.47165532879818595, "step": 313, "batch_size": 64, "mean": 1.0249509811401367, "std": 1.7645083665847778, "min": -2.0793704986572266, "p10": -0.9524074554443358, "median": 0.7989768981933594, "p90": 3.092450714111328, "max": 7.03509521484375, "pos_frac": 0.703125, "sample": [0.5694789886474609, 3.0936203002929688, 2.353303909301758, 1.344156265258789, 2.307586669921875, 1.2875289916992188, 0.8100051879882812, 0.353485107421875, 1.1106033325195312, -1.3869857788085938, -0.12707138061523438, 2.6159114837646484, 0.9951057434082031, 0.7879486083984375, 1.4435958862304688, -0.15254974365234375, 1.4784317016601562, -0.2994823455810547, 0.3845672607421875, -0.6981124877929688, 2.9934005737304688, 1.8255119323730469, -0.2910652160644531, 5.71221923828125, -2.0793704986572266, -1.6617412567138672, -0.22672271728515625, 0.1844482421875, 0.965667724609375, 0.2454071044921875, 5.180194854736328, 2.8082847595214844, -1.262481689453125, -1.3725605010986328, 1.7367172241210938, 3.0897216796875, 0.6028919219970703, -0.4831962585449219, 0.091339111328125, 3.8395538330078125, 0.13274765014648438, 0.5570068359375, 2.4548492431640625, -0.536590576171875, 7.03509521484375, 0.2843437194824219, 0.02020263671875, 0.5632247924804688, 1.9691543579101562, 1.0527496337890625, 3.1282424926757812, 1.7335586547851562, 1.439697265625, -0.0062408447265625, -0.08267593383789062, -0.7775115966796875, 1.6688003540039062, -1.0088233947753906, 2.387277603149414, -0.820770263671875, -1.7248802185058594, 1.5045166015625, 0.9532623291015625, 3.5002803802490234], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000313.npy"} +{"epoch": 0.47316704459561604, "step": 314, "batch_size": 64, "mean": 1.2745311260223389, "std": 1.4797190427780151, "min": -1.8340606689453125, "p10": -0.5116477966308594, "median": 1.1018753051757812, "p90": 2.8226413726806645, "max": 5.857028961181641, "pos_frac": 0.78125, "sample": [2.524761199951172, 0.9815177917480469, 1.0650558471679688, 5.857028961181641, 2.291912078857422, 1.9388046264648438, 0.7456436157226562, 0.8780136108398438, -0.2662200927734375, 1.2987022399902344, -0.5554466247558594, -0.8046340942382812, 3.723785400390625, -1.117889404296875, -0.4053802490234375, 4.3025665283203125, 1.1271934509277344, 1.1255149841308594, 2.9732818603515625, 0.8648147583007812, 0.3946075439453125, -0.614715576171875, 2.4779090881347656, 3.6331214904785156, 2.4358596801757812, -1.8340606689453125, 1.082489013671875, 2.3867645263671875, 2.4323272705078125, 1.3142852783203125, 2.2840576171875, -0.41017913818359375, -0.06594276428222656, 1.003561019897461, 0.1656055450439453, 0.5508308410644531, 2.52142333984375, 1.2665557861328125, 0.8171787261962891, 1.2142486572265625, 0.3142528533935547, -1.0790557861328125, 2.734508514404297, 0.7312507629394531, 1.4020462036132812, 2.1138916015625, 2.22930908203125, 5.168561935424805, 0.7112960815429688, 2.3380470275878906, 0.6744766235351562, 2.427154541015625, 0.663330078125, 0.3462085723876953, 2.140380859375, 1.1212615966796875, -0.21106338500976562, 1.7891769409179688, 1.04547119140625, -0.20556640625, 1.634592056274414, -0.5404434204101562, -0.4444580078125, 2.86041259765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000314.npy"} +{"epoch": 0.47467876039304613, "step": 315, "batch_size": 64, "mean": 1.0559827089309692, "std": 1.4644498825073242, "min": -2.3030738830566406, "p10": -0.5312011718749999, "median": 0.9983692169189453, "p90": 3.0323902130126954, "max": 5.01361083984375, "pos_frac": 0.78125, "sample": [-0.55126953125, 1.8939971923828125, 3.7709903717041016, 1.2684803009033203, -0.3860969543457031, 2.0201797485351562, -2.3030738830566406, -0.3864002227783203, -0.484375, 0.2160186767578125, -1.0153274536132812, 3.0406494140625, 0.10744667053222656, 2.237518310546875, 0.11713027954101562, 1.8713302612304688, 1.2099761962890625, 0.34555816650390625, -0.14088821411132812, 0.11888694763183594, 1.6599273681640625, 3.7737884521484375, 0.7226791381835938, 0.5846633911132812, 0.150787353515625, 0.8222389221191406, 5.01361083984375, -1.1093673706054688, 0.37755775451660156, 1.7303524017333984, 0.3639373779296875, 0.6200065612792969, 2.956939697265625, 3.104248046875, 1.3748092651367188, 1.39971923828125, 0.60223388671875, 1.558837890625, -1.020172119140625, 0.3508758544921875, 1.4299812316894531, 0.317474365234375, 0.7942047119140625, 1.3737125396728516, -0.3596038818359375, 2.8797149658203125, 1.5916786193847656, 4.46980094909668, -0.7673187255859375, 1.671478271484375, 2.4689407348632812, 1.17449951171875, 0.088836669921875, -0.23749351501464844, 1.2279167175292969, 1.3036041259765625, 3.0131187438964844, 1.8349761962890625, 2.2470626831054688, -1.6980705261230469, -0.290008544921875, 0.09470558166503906, 3.2625656127929688, 1.7027130126953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000315.npy"} +{"epoch": 0.47619047619047616, "step": 316, "batch_size": 64, "mean": 1.290840983390808, "std": 1.835448980331421, "min": -1.4518718719482422, "p10": -0.7340133666992187, "median": 0.9766693115234375, "p90": 3.4278491973876957, "max": 7.692535400390625, "pos_frac": 0.71875, "sample": [0.28937530517578125, -1.4145355224609375, 1.83843994140625, -0.973358154296875, -1.2009506225585938, -0.36627197265625, 0.2729644775390625, 2.646728515625, 0.7022857666015625, 2.3671646118164062, 1.029022216796875, 4.3096923828125, 1.1215648651123047, 2.3327484130859375, 1.818939208984375, 0.92431640625, -1.4518718719482422, 0.757598876953125, -0.4434242248535156, 2.739164352416992, 3.3881072998046875, 3.2203369140625, 1.3622055053710938, -0.02448272705078125, 1.3967819213867188, 1.24078369140625, 1.8291740417480469, 0.038360595703125, -0.5051250457763672, 2.5589866638183594, 2.940399169921875, 4.336822509765625, 0.5413436889648438, 7.692535400390625, -0.8560256958007812, 0.366790771484375, -0.7690505981445312, 1.8207359313964844, 1.6986160278320312, -0.1100921630859375, 2.3730010986328125, 0.62005615234375, 2.142366409301758, 0.7466621398925781, 3.0393524169921875, -0.3880157470703125, -0.07431221008300781, 3.86566162109375, -0.2254180908203125, 6.742668151855469, -0.6522598266601562, -1.4449462890625, 0.5415496826171875, -0.02751922607421875, -0.588592529296875, 1.1978912353515625, 0.4472999572753906, 3.4448814392089844, 1.4814300537109375, 2.3533172607421875, 3.5500564575195312, 0.29120635986328125, 3.1382713317871094, 0.572418212890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000316.npy"} +{"epoch": 0.47770219198790626, "step": 317, "batch_size": 64, "mean": 1.5226335525512695, "std": 1.5497146844863892, "min": -1.0161895751953125, "p10": -0.29591712951660154, "median": 1.2430992126464844, "p90": 3.7793495178222662, "max": 5.905784606933594, "pos_frac": 0.828125, "sample": [1.8953628540039062, 2.205801010131836, 1.3441886901855469, 2.7945709228515625, 1.0891876220703125, 5.905784606933594, 0.015050888061523438, 1.0626144409179688, -0.186309814453125, 2.0547008514404297, -0.296966552734375, 1.249166488647461, -0.2934684753417969, -0.69189453125, 2.6011505126953125, 0.5346393585205078, 4.0460205078125, 0.0341033935546875, 2.8336753845214844, 3.8469886779785156, -0.11907196044921875, 0.6690902709960938, 1.4888076782226562, -0.8035659790039062, 2.138315200805664, 0.8025283813476562, 0.9786453247070312, 2.8085765838623047, 1.9608535766601562, 0.56842041015625, 4.951526641845703, 1.3649139404296875, 1.2370319366455078, 3.6215248107910156, 3.997547149658203, -0.2275238037109375, 1.827423095703125, 0.02957916259765625, 1.1891708374023438, 3.6207237243652344, 0.9686393737792969, 1.9966583251953125, 4.0973358154296875, -1.0161895751953125, 0.7281417846679688, 0.5343513488769531, 1.989288330078125, 2.054840087890625, 1.8035125732421875, 0.8317108154296875, 3.5487518310546875, 2.453296661376953, 1.0342159271240234, 2.0881805419921875, -0.3788909912109375, -0.51434326171875, -0.30013275146484375, 1.8813934326171875, 2.958353042602539, 4.906763076782227, 0.26432037353515625, 0.9104843139648438, 0.319122314453125, 0.139862060546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000317.npy"} +{"epoch": 0.47921390778533635, "step": 318, "batch_size": 64, "mean": 0.8145396709442139, "std": 1.8895400762557983, "min": -2.5374679565429688, "p10": -1.9847057342529297, "median": 0.5468587875366211, "p90": 3.124074935913086, "max": 5.010856628417969, "pos_frac": 0.65625, "sample": [1.1621055603027344, 1.3844070434570312, -0.147491455078125, -1.979949951171875, 3.1155433654785156, 2.4483795166015625, 1.56732177734375, -2.2206573486328125, 1.6437358856201172, 3.5430221557617188, 0.4306163787841797, -1.1163177490234375, 1.0833740234375, 2.829681396484375, 2.9917144775390625, 0.4891357421875, 0.4305686950683594, -0.815582275390625, 2.134510040283203, -0.32201385498046875, -0.05637931823730469, 1.3504180908203125, 3.2583389282226562, -2.1865463256835938, 0.01543426513671875, 1.8032302856445312, 3.1277313232421875, -2.5374679565429688, 1.2071914672851562, 1.539764404296875, -1.2119674682617188, 1.7317981719970703, -1.3446121215820312, 0.3789405822753906, -1.3798980712890625, 4.478240966796875, 4.777740478515625, -0.12526702880859375, 2.7344398498535156, -0.9490203857421875, 0.4599151611328125, -0.0356597900390625, 2.81195068359375, -2.2749710083007812, 0.22803497314453125, 2.1218109130859375, 0.31291961669921875, 3.564544677734375, 0.3933086395263672, 1.4222640991210938, 0.6045818328857422, 5.010856628417969, 1.3785171508789062, 1.4018707275390625, -2.0122032165527344, 0.18039703369140625, 3.098907470703125, -2.0479965209960938, -0.3298492431640625, -0.28922271728515625, 1.9833450317382812, -1.9867439270019531, -1.9603729248046875, 2.8301219940185547], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000318.npy"} +{"epoch": 0.48072562358276644, "step": 319, "batch_size": 64, "mean": 1.1784477233886719, "std": 1.7701297998428345, "min": -3.3036956787109375, "p10": -0.61993408203125, "median": 0.9912776947021484, "p90": 3.566778182983399, "max": 5.505882263183594, "pos_frac": 0.78125, "sample": [1.1168251037597656, 3.616546630859375, 0.438385009765625, 1.96954345703125, 0.14648818969726562, 4.4928741455078125, 0.8965263366699219, 0.7108993530273438, 0.1702423095703125, 1.7139396667480469, 2.6182708740234375, 1.9716262817382812, 0.058704376220703125, 0.4364471435546875, 1.7249298095703125, 4.079315185546875, -0.8535690307617188, 5.505882263183594, 0.7777194976806641, 2.3442001342773438, 0.9939651489257812, -0.26857757568359375, 1.7252960205078125, 2.590991973876953, 4.476860046386719, -0.3607749938964844, 3.42303466796875, 1.538787841796875, 2.814678192138672, 0.72406005859375, -0.0697784423828125, -0.621673583984375, 1.8289337158203125, 5.279541015625, -1.9737224578857422, 1.1856460571289062, -0.615875244140625, 2.506420135498047, 1.4227218627929688, -1.9306411743164062, 2.0458030700683594, 0.9885902404785156, 0.5944709777832031, -0.25756072998046875, 0.1534423828125, -0.23168182373046875, -3.3036956787109375, 0.24295806884765625, -2.1872596740722656, 3.4884986877441406, 0.07831001281738281, 0.0121002197265625, 0.815948486328125, -0.147613525390625, 1.3544082641601562, 2.7339248657226562, 1.8781890869140625, 0.8999061584472656, 2.672941207885742, 1.5054130554199219, 1.02081298828125, -1.9969558715820312, 3.6003265380859375, 0.8536834716796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000319.npy"} +{"epoch": 0.48223733938019653, "step": 320, "batch_size": 64, "mean": 1.0131080150604248, "std": 2.0796163082122803, "min": -2.6968994140625, "p10": -1.2651039123535155, "median": 0.4960145950317383, "p90": 3.2760440826416017, "max": 8.9578857421875, "pos_frac": 0.703125, "sample": [1.9487743377685547, -1.5544586181640625, 0.35852813720703125, 0.27660369873046875, 1.0445480346679688, 0.08043289184570312, 3.229236602783203, 0.27263641357421875, 2.7872238159179688, -0.10432624816894531, 1.10870361328125, 2.2771549224853516, 0.10310935974121094, 0.06020164489746094, -0.5723342895507812, 0.6597900390625, 0.35805511474609375, -1.1715774536132812, 0.033908843994140625, 1.9583969116210938, -2.1629180908203125, 0.6540660858154297, -2.6968994140625, 0.5030078887939453, -1.24176025390625, 8.9578857421875, 5.4227447509765625, 2.75616455078125, 1.1534996032714844, -1.3585128784179688, -0.049358367919921875, -1.5440025329589844, 2.5656280517578125, 3.224151611328125, 0.17112350463867188, 4.338310241699219, 0.7524185180664062, 0.9746627807617188, -1.8130760192871094, 0.2428264617919922, 0.8172187805175781, 3.9957313537597656, 0.5281829833984375, 0.48902130126953125, -0.8192596435546875, -0.04170799255371094, 6.8966827392578125, 0.07097625732421875, 0.31325531005859375, -0.14678955078125, -0.19565963745117188, -0.7214241027832031, 2.9577865600585938, -0.02834320068359375, 1.6069374084472656, 1.4014549255371094, 2.654754638671875, 4.209693908691406, 3.2961044311523438, -0.439300537109375, 1.6104621887207031, -1.2751083374023438, 2.2665977478027344, 1.3870773315429688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000320.npy"} +{"epoch": 0.4837490551776266, "step": 321, "batch_size": 64, "mean": 1.0499403476715088, "std": 1.6488450765609741, "min": -2.3998260498046875, "p10": -1.0764259338378905, "median": 1.0010204315185547, "p90": 3.2515312194824224, "max": 4.667964935302734, "pos_frac": 0.75, "sample": [1.06842041015625, 1.0074462890625, 0.6411876678466797, 1.013397216796875, 1.3450851440429688, 0.2439727783203125, 3.4211807250976562, 1.6612415313720703, 3.8035850524902344, 0.2422161102294922, 4.137172698974609, 0.6765861511230469, 0.9784164428710938, 1.8072891235351562, -0.7938861846923828, -0.5514678955078125, -0.40642547607421875, 0.40653419494628906, 1.2916412353515625, 2.811737060546875, 2.6942367553710938, 0.865966796875, -2.3998260498046875, -0.19980621337890625, 0.7889404296875, -1.9650993347167969, -1.8267650604248047, -1.540670394897461, 1.3083209991455078, 0.7572479248046875, 1.3941497802734375, 3.0977706909179688, 2.8295135498046875, 1.5473880767822266, 0.2228984832763672, 2.3170547485351562, 1.6235733032226562, -1.1402130126953125, 0.3442554473876953, 1.2878799438476562, 0.6759243011474609, -0.18112564086914062, 0.29492759704589844, 3.5716323852539062, -0.2884979248046875, -2.08087158203125, 3.3174285888671875, 2.2511138916015625, 2.834339141845703, 2.668506622314453, 0.6519050598144531, 1.7472000122070312, -0.46511077880859375, 1.968414306640625, 2.3193206787109375, 3.7236404418945312, 2.390409469604492, 0.9945945739746094, 4.667964935302734, -2.3544158935546875, 2.590606689453125, -0.9275894165039062, 0.5996742248535156, -0.5859603881835938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000321.npy"} +{"epoch": 0.4852607709750567, "step": 322, "batch_size": 64, "mean": 1.3585913181304932, "std": 1.4153547286987305, "min": -2.1415138244628906, "p10": -0.2918733596801757, "median": 1.3224563598632812, "p90": 3.152437019348145, "max": 5.767566680908203, "pos_frac": 0.84375, "sample": [-0.3289470672607422, 1.54815673828125, 2.789562225341797, 0.8821182250976562, 1.9191055297851562, 1.5512065887451172, 1.5643310546875, 1.5413265228271484, 2.334300994873047, 0.8054294586181641, -0.15427398681640625, 1.472747802734375, 3.331207275390625, 4.013082504272461, 1.732818603515625, 1.1302833557128906, 1.3454742431640625, 0.11126708984375, 0.7114982604980469, -0.10150146484375, 0.9150314331054688, 0.7485694885253906, -0.3871936798095703, 3.082986831665039, 3.5879173278808594, 3.5904369354248047, 0.8478260040283203, 2.1089324951171875, 0.10964584350585938, 2.7556610107421875, 0.21004486083984375, 0.58349609375, 1.9783401489257812, 3.182201385498047, 2.3694419860839844, 0.9110355377197266, 0.7284469604492188, 3.545166015625, 0.8113059997558594, -0.3447265625, 2.3121185302734375, 2.1359329223632812, 0.020259857177734375, 2.671438217163086, 1.5115032196044922, 2.9258155822753906, 2.8797454833984375, 1.1127357482910156, -1.1059150695800781, -2.1415138244628906, 0.7390117645263672, 0.2444915771484375, 1.0808448791503906, 1.4819812774658203, 2.4583053588867188, 1.4244384765625, 0.22118377685546875, -0.9403533935546875, 1.2994384765625, -1.4066658020019531, 5.767566680908203, -0.2053680419921875, 1.9354000091552734, 0.9936904907226562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000322.npy"} +{"epoch": 0.48677248677248675, "step": 323, "batch_size": 64, "mean": 1.1497416496276855, "std": 2.2343127727508545, "min": -4.103309631347656, "p10": -0.8449378967285155, "median": 0.7141456604003906, "p90": 4.35455780029297, "max": 8.760257720947266, "pos_frac": 0.71875, "sample": [1.4182968139648438, 0.06304931640625, -0.1617584228515625, -0.5828323364257812, 0.13561248779296875, 0.8729095458984375, 2.6427383422851562, 0.7443656921386719, -0.4095306396484375, 4.043159484863281, 2.723968505859375, -0.6522922515869141, 3.363475799560547, 0.976104736328125, 5.087730407714844, -0.9043922424316406, -1.7704315185546875, 3.34844970703125, 2.0388641357421875, 1.774688720703125, -2.0261688232421875, 2.1304473876953125, 0.25006103515625, 0.8652496337890625, 6.3205108642578125, -0.35643768310546875, -1.7235794067382812, 1.3713569641113281, 0.39513397216796875, 2.015655517578125, 6.28912353515625, 0.1497955322265625, -0.5112800598144531, -0.4651927947998047, 1.831298828125, -0.0817413330078125, 0.5302734375, 1.8884468078613281, -0.21924591064453125, 0.16142845153808594, 2.7139663696289062, 0.3838920593261719, 0.31183624267578125, 0.35599517822265625, 4.510658264160156, 8.760257720947266, 4.488014221191406, -2.9166030883789062, 4.5904541015625, 0.6839256286621094, -4.103309631347656, 0.4290771484375, 0.7743301391601562, 0.9248123168945312, 2.70208740234375, 3.0551910400390625, 2.4443359375, 1.0749435424804688, -1.65826416015625, -0.6855564117431641, 0.6536712646484375, 0.21149063110351562, 1.0171585083007812, -0.7062110900878906], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000323.npy"} +{"epoch": 0.48828420256991684, "step": 324, "batch_size": 64, "mean": 1.2257626056671143, "std": 1.3642934560775757, "min": -1.9054679870605469, "p10": -0.250318145751953, "median": 0.9789409637451172, "p90": 3.039696311950684, "max": 4.662071228027344, "pos_frac": 0.828125, "sample": [2.4005584716796875, 0.26694488525390625, 2.0927696228027344, -0.124542236328125, -0.5287437438964844, 2.798717498779297, 1.9134101867675781, 0.19855117797851562, -0.4417076110839844, 0.9083175659179688, 3.8379058837890625, 2.953340530395508, 0.49698638916015625, 1.1060562133789062, 0.5255165100097656, -0.4646892547607422, 3.0767059326171875, -1.9054679870605469, 1.52569580078125, -0.049224853515625, 1.0416069030761719, 3.86138916015625, -0.3504905700683594, 2.0063552856445312, 2.677967071533203, 1.0880718231201172, -0.08481597900390625, 0.5124473571777344, 4.334251403808594, 0.456634521484375, 0.9698295593261719, 0.080413818359375, 2.286771774291992, 1.4540557861328125, 1.6126251220703125, 0.4966850280761719, 0.4022865295410156, -0.29582786560058594, 1.704986572265625, 0.5135726928710938, 2.6143798828125, -0.6927680969238281, 4.2003173828125, 0.8381576538085938, 2.301239013671875, -0.14412879943847656, 0.478118896484375, 1.0836868286132812, 1.1342964172363281, 3.607940673828125, 4.662071228027344, 0.3355827331542969, 0.9012374877929688, 1.0755691528320312, 1.7250823974609375, 0.9880523681640625, 0.3772544860839844, 0.1369476318359375, 0.543212890625, 0.5368633270263672, 2.0629196166992188, 2.5966720581054688, 1.691650390625, 0.0385284423828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000324.npy"} +{"epoch": 0.4897959183673469, "step": 325, "batch_size": 64, "mean": 1.059891700744629, "std": 1.5665075778961182, "min": -2.5141143798828125, "p10": -0.6609004974365235, "median": 0.9770059585571289, "p90": 2.877861022949219, "max": 5.853963851928711, "pos_frac": 0.75, "sample": [0.9555110931396484, 3.7950057983398438, 0.6891593933105469, 0.8706836700439453, 1.3194198608398438, 1.5197601318359375, 1.7517738342285156, 1.4365882873535156, 0.6911239624023438, 1.1324310302734375, 1.302154541015625, 2.099346160888672, 0.35819053649902344, -1.1958179473876953, 2.446441650390625, -0.3524188995361328, 2.387176513671875, 0.9779281616210938, 0.5100021362304688, -0.4580402374267578, 5.297641754150391, -0.92706298828125, 1.2469978332519531, 0.4548301696777344, 0.1566143035888672, 4.4535369873046875, -0.6510124206542969, 2.080190658569336, -0.2045726776123047, 1.209890365600586, -0.18914794921875, 0.8986740112304688, 2.9184417724609375, 1.759521484375, -1.2708358764648438, 2.783172607421875, -0.8624992370605469, 1.0011749267578125, 0.9760837554931641, 1.44647216796875, 0.3113861083984375, 0.49239158630371094, 0.19753265380859375, 4.51580810546875, -2.5141143798828125, 0.114593505859375, 1.3133087158203125, 0.036590576171875, 3.4372806549072266, 1.0700435638427734, 5.853963851928711, -0.6651382446289062, 1.3501720428466797, -1.0505905151367188, -0.455291748046875, -0.24579620361328125, 1.90771484375, 1.8721160888671875, 1.9907989501953125, 0.38629913330078125, 2.0259170532226562, -0.1638641357421875, 1.4253158569335938, -0.187896728515625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000325.npy"} +{"epoch": 0.491307634164777, "step": 326, "batch_size": 64, "mean": 0.6357403993606567, "std": 1.7094703912734985, "min": -3.6011810302734375, "p10": -1.313703727722168, "median": 0.5773258209228516, "p90": 3.0270252227783208, "max": 4.307231903076172, "pos_frac": 0.640625, "sample": [-0.9295806884765625, 0.2552223205566406, 1.2200126647949219, 0.14609909057617188, 1.6766357421875, -3.2435150146484375, 3.8213119506835938, 1.405670166015625, 0.9739303588867188, 3.5331954956054688, 0.7296104431152344, 1.6402015686035156, 0.23719406127929688, 0.60394287109375, -0.16251373291015625, 0.90869140625, 0.23670387268066406, 1.8275680541992188, -0.5729522705078125, 3.327239990234375, 2.683347702026367, 1.042694091796875, 2.2205352783203125, 2.0800094604492188, -0.884613037109375, 1.002593994140625, -2.3490829467773438, -1.36492919921875, 0.1466064453125, -0.705963134765625, 2.1594295501708984, -1.3950881958007812, 0.14934539794921875, -0.4937744140625, 2.2057113647460938, 0.12379837036132812, 0.5507087707519531, -0.5496978759765625, -0.515106201171875, -0.606903076171875, 3.219938278198242, 3.5203018188476562, -1.4344940185546875, -0.17738723754882812, 2.9115219116210938, 0.017822265625, 3.076526641845703, -1.1941776275634766, 0.7020263671875, 4.307231903076172, -0.038539886474609375, -0.2934989929199219, -3.6011810302734375, 1.1676025390625, -1.1801223754882812, 2.291168212890625, -2.2380733489990234, 0.962188720703125, 2.1628952026367188, 2.213043212890625, 1.3315677642822266, -1.0281600952148438, -1.0828628540039062, 1.9377574920654297], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000326.npy"} +{"epoch": 0.4928193499622071, "step": 327, "batch_size": 64, "mean": 1.1204906702041626, "std": 1.706774115562439, "min": -3.6262664794921875, "p10": -0.8721614837646483, "median": 1.0498943328857422, "p90": 3.2958049774169926, "max": 4.663787841796875, "pos_frac": 0.734375, "sample": [2.8557510375976562, 0.5751495361328125, 0.6325416564941406, 4.663787841796875, -0.092437744140625, 2.4345703125, -2.6419925689697266, 1.6876144409179688, -1.364389419555664, 2.6057510375976562, 3.356212615966797, -0.040584564208984375, -0.027187347412109375, 1.8990936279296875, 1.2925338745117188, 2.4835777282714844, 3.0114593505859375, -0.4191761016845703, -1.7376556396484375, -0.9468879699707031, 0.2101268768310547, 0.21812820434570312, 1.6354923248291016, 2.966156005859375, 3.78790283203125, -0.6977996826171875, 0.35504913330078125, 0.7919235229492188, 2.23272705078125, -0.618133544921875, 0.6736526489257812, 1.20660400390625, -0.28128814697265625, 3.491546630859375, 1.5327529907226562, 0.6652450561523438, 3.1548538208007812, 0.7583160400390625, -0.5849456787109375, 0.07924461364746094, 4.3382415771484375, 0.6396865844726562, -3.6262664794921875, 2.05340576171875, 2.3505630493164062, 4.5790863037109375, 0.5118331909179688, 2.3741989135742188, 4.042388916015625, 1.1269187927246094, 2.6401596069335938, -0.2417926788330078, 1.5694580078125, 1.4418563842773438, -0.9614944458007812, 0.23271560668945312, 2.5177383422851562, 1.5312423706054688, 1.2343063354492188, 0.3488426208496094, 0.972869873046875, -1.4010066986083984, 1.758636474609375, -0.127471923828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000327.npy"} +{"epoch": 0.4943310657596372, "step": 328, "batch_size": 64, "mean": 1.0702919960021973, "std": 1.5639634132385254, "min": -1.857269287109375, "p10": -0.8315227508544921, "median": 1.1274681091308594, "p90": 3.3015714645385743, "max": 4.5197601318359375, "pos_frac": 0.6875, "sample": [4.215858459472656, 0.6425304412841797, 0.8449916839599609, -0.0187225341796875, 2.6102066040039062, -0.030698776245117188, -0.48458290100097656, 2.5887908935546875, 0.814361572265625, 1.7183151245117188, -0.5559158325195312, 1.4047088623046875, 4.404388427734375, 0.7574501037597656, 2.171234130859375, -0.9911346435546875, 2.6415462493896484, 1.4846210479736328, 1.8902740478515625, -1.6432723999023438, 1.4152679443359375, -1.0340728759765625, -0.044422149658203125, 1.2890663146972656, -0.552001953125, -0.8571701049804688, 4.5197601318359375, 1.5793228149414062, -1.857269287109375, 1.4377365112304688, -0.5084152221679688, 1.3165149688720703, 2.6610069274902344, 1.5857086181640625, 2.183269500732422, 0.7899551391601562, 0.07753753662109375, -0.7275810241699219, 4.04766845703125, 0.235076904296875, 1.182037353515625, 2.77459716796875, -0.38728904724121094, -0.7716789245605469, 3.8612518310546875, 1.6598434448242188, 0.5480499267578125, -0.423431396484375, 1.4286346435546875, 1.3420791625976562, 0.5999469757080078, -1.0688133239746094, 1.907358169555664, 3.2592620849609375, -0.9934539794921875, -0.122283935546875, 1.22772216796875, 0.8107662200927734, 3.319704055786133, 1.0728988647460938, -0.7140083312988281, 0.11863327026367188, 3.7832908630371094, 2.0616588592529297], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000328.npy"} +{"epoch": 0.4958427815570673, "step": 329, "batch_size": 64, "mean": 1.0447362661361694, "std": 2.015392780303955, "min": -2.604766845703125, "p10": -1.2036186218261717, "median": 0.824432373046875, "p90": 3.245797729492188, "max": 7.592914581298828, "pos_frac": 0.671875, "sample": [1.2375831604003906, 0.17291641235351562, 1.7326126098632812, 2.313201904296875, -1.0718994140625, 0.8044967651367188, 3.32061767578125, 0.7130126953125, 2.067596435546875, 0.15340423583984375, 2.2562789916992188, 1.3679580688476562, -1.5661258697509766, 1.7692489624023438, -0.46271514892578125, -1.2178115844726562, 0.7348709106445312, 1.263519287109375, -1.6353912353515625, 2.1509628295898438, -0.0623626708984375, -0.13874053955078125, 4.915887832641602, 2.15826416015625, -2.604766845703125, 3.310699462890625, 6.91357421875, 0.972412109375, -1.85174560546875, -0.4303741455078125, 2.1051559448242188, 0.47925567626953125, -2.150867462158203, 1.3126220703125, 1.9910087585449219, -1.060546875, 5.702491760253906, -0.47215843200683594, 2.5519886016845703, 2.490234375, 3.0943603515625, 2.2224159240722656, -0.32024574279785156, -1.170501708984375, 0.9323348999023438, 0.24355316162109375, 0.09589195251464844, 0.7389183044433594, 0.78997802734375, 1.9943428039550781, 0.284698486328125, 1.535736083984375, 1.3420257568359375, -0.32398223876953125, 7.592914581298828, 2.24560546875, -0.7356586456298828, 4.407997131347656, 0.8443679809570312, -2.083953857421875, 1.8311004638671875, -0.15459442138671875, -0.029821395874023438, -0.750732421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000329.npy"} +{"epoch": 0.4973544973544973, "step": 330, "batch_size": 64, "mean": 1.1226279735565186, "std": 1.7190712690353394, "min": -3.3219528198242188, "p10": -0.9439685821533201, "median": 1.1818046569824219, "p90": 3.420656967163086, "max": 5.288421630859375, "pos_frac": 0.75, "sample": [-0.375, -0.03692626953125, -0.7118110656738281, -1.9074535369873047, -0.3020172119140625, -3.3219528198242188, 3.8877487182617188, 1.223785400390625, -1.0434646606445312, 3.8991241455078125, 3.61822509765625, 0.8371944427490234, -2.170644760131836, 1.0872268676757812, 2.2388954162597656, 3.3448944091796875, 3.2902069091796875, -0.6668701171875, 5.288421630859375, 2.9541587829589844, 2.2873687744140625, -0.6724414825439453, 1.16827392578125, 0.18349456787109375, 2.5388565063476562, 1.866943359375, 1.1953353881835938, 0.1226806640625, 0.18480682373046875, 0.49491119384765625, 0.7389812469482422, 2.221099853515625, 1.2948646545410156, 1.5867385864257812, -2.237030029296875, 1.7877883911132812, 2.0914535522460938, 1.725830078125, 3.1193008422851562, 1.2599983215332031, -0.17151641845703125, 1.4364166259765625, 1.0398483276367188, 1.6264266967773438, 3.4393348693847656, 2.9842700958251953, 0.8689250946044922, 1.1389427185058594, 1.310791015625, 0.8149871826171875, 0.13998031616210938, 0.44732666015625, 3.5173301696777344, 1.4101791381835938, 1.2225341796875, 0.7440643310546875, -0.47046661376953125, -1.2859764099121094, 3.3770751953125, -1.7902984619140625, 1.6471443176269531, -0.0219573974609375, 0.41110992431640625, 3.918720245361328], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000330.npy"} +{"epoch": 0.4988662131519274, "step": 331, "batch_size": 64, "mean": 1.420309066772461, "std": 1.865747094154358, "min": -2.2935256958007812, "p10": -0.6848209381103515, "median": 1.4092540740966797, "p90": 3.397577095031739, "max": 8.453773498535156, "pos_frac": 0.78125, "sample": [2.5655555725097656, 1.2860260009765625, 0.7201156616210938, 1.2685623168945312, 1.4706459045410156, 2.1974411010742188, 1.8698577880859375, 3.2279510498046875, 2.0989761352539062, 0.45998382568359375, -0.7297172546386719, 3.7265777587890625, 2.9990577697753906, 3.676239013671875, 0.2324981689453125, 0.09747314453125, 8.453773498535156, 1.52667236328125, 1.5448074340820312, 6.029462814331055, 1.9984321594238281, 0.6717739105224609, 0.466064453125, 0.7318916320800781, 0.13439559936523438, 2.7529220581054688, 2.562124252319336, 4.7623748779296875, -0.5800628662109375, 2.393829345703125, 2.5238494873046875, 1.4009284973144531, 2.325939178466797, 2.9152908325195312, 1.6316680908203125, -0.03688812255859375, 3.1369400024414062, -0.2661552429199219, -0.3356971740722656, 1.6732330322265625, -0.0713653564453125, 3.11578369140625, 0.9129199981689453, -1.213226318359375, 0.4942455291748047, -2.2935256958007812, 0.3641357421875, -1.2516555786132812, -1.742095947265625, 0.10379981994628906, -0.8226852416992188, -0.2718353271484375, 3.0836944580078125, 0.3644142150878906, 1.4175796508789062, 0.43536376953125, -0.3041248321533203, 2.5318527221679688, 1.7184906005859375, -1.4398612976074219, 3.470273971557617, 1.7648372650146484, 0.6233978271484375, 4.324556350708008], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000331.npy"} +{"epoch": 0.5003779289493575, "step": 332, "batch_size": 64, "mean": 1.2151854038238525, "std": 1.665781021118164, "min": -2.2678871154785156, "p10": -0.5359825134277343, "median": 0.9639816284179688, "p90": 3.2959560394287117, "max": 7.403232574462891, "pos_frac": 0.8125, "sample": [0.19736099243164062, 1.4211502075195312, -0.0145111083984375, 0.5390434265136719, 1.3845539093017578, 0.684906005859375, 1.32147216796875, 1.1366405487060547, 1.0220489501953125, 0.6351356506347656, 0.694610595703125, 4.9775390625, 2.7441558837890625, 3.1056289672851562, 3.1332473754882812, 0.9197311401367188, 0.35129356384277344, 0.10123443603515625, 1.8361930847167969, -0.096710205078125, 3.9987220764160156, 1.37689208984375, -1.8946571350097656, 3.3656883239746094, 0.28145599365234375, 1.6224212646484375, -0.9352054595947266, -0.0472259521484375, 1.1455535888671875, 1.6844673156738281, -0.555755615234375, 2.5540409088134766, -0.8496170043945312, -0.48984527587890625, 2.961273193359375, 1.568979263305664, 0.4971046447753906, 3.8758316040039062, -1.3967971801757812, 0.20299339294433594, 0.3532257080078125, 2.962982177734375, 1.7603778839111328, 1.726766586303711, 0.745697021484375, 0.375030517578125, 0.85784912109375, 2.360546112060547, 1.078369140625, 1.4761581420898438, 3.7171478271484375, 0.13240814208984375, 0.4165153503417969, 0.3655853271484375, 1.0082321166992188, 1.04656982421875, 7.403232574462891, -0.0110321044921875, 0.5588226318359375, 2.852731704711914, 4.3628692626953125, 0.068603515625, -2.2678871154785156, -0.639984130859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000332.npy"} +{"epoch": 0.5018896447467877, "step": 333, "batch_size": 64, "mean": 1.5352662801742554, "std": 1.7865583896636963, "min": -1.2359237670898438, "p10": -0.5515525817871093, "median": 1.2819671630859375, "p90": 3.8409572601318365, "max": 6.662635803222656, "pos_frac": 0.78125, "sample": [0.33098602294921875, -0.25187110900878906, 5.929679870605469, 2.6479721069335938, 1.3243865966796875, -0.26245689392089844, 2.2065696716308594, 0.857391357421875, 1.0075035095214844, -0.56378173828125, 3.6999053955078125, 2.6027774810791016, 1.5387496948242188, 1.3423728942871094, 3.096834182739258, 2.7036590576171875, 1.2395477294921875, 5.3313751220703125, 3.610137939453125, 3.742084503173828, 1.062774658203125, 1.6349105834960938, 0.5286636352539062, -0.906829833984375, 1.0331859588623047, -0.5230178833007812, -0.5761966705322266, -0.019916534423828125, 0.7435226440429688, 0.291748046875, 1.7698020935058594, 4.0682830810546875, -0.24341964721679688, 0.5148391723632812, 2.1683921813964844, 3.0404624938964844, 6.662635803222656, -0.8738250732421875, 0.47440338134765625, 0.721435546875, 0.3966217041015625, 2.6738433837890625, -0.8595199584960938, 0.6847801208496094, 0.1727447509765625, -0.4202919006347656, 3.69677734375, 0.625030517578125, -1.1561508178710938, 1.3540153503417969, 1.4403762817382812, -0.21398162841796875, 1.4724617004394531, 3.6566848754882812, 0.09443473815917969, -1.2359237670898438, 0.5623550415039062, 1.826324462890625, 4.052055358886719, 4.428457260131836, 3.883331298828125, 2.6379241943359375, 2.65008544921875, 2.1289291381835938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000333.npy"} +{"epoch": 0.5034013605442177, "step": 334, "batch_size": 64, "mean": 1.118227481842041, "std": 1.7681703567504883, "min": -2.8492813110351562, "p10": -1.234210205078125, "median": 0.9080848693847656, "p90": 3.3269241333007815, "max": 4.571796417236328, "pos_frac": 0.734375, "sample": [-0.168731689453125, -2.2566757202148438, 1.812164306640625, 0.5857887268066406, 1.2874908447265625, 1.1948089599609375, -1.1368408203125, 4.325920104980469, 0.2351531982421875, -2.8492813110351562, -0.23073577880859375, 4.2780914306640625, -1.4015941619873047, 0.6429061889648438, 0.5771884918212891, 1.2529029846191406, -0.281494140625, -0.4865531921386719, 1.26373291015625, 4.571796417236328, 3.2962265014648438, 3.0529327392578125, 0.6377410888671875, 0.7139015197753906, 3.072193145751953, -1.9612541198730469, 0.44190216064453125, 0.07666587829589844, 3.3659019470214844, 1.029134750366211, 0.3891258239746094, -1.0720901489257812, 3.0739212036132812, 0.129913330078125, 2.708414077758789, 4.283542633056641, 2.216522216796875, 1.844757080078125, 1.8604812622070312, 0.18072509765625, 2.053997039794922, -1.27593994140625, -0.39276123046875, 2.14935302734375, 3.9325523376464844, 0.48873138427734375, -0.5892715454101562, 0.6474456787109375, -1.67425537109375, 3.2054519653320312, 1.7469902038574219, -0.6158676147460938, 2.382598876953125, 0.7870349884033203, -1.3249053955078125, 2.7727203369140625, 3.1278305053710938, 3.3400802612304688, -0.5327835083007812, 0.48674964904785156, 2.1221466064453125, 1.1688995361328125, 2.1603851318359375, 2.842681884765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000334.npy"} +{"epoch": 0.5049130763416477, "step": 335, "batch_size": 64, "mean": 1.2369496822357178, "std": 1.7391375303268433, "min": -2.58282470703125, "p10": -1.157388877868652, "median": 1.139181137084961, "p90": 3.50749740600586, "max": 5.632354736328125, "pos_frac": 0.75, "sample": [1.2682037353515625, 2.5264511108398438, -1.4812641143798828, 3.618988037109375, 1.112548828125, 5.632354736328125, 2.6672515869140625, 0.05841636657714844, 2.149749755859375, 1.1518707275390625, 3.5758895874023438, 2.0840835571289062, 1.07806396484375, 2.4422454833984375, 4.736713409423828, 2.8601417541503906, 1.1270942687988281, -0.05908012390136719, 0.23821640014648438, 4.381336212158203, -1.2929096221923828, 1.2030563354492188, 1.2212944030761719, 2.6397972106933594, -0.971893310546875, 3.3479156494140625, 2.7694778442382812, -1.8703994750976562, -0.4676513671875, 1.9483413696289062, 2.0361175537109375, -0.8574371337890625, 0.6539421081542969, 1.0608253479003906, 1.483795166015625, -0.5937061309814453, 2.8153076171875, -0.06850814819335938, 0.42974853515625, 3.6169204711914062, 2.3152084350585938, 1.5378341674804688, 0.3684539794921875, 1.1237335205078125, 1.0747337341308594, 4.091978073120117, -0.08524322509765625, 1.1512680053710938, -2.58282470703125, -0.026622772216796875, 0.44994354248046875, -0.6081123352050781, 0.8991432189941406, -1.236886978149414, -2.14727783203125, -1.2863082885742188, 2.4588546752929688, 2.885467529296875, 0.9042491912841797, 2.5456199645996094, 0.27112579345703125, 1.2590103149414062, 0.3621788024902344, 3.1659393310546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000335.npy"} +{"epoch": 0.5064247921390779, "step": 336, "batch_size": 64, "mean": 1.151987075805664, "std": 1.987610101699829, "min": -2.4831314086914062, "p10": -0.9592994689941406, "median": 0.8680639266967773, "p90": 4.119616699218751, "max": 7.1369781494140625, "pos_frac": 0.734375, "sample": [0.125732421875, 3.813751220703125, 2.04034423828125, -1.02410888671875, 6.4548187255859375, 1.947540283203125, 1.3046302795410156, -0.9422683715820312, 6.045249938964844, 0.21801185607910156, -0.045246124267578125, 0.02442169189453125, 4.337352752685547, -1.0935287475585938, 2.357757568359375, 1.2221946716308594, 0.1339111328125, 0.5174636840820312, 4.250701904296875, -0.6743450164794922, 0.21074676513671875, 0.9675216674804688, 3.0924072265625, 1.5330123901367188, -0.7661895751953125, 2.9233474731445312, 0.19353103637695312, -1.8869781494140625, -0.9665985107421875, -2.4831314086914062, 4.628265380859375, -1.4428062438964844, 0.45771217346191406, 1.5457687377929688, 0.23939895629882812, -0.28171348571777344, 1.3010101318359375, -0.8819351196289062, 2.8777408599853516, 1.1655998229980469, 0.158966064453125, -0.41650390625, 0.983856201171875, 0.328460693359375, 1.8433952331542969, 0.9472274780273438, 0.1012115478515625, -1.8040618896484375, 0.8500804901123047, 3.8040771484375, 0.2481708526611328, 1.7211761474609375, 2.1492385864257812, 4.29632568359375, 7.1369781494140625, 1.8433074951171875, 0.163177490234375, 0.88604736328125, 2.6723556518554688, -0.0290679931640625, 0.971343994140625, 1.7331867218017578, -0.1400909423828125, -0.16278076171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000336.npy"} +{"epoch": 0.5079365079365079, "step": 337, "batch_size": 64, "mean": 1.0915067195892334, "std": 1.726086139678955, "min": -2.068023681640625, "p10": -1.3691096305847168, "median": 1.073246955871582, "p90": 3.1620231628417974, "max": 5.646453857421875, "pos_frac": 0.71875, "sample": [0.8149986267089844, 0.03814697265625, -1.4469013214111328, -0.9106369018554688, 1.1829986572265625, 1.2878494262695312, -0.014635086059570312, 3.2938003540039062, -2.068023681640625, -0.32323455810546875, 4.150665283203125, 2.0917739868164062, 1.4559707641601562, -1.9912300109863281, 1.29034423828125, 1.2813491821289062, 1.9225292205810547, 2.42852783203125, 0.5508308410644531, -0.43750762939453125, 1.3261795043945312, 0.0028285980224609375, -1.3762636184692383, -2.0599708557128906, 2.637920379638672, 2.0357227325439453, -1.4457855224609375, 1.7001533508300781, 0.8792190551757812, 5.281524658203125, 0.7672958374023438, -0.8008880615234375, 3.0424633026123047, 0.15864181518554688, 2.596923828125, 2.278066635131836, 2.6231155395507812, 0.49747467041015625, 1.0425567626953125, 2.4814414978027344, 3.2804946899414062, 1.3986434936523438, 1.9034881591796875, 0.9198226928710938, -0.095245361328125, -1.3524169921875, 1.1039371490478516, -0.12957763671875, 2.960723876953125, 0.4400215148925781, 3.069091796875, 5.646453857421875, -0.019989013671875, 0.994903564453125, 0.7269287109375, 3.925811767578125, -0.3680992126464844, -1.0234603881835938, 1.9301719665527344, 1.6725997924804688, -1.725799560546875, 2.3443450927734375, 3.2018508911132812, 0.785491943359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000337.npy"} +{"epoch": 0.509448223733938, "step": 338, "batch_size": 64, "mean": 1.002780795097351, "std": 1.498197078704834, "min": -1.849945068359375, "p10": -0.7155931472778321, "median": 1.020578384399414, "p90": 3.3061885833740234, "max": 4.166530609130859, "pos_frac": 0.75, "sample": [-1.849945068359375, -0.2523002624511719, 1.977386474609375, 1.8745098114013672, 1.5125465393066406, 1.0849761962890625, -1.703115463256836, 1.8870620727539062, 0.025299072265625, 3.9897899627685547, 3.865093231201172, 0.00510406494140625, -0.5955238342285156, 1.03704833984375, 0.8218498229980469, -1.0316848754882812, 0.6008720397949219, -1.548868179321289, -0.241912841796875, 0.44054412841796875, 1.5340652465820312, 1.5100250244140625, 0.9357757568359375, -0.41437721252441406, 2.4199142456054688, 1.7356796264648438, -1.78118896484375, 3.2879638671875, 1.781158447265625, 2.4352588653564453, 1.2368240356445312, 3.427459716796875, 0.22675514221191406, 1.6196098327636719, 0.24872779846191406, 1.1414718627929688, 2.083658218383789, 0.8874359130859375, 1.6183929443359375, -0.57928466796875, -0.21540069580078125, 1.7816295623779297, -0.49353790283203125, 1.3978462219238281, 1.0041084289550781, 1.5884380340576172, -1.0593185424804688, -0.7125263214111328, 0.854644775390625, 0.4873924255371094, 0.15251922607421875, 4.146413803100586, 4.008190155029297, 0.3138465881347656, 0.24268722534179688, 2.8260231018066406, 1.1737518310546875, 4.166530609130859, 1.5976028442382812, 0.2950782775878906, -0.7169075012207031, -0.3050689697265625, 3.3139991760253906, 1.0759735107421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000338.npy"} +{"epoch": 0.5109599395313681, "step": 339, "batch_size": 64, "mean": 1.137109398841858, "std": 1.60611891746521, "min": -1.6900634765625, "p10": -0.9980974197387694, "median": 1.07421875, "p90": 3.2770980834960937, "max": 5.8139801025390625, "pos_frac": 0.765625, "sample": [1.4043121337890625, 1.2903976440429688, -0.14832687377929688, 0.3764152526855469, -0.78546142578125, 2.364776611328125, 1.8509063720703125, 1.0624771118164062, 0.7073802947998047, -0.8941097259521484, 3.5107803344726562, -0.4233551025390625, 0.6260623931884766, 0.31380653381347656, 0.5740776062011719, -0.5456409454345703, -1.1786823272705078, 0.9255218505859375, 3.2814788818359375, 0.09180831909179688, 2.8187904357910156, 1.0340652465820312, 1.42645263671875, 1.6082611083984375, -0.10203742980957031, 2.629718780517578, 0.2840595245361328, 0.7567996978759766, 1.0859603881835938, 1.4825763702392578, 3.9102096557617188, 2.740753173828125, 5.1905364990234375, 4.5132904052734375, 1.7421646118164062, 1.2150306701660156, 1.8475723266601562, -1.1902313232421875, 3.2586669921875, 1.70166015625, 3.8138046264648438, 0.772308349609375, -1.4003353118896484, -1.1317462921142578, 1.17205810546875, -1.0440826416015625, -1.04266357421875, 1.4881134033203125, -0.8638916015625, -1.6900634765625, 0.31553077697753906, 0.30127906799316406, 1.2068424224853516, 1.821359634399414, -0.3503875732421875, 5.8139801025390625, 0.520599365234375, 1.3699779510498047, 0.7751922607421875, 1.8728485107421875, 1.4027938842773438, 1.3196601867675781, 0.706024169921875, 3.266876220703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000339.npy"} +{"epoch": 0.5124716553287982, "step": 340, "batch_size": 64, "mean": 1.3231074810028076, "std": 1.9610782861709595, "min": -3.453580856323242, "p10": -0.8867776870727538, "median": 1.1917181015014648, "p90": 3.9491233825683594, "max": 6.919748306274414, "pos_frac": 0.796875, "sample": [1.15374755859375, 3.1050758361816406, 2.039173126220703, 0.9764919281005859, 0.05785369873046875, 4.951967239379883, 0.26250267028808594, -0.502899169921875, -0.8507061004638672, 0.023529052734375, 1.4828109741210938, 2.354778289794922, 1.6863861083984375, 2.202543258666992, 5.2211151123046875, 2.155719757080078, -0.44724082946777344, -2.686656951904297, 3.96875, 3.3147735595703125, -0.5670089721679688, 0.15923118591308594, -1.455841064453125, 1.7464332580566406, 2.17669677734375, 3.7012405395507812, 0.678436279296875, 1.7369537353515625, 3.2023086547851562, 3.6576881408691406, -1.1286697387695312, 1.0534744262695312, 1.21484375, 1.43035888671875, 1.6873321533203125, 0.8213768005371094, 3.9220733642578125, -0.2556743621826172, 0.040313720703125, 3.4733734130859375, 6.919748306274414, -0.6276760101318359, 0.08009719848632812, 4.120874404907227, 1.9954452514648438, -1.4478015899658203, -0.9022369384765625, 1.3191871643066406, 0.9410572052001953, 2.8162384033203125, 1.540863037109375, 1.1685924530029297, 0.6730422973632812, 0.18382644653320312, -3.453580856323242, 0.1303253173828125, 3.9607162475585938, 0.49509429931640625, 0.8293399810791016, 4.7400360107421875, 1.5386276245117188, 0.2976799011230469, -2.0050125122070312, 1.5997390747070312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000340.npy"} +{"epoch": 0.5139833711262283, "step": 341, "batch_size": 64, "mean": 1.323413372039795, "std": 1.7776663303375244, "min": -2.63226318359375, "p10": -0.9483024597167967, "median": 0.9735136032104492, "p90": 3.9428602218627935, "max": 5.64825439453125, "pos_frac": 0.734375, "sample": [-1.0707206726074219, 2.9076499938964844, 1.7881240844726562, 2.4992542266845703, 3.783151626586914, 2.6770668029785156, 2.956512451171875, 5.64825439453125, 1.7833824157714844, -0.996246337890625, 4.0113067626953125, 0.6204357147216797, -0.1405181884765625, -0.4751167297363281, -0.07490158081054688, 0.767425537109375, 1.9591598510742188, 0.9365005493164062, 4.9760894775390625, 4.3167572021484375, 2.7044219970703125, 3.0748367309570312, 2.2446670532226562, 0.4688434600830078, -2.63226318359375, 4.275630950927734, 1.9694976806640625, -0.6270656585693359, -0.16059112548828125, -1.24053955078125, -0.09540176391601562, 2.1970596313476562, -0.48847198486328125, -0.05881500244140625, 2.8245468139648438, 3.346771240234375, 0.9805755615234375, 0.9664516448974609, 0.215606689453125, -1.6714019775390625, -1.3803939819335938, 0.2780609130859375, 0.8466339111328125, 2.506988525390625, -0.003448486328125, 4.146568298339844, 1.6251678466796875, 2.5203857421875, 1.50445556640625, 4.0221099853515625, 1.504364013671875, 1.6149063110351562, 2.5003719329833984, -1.5147247314453125, 0.433685302734375, 3.1552963256835938, 1.1141281127929688, 0.6453323364257812, 0.8730030059814453, 0.6613311767578125, 0.7877655029296875, 0.1768341064453125, 0.34814453125, -0.8364334106445312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000341.npy"} +{"epoch": 0.5154950869236583, "step": 342, "batch_size": 64, "mean": 1.4784512519836426, "std": 1.8620493412017822, "min": -2.439373016357422, "p10": -0.3912935256958008, "median": 0.9825363159179688, "p90": 4.389190673828126, "max": 5.289710998535156, "pos_frac": 0.734375, "sample": [2.119720458984375, -0.1066741943359375, -1.0165443420410156, -0.348724365234375, 0.036220550537109375, 4.077304840087891, 0.6240921020507812, 3.039764404296875, -0.24674224853515625, 4.49932861328125, 0.1890392303466797, 0.9030914306640625, 0.6396217346191406, 4.47772216796875, 0.646453857421875, -1.36279296875, -0.10482406616210938, -2.439373016357422, -0.7365474700927734, -0.3868541717529297, 0.434906005859375, 2.434459686279297, 2.0215911865234375, 2.8118724822998047, -0.01868438720703125, 2.2289199829101562, 1.8703155517578125, 1.8697319030761719, 1.8529739379882812, 3.1190948486328125, 1.1787891387939453, 0.8093242645263672, 1.0178604125976562, 0.2108001708984375, 2.2260971069335938, -0.8244152069091797, 3.668354034423828, -0.0248565673828125, 4.129913330078125, 3.959188461303711, -0.083892822265625, 4.599920272827148, 4.56781005859375, 1.4617424011230469, -0.39319610595703125, 5.1728668212890625, 0.29487037658691406, -0.2651386260986328, 3.9332122802734375, 4.1826171875, 0.9472122192382812, 1.766378402709961, -0.10291290283203125, 5.289710998535156, 0.7068576812744141, 4.836315155029297, 1.7979011535644531, 0.016307830810546875, 0.6767654418945312, 1.1090660095214844, 2.203092575073242, 0.3518829345703125, -1.0861663818359375, 3.15814208984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000342.npy"} +{"epoch": 0.5170068027210885, "step": 343, "batch_size": 64, "mean": 0.7660267353057861, "std": 1.6629047393798828, "min": -3.7351760864257812, "p10": -1.2355705261230467, "median": 0.7732353210449219, "p90": 2.7467094421386724, "max": 4.982330322265625, "pos_frac": 0.734375, "sample": [1.4413375854492188, -0.23929977416992188, 0.7826309204101562, 0.5395050048828125, -1.3344345092773438, 3.19268798828125, 2.371002197265625, -0.8887462615966797, 0.3908233642578125, -0.01274871826171875, -2.0945777893066406, 0.337677001953125, 0.8099822998046875, 2.4650192260742188, 3.18243408203125, -0.39495849609375, 0.8067626953125, 4.012359619140625, 3.8845272064208984, 1.9320564270019531, 0.6404876708984375, 2.067655563354492, 1.136962890625, 0.64019775390625, 0.46120452880859375, 0.5744228363037109, 0.60894775390625, -1.8623428344726562, 1.9337577819824219, 0.5310764312744141, -3.0153884887695312, -0.27875518798828125, 0.7638397216796875, 0.04141044616699219, 0.90289306640625, 0.07549285888671875, 2.1407470703125, 1.4726791381835938, -3.7351760864257812, 3.9919891357421875, -0.67535400390625, 0.8845062255859375, 2.5740432739257812, 2.204437255859375, 1.819183349609375, 0.9712600708007812, 4.982330322265625, 0.9772701263427734, -0.054798126220703125, 1.4400787353515625, 2.0691070556640625, -0.574920654296875, 1.2732467651367188, 0.5052566528320312, 0.8706588745117188, 2.820709228515625, -1.3149490356445312, 0.1046142578125, 1.3956680297851562, 1.0642776489257812, -2.643646240234375, -1.05035400390625, -0.9524154663085938, 0.059356689453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000343.npy"} +{"epoch": 0.5185185185185185, "step": 344, "batch_size": 64, "mean": 1.138460636138916, "std": 1.625303030014038, "min": -2.284412384033203, "p10": -1.0130950927734372, "median": 1.1412296295166016, "p90": 3.101809310913086, "max": 5.981170654296875, "pos_frac": 0.765625, "sample": [-1.0876007080078125, 1.9147682189941406, 1.7910079956054688, 0.5579719543457031, 2.0939064025878906, 0.16986465454101562, 0.061000823974609375, 1.8526496887207031, 1.47930908203125, 3.54998779296875, 0.772216796875, -0.8392486572265625, 1.1623497009277344, 2.1954727172851562, -0.12911224365234375, 1.7143058776855469, 0.2832794189453125, 5.981170654296875, 1.6876068115234375, 3.8321533203125, -1.1056251525878906, 1.3755645751953125, -2.0583114624023438, 2.35662841796875, 0.1423320770263672, -1.240081787109375, -0.1599884033203125, 0.2709178924560547, 2.6263885498046875, 3.0573463439941406, -0.07007598876953125, 3.7552719116210938, 1.2195205688476562, 0.37003326416015625, 0.5153579711914062, 3.1208648681640625, 1.96441650390625, 0.56158447265625, 0.07152748107910156, -1.7953414916992188, 0.311676025390625, 0.2714729309082031, -0.6140289306640625, -0.47707557678222656, 1.1201095581054688, -1.3762454986572266, 4.449985504150391, 2.3646926879882812, 1.257354736328125, 2.6714134216308594, 2.242156982421875, 1.3155441284179688, 3.0056800842285156, 0.8394985198974609, 1.87615966796875, -0.2172527313232422, 0.6749725341796875, 3.0557518005371094, 0.96337890625, -0.04328155517578125, -2.284412384033203, 3.1596298217773438, 2.3476734161376953, 1.92523193359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000344.npy"} +{"epoch": 0.5200302343159486, "step": 345, "batch_size": 64, "mean": 1.1047430038452148, "std": 1.6992075443267822, "min": -2.1644363403320312, "p10": -1.0391784667968749, "median": 0.8753395080566406, "p90": 3.3624275207519534, "max": 5.7102813720703125, "pos_frac": 0.734375, "sample": [0.3963775634765625, -0.16294097900390625, 5.116546630859375, -1.7799091339111328, 0.8407402038574219, -0.889495849609375, 0.5894508361816406, -1.16558837890625, 0.3714942932128906, 0.2365093231201172, 1.1581611633300781, 1.5499916076660156, 2.742929458618164, 1.1727104187011719, -1.2510147094726562, 0.6142425537109375, -0.9524917602539062, 0.7708988189697266, 0.3531227111816406, 2.6075477600097656, 5.7102813720703125, 0.24520301818847656, 1.9250946044921875, 1.8015251159667969, 1.3262004852294922, 0.414215087890625, -0.7434158325195312, 0.8105411529541016, 0.8817596435546875, 1.9732284545898438, 1.8322830200195312, 3.192169189453125, 1.7640380859375, 1.3884143829345703, 2.9014129638671875, -1.3336143493652344, 3.755502700805664, 3.621063232421875, 3.4131317138671875, 0.6720695495605469, 3.2781906127929688, -0.33791351318359375, -1.2706527709960938, 4.854827880859375, 2.069915771484375, 0.18849563598632812, -1.0667495727539062, -0.690338134765625, 1.518890380859375, 1.0099563598632812, 1.6522598266601562, 0.6230545043945312, -0.41829681396484375, 3.398529052734375, -0.3183135986328125, 2.2507781982421875, -0.2301788330078125, -2.1644363403320312, 0.8689193725585938, -0.9748458862304688, 2.119253158569336, 2.3456268310546875, 1.3011627197265625, 2.825031280517578], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000345.npy"} +{"epoch": 0.5215419501133787, "step": 346, "batch_size": 64, "mean": 1.334245204925537, "std": 1.7139487266540527, "min": -2.2300872802734375, "p10": -0.5222904205322265, "median": 1.0176773071289062, "p90": 3.7181978225708012, "max": 5.5369873046875, "pos_frac": 0.8125, "sample": [3.4695262908935547, 4.367208480834961, 3.4842987060546875, 3.244293212890625, 5.5369873046875, 1.6531829833984375, -1.184326171875, 2.133970260620117, 3.9546775817871094, 2.9965133666992188, 3.5445938110351562, 1.4283523559570312, 1.64947509765625, 3.203317642211914, 0.5766258239746094, 4.012859344482422, 0.1440582275390625, 0.20111465454101562, -0.8790359497070312, 0.20438385009765625, 2.0536460876464844, 0.42717742919921875, 0.3509979248046875, 0.992523193359375, 1.0428314208984375, 3.8105926513671875, -2.2300872802734375, 1.3235054016113281, 0.6493186950683594, -2.1707916259765625, 0.343170166015625, -1.3192367553710938, 2.365802764892578, -0.4800758361816406, 0.5610847473144531, 0.7542953491210938, 0.49544525146484375, -1.0341720581054688, 3.786510467529297, 2.3174896240234375, 2.69439697265625, 3.5588016510009766, 1.054534912109375, 1.6642608642578125, 0.7538318634033203, 0.05213165283203125, 5.063892364501953, 2.0730361938476562, -0.01619720458984375, 2.766387939453125, 1.6923980712890625, -0.1849212646484375, 0.44099998474121094, 0.36175537109375, -0.10460281372070312, -0.5403823852539062, 1.0677719116210938, 0.6218948364257812, -0.3861083984375, 2.5065689086914062, 0.06020355224609375, 0.05016899108886719, 0.6851806640625, 1.673583984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000346.npy"} +{"epoch": 0.5230536659108088, "step": 347, "batch_size": 64, "mean": 1.1055727005004883, "std": 1.7862834930419922, "min": -2.540374755859375, "p10": -1.2989887237548825, "median": 0.9378528594970703, "p90": 3.16417236328125, "max": 6.868133544921875, "pos_frac": 0.765625, "sample": [0.5135383605957031, 1.5615158081054688, 1.7063064575195312, 2.065673828125, -2.540374755859375, 0.5835685729980469, 1.5521697998046875, 2.2412471771240234, 0.3023262023925781, 2.5972023010253906, 0.2570343017578125, 3.848377227783203, 0.26988983154296875, -1.9711532592773438, -1.7917404174804688, 5.000495910644531, 0.7685623168945312, 1.5964279174804688, -1.876800537109375, 1.4066352844238281, 0.6529464721679688, 0.5280609130859375, 2.4804763793945312, 0.5637130737304688, -1.475189208984375, -0.9223060607910156, 1.4395980834960938, 3.16796875, 3.1652984619140625, -1.4004096984863281, -0.6119918823242188, -0.3232917785644531, 0.6746597290039062, 1.1071434020996094, -0.88763427734375, 0.3561897277832031, -1.5424613952636719, 1.6640777587890625, 1.662994384765625, 1.209686279296875, 1.8151016235351562, 0.24076080322265625, 0.36235809326171875, 1.9759445190429688, 1.2005043029785156, 4.605949401855469, -0.11849403381347656, -1.0623397827148438, 6.868133544921875, 4.577068328857422, 2.0069580078125, 2.040332794189453, 3.023895263671875, 3.1615447998046875, -0.015689849853515625, 2.0241966247558594, 1.3550605773925781, 0.6062393188476562, 0.7435417175292969, 0.1741962432861328, -0.4492683410644531, 0.5777206420898438, 2.41064453125, 3.0318641662597656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000347.npy"} +{"epoch": 0.5245653817082389, "step": 348, "batch_size": 64, "mean": 1.6269643306732178, "std": 2.5520389080047607, "min": -1.474294662475586, "p10": -0.8817497253417969, "median": 0.9572315216064453, "p90": 5.684617614746096, "max": 11.331621170043945, "pos_frac": 0.6875, "sample": [5.0165863037109375, 3.5954017639160156, -0.4322052001953125, -0.37497711181640625, 2.8507766723632812, 0.31027984619140625, 1.719879150390625, -0.1867523193359375, -0.9742431640625, -0.216461181640625, 6.147865295410156, 0.7734222412109375, 1.3012542724609375, 2.2612380981445312, 5.970916748046875, 3.2239837646484375, 0.6339569091796875, 6.496978759765625, 1.0510215759277344, 4.312778472900391, 0.2919425964355469, 2.5089797973632812, 3.321491241455078, 1.9136199951171875, 1.3832130432128906, 2.392192840576172, 0.610198974609375, 3.232065200805664, -0.3165302276611328, -0.8763236999511719, 2.1161956787109375, -0.4051666259765625, 1.0467491149902344, 6.35223388671875, -0.263092041015625, -0.3540477752685547, 1.5487556457519531, -1.0545425415039062, 0.394195556640625, 0.6666126251220703, 6.705230712890625, 11.331621170043945, 0.826629638671875, -0.3693103790283203, 4.102405548095703, 0.006465911865234375, -1.275979995727539, -1.040700912475586, -1.12774658203125, 1.1882247924804688, 0.2276782989501953, 8.416379928588867, -1.474294662475586, 1.6483287811279297, 2.5474605560302734, 0.8677139282226562, 1.8493633270263672, 0.34590911865234375, -0.6746253967285156, -0.3385047912597656, -0.40515899658203125, 1.6650848388671875, 1.9971771240234375, -0.8840751647949219], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000348.npy"} +{"epoch": 0.5260770975056689, "step": 349, "batch_size": 64, "mean": 1.4119596481323242, "std": 1.8669369220733643, "min": -3.3837432861328125, "p10": -0.7276863098144531, "median": 1.4218339920043945, "p90": 3.223366928100587, "max": 6.933786392211914, "pos_frac": 0.765625, "sample": [1.5825767517089844, 2.998504638671875, 2.3506698608398438, 1.3737335205078125, 1.4699344635009766, 1.2371902465820312, 2.273662567138672, 1.31756591796875, -0.008714675903320312, 1.9076957702636719, -0.6309528350830078, 0.08646392822265625, 2.064208984375, 0.7690563201904297, 2.5742645263671875, 1.6262855529785156, 2.2974815368652344, -1.2889022827148438, -3.3837432861328125, 2.728727340698242, 1.81475830078125, 4.6655426025390625, 1.51544189453125, 0.944061279296875, 2.469087600708008, 0.6761474609375, -0.7942733764648438, 2.304903030395508, 0.77777099609375, 0.863311767578125, 2.287393569946289, 2.1823463439941406, -1.1775360107421875, 6.933786392211914, -0.6926651000976562, 2.7517013549804688, 0.4723014831542969, 5.97235107421875, -0.303131103515625, 0.5236682891845703, 2.2476654052734375, 0.963592529296875, 0.4764671325683594, 1.48406982421875, 2.208040237426758, 4.728206634521484, -0.004730224609375, 0.4944801330566406, -0.724700927734375, -0.8774490356445312, 2.1672897338867188, 2.147979736328125, -0.28129005432128906, 0.28936767578125, 1.10845947265625, 6.111541748046875, 0.3261566162109375, -0.7289657592773438, -1.3707351684570312, 3.3197364807128906, -0.42566871643066406, 2.8320541381835938, 1.7199478149414062, 4.621223449707031], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000349.npy"} +{"epoch": 0.527588813303099, "step": 350, "batch_size": 64, "mean": 1.6710278987884521, "std": 1.993577003479004, "min": -2.0665359497070312, "p10": -0.6104652404785152, "median": 1.3380966186523438, "p90": 4.396894073486329, "max": 6.7348175048828125, "pos_frac": 0.84375, "sample": [6.033912658691406, 1.4109001159667969, 1.916015625, -0.032238006591796875, 1.9197158813476562, 0.7731075286865234, 4.195335388183594, 6.7348175048828125, 0.8798313140869141, 2.3715991973876953, 0.124725341796875, 0.022796630859375, 0.112762451171875, 1.8385028839111328, 0.4912109375, 2.1886749267578125, 1.4893035888671875, 0.4454975128173828, -0.8545341491699219, -0.9289360046386719, 2.356475830078125, 3.667327880859375, -2.0665359497070312, 0.829620361328125, 0.6621150970458984, -0.21260833740234375, -1.7258338928222656, 3.6942596435546875, 0.043285369873046875, -1.0071086883544922, 3.6455307006835938, -1.05010986328125, 4.002838134765625, 0.48920440673828125, 2.2306671142578125, 3.7069854736328125, 1.3733978271484375, 2.9800796508789062, 4.043525695800781, 4.8968048095703125, 1.5880146026611328, 5.709095001220703, 6.591072082519531, 0.69189453125, 1.508575439453125, 0.7921085357666016, 3.1567840576171875, 0.6223354339599609, 0.030788421630859375, 3.0149917602539062, -0.1380462646484375, 1.2920494079589844, 0.52069091796875, 1.4077529907226562, 0.504974365234375, -0.780975341796875, 4.4832763671875, 0.3066253662109375, 1.30279541015625, 0.2330169677734375, 4.598878860473633, 1.1883163452148438, 1.6261234283447266, 3.0017242431640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000350.npy"} +{"epoch": 0.5291005291005291, "step": 351, "batch_size": 64, "mean": 1.1261450052261353, "std": 1.7088854312896729, "min": -3.995594024658203, "p10": -0.9645458221435547, "median": 1.5309066772460938, "p90": 3.008280563354493, "max": 3.9688682556152344, "pos_frac": 0.78125, "sample": [1.051584243774414, 0.868927001953125, 3.4985313415527344, 1.5414962768554688, -0.6346588134765625, 2.0661144256591797, -0.6929740905761719, 2.0317611694335938, 2.7441329956054688, 1.61749267578125, 2.42340087890625, 3.300872802734375, 0.6979160308837891, 0.5101470947265625, 1.5887451171875, -1.2962265014648438, 0.694427490234375, -0.9930381774902344, 3.9688682556152344, 0.5701313018798828, 1.3972015380859375, -0.26508331298828125, 2.776836395263672, 2.3862457275390625, 0.8055419921875, 1.3828887939453125, 2.8861656188964844, 1.0906982421875, -3.995594024658203, 1.9806137084960938, 3.0606155395507812, -1.2890167236328125, 3.337932586669922, -0.5282821655273438, 1.9956398010253906, -2.014862060546875, 0.7014083862304688, 2.632781982421875, 1.7017440795898438, 1.8339290618896484, 2.3622817993164062, 2.2827072143554688, 1.59930419921875, -0.8980636596679688, 2.8585357666015625, 1.0587692260742188, 2.0803756713867188, 0.25591278076171875, -0.47216033935546875, 1.8171157836914062, -0.7133331298828125, 0.2807884216308594, 1.9146366119384766, 2.095304489135742, 3.5177650451660156, 1.6504898071289062, 1.0038318634033203, 3.86077880859375, -3.771495819091797, 1.6849098205566406, -2.9699325561523438, 1.5203170776367188, 1.4405536651611328, 0.1788330078125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000351.npy"} +{"epoch": 0.5306122448979592, "step": 352, "batch_size": 64, "mean": 1.0951130390167236, "std": 1.6904197931289673, "min": -2.23687744140625, "p10": -0.7390033721923828, "median": 0.7402811050415039, "p90": 3.0100154876708998, "max": 6.8713226318359375, "pos_frac": 0.796875, "sample": [2.6283950805664062, 5.6833953857421875, 6.8713226318359375, 0.2871856689453125, 2.054412841796875, -2.23687744140625, -0.7665367126464844, 0.9522838592529297, 0.2807807922363281, 0.6389827728271484, 1.6006183624267578, 2.1766433715820312, -0.864837646484375, -0.3626880645751953, 1.827728271484375, 0.19732666015625, -1.141265869140625, 0.003173828125, -0.2228240966796875, 0.04116058349609375, 3.16766357421875, 1.1549835205078125, 1.99920654296875, 3.2048568725585938, 2.642169952392578, 2.2815704345703125, 1.800872802734375, -0.9534072875976562, 0.6455497741699219, 0.10222244262695312, -0.08916664123535156, 0.47681427001953125, 0.03167724609375, 0.6156883239746094, -0.6747589111328125, 0.9969749450683594, 0.2103271484375, 1.2770843505859375, 2.084991455078125, 0.9970474243164062, 1.8097152709960938, 1.1276168823242188, 6.0281982421875, 0.4902324676513672, 0.47711944580078125, 2.301776885986328, 0.47754669189453125, 0.4855194091796875, -0.2315845489501953, 1.566192626953125, 0.164031982421875, 0.083526611328125, 1.9718017578125, 3.963409423828125, 1.9886703491210938, 1.2678604125976562, -0.0487518310546875, 3.5568389892578125, -1.8902244567871094, -1.1212158203125, 0.1652984619140625, 1.4306888580322266, 1.5672035217285156, 0.8350124359130859], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000352.npy"} +{"epoch": 0.5321239606953893, "step": 353, "batch_size": 64, "mean": 1.2259814739227295, "std": 1.838463306427002, "min": -4.4516143798828125, "p10": -0.9361751556396485, "median": 0.954864501953125, "p90": 3.797247695922852, "max": 5.952857971191406, "pos_frac": 0.796875, "sample": [0.7269496917724609, 2.76861572265625, -1.582275390625, 0.5772666931152344, -0.9642925262451172, 0.9828510284423828, 0.6595306396484375, -1.986673355102539, 4.9288177490234375, 1.133880615234375, -0.4950218200683594, 3.3251495361328125, 2.727449417114258, 0.7470703125, 3.0656089782714844, -0.015130996704101562, 3.0594940185546875, 1.025787353515625, 2.1365737915039062, 2.5830307006835938, 4.485076904296875, 3.813098907470703, 0.06556510925292969, 1.9453659057617188, -0.10178375244140625, -0.9381446838378906, 4.869758605957031, 1.8414344787597656, 4.108955383300781, 0.7197341918945312, 1.7844924926757812, 1.0905113220214844, 1.3247871398925781, 0.6550102233886719, -4.4516143798828125, 2.2576732635498047, -0.5801048278808594, 0.24933624267578125, 1.7724952697753906, 3.211545944213867, 0.46030426025390625, 3.7602615356445312, 5.952857971191406, 0.52984619140625, 0.962188720703125, 1.2669639587402344, 0.21024322509765625, 0.7733325958251953, 0.947540283203125, 0.35977935791015625, 1.1689319610595703, 0.2518310546875, 0.5126953125, 0.46942901611328125, -0.1974010467529297, 1.0181884765625, 0.739898681640625, 0.8419952392578125, -1.3644599914550781, -1.221466064453125, 2.31622314453125, -0.93157958984375, 4.349466323852539, 1.7578697204589844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000353.npy"} +{"epoch": 0.5336356764928194, "step": 354, "batch_size": 64, "mean": 1.3514502048492432, "std": 1.8014642000198364, "min": -3.521635055541992, "p10": -0.5788200378417967, "median": 1.1056747436523438, "p90": 3.8849168777465826, "max": 6.123832702636719, "pos_frac": 0.78125, "sample": [6.123832702636719, 3.41424560546875, -1.1684417724609375, 3.0284805297851562, -0.20619964599609375, 0.8181667327880859, 3.0940628051757812, 0.41939544677734375, 2.05023193359375, 0.3396415710449219, 1.6817035675048828, 4.132820129394531, 1.1163034439086914, 0.08557891845703125, -0.425689697265625, 0.41208648681640625, 2.7891998291015625, -0.1387615203857422, 4.552524566650391, 4.2568817138671875, 3.7055416107177734, 1.5530624389648438, 0.9527435302734375, 1.621429443359375, -3.521635055541992, -0.10402870178222656, -0.41448974609375, 3.9617919921875, 1.343994140625, 1.104888916015625, 1.238677978515625, 3.311298370361328, 2.6079254150390625, 2.38104248046875, 0.010223388671875, 0.003040313720703125, 1.0159645080566406, -1.2957191467285156, 2.531106948852539, 0.73004150390625, 1.2397079467773438, 2.721599578857422, -0.6444473266601562, -0.86163330078125, 5.117340087890625, -0.032909393310546875, 2.8450927734375, 4.8730316162109375, 0.9735622406005859, 0.24196624755859375, 1.5303955078125, 0.867767333984375, 2.4010467529296875, 2.516315460205078, 1.6019554138183594, 0.9639644622802734, 0.04207611083984375, 0.40410423278808594, -1.1997108459472656, 1.1064605712890625, 0.16829299926757812, 2.0179176330566406, -1.2299728393554688, -0.2840728759765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000354.npy"} +{"epoch": 0.5351473922902494, "step": 355, "batch_size": 64, "mean": 1.3419021368026733, "std": 1.8395774364471436, "min": -2.7421188354492188, "p10": -0.7341489791870116, "median": 1.5472278594970703, "p90": 3.7336824417114256, "max": 6.1636810302734375, "pos_frac": 0.765625, "sample": [1.9474143981933594, 6.1636810302734375, 0.5708446502685547, 1.7253875732421875, 3.143798828125, 2.1585693359375, 1.1371688842773438, -0.8743400573730469, -0.6578922271728516, 0.17896270751953125, 1.7174110412597656, -0.3584442138671875, 1.9538917541503906, -1.752532958984375, 1.578521728515625, 2.029367446899414, -0.4848976135253906, 1.6125831604003906, -1.169565200805664, -0.462799072265625, -2.7421188354492188, 1.7956809997558594, 3.7081146240234375, 4.249668121337891, 1.9780502319335938, 0.7713470458984375, 1.5159339904785156, 1.8554878234863281, 3.393749237060547, 4.5365753173828125, 0.00745391845703125, 0.34111785888671875, -0.10320854187011719, 5.3394775390625, 0.2460479736328125, -0.7668304443359375, 1.5948028564453125, -0.9770851135253906, 0.1957721710205078, 1.9772987365722656, 1.7469100952148438, 0.339447021484375, 4.089942932128906, 0.11675262451171875, 1.281768798828125, 3.7328720092773438, 2.686004638671875, 0.3671913146972656, 3.468921661376953, -2.26220703125, 3.734029769897461, 1.1506805419921875, 2.6018218994140625, -0.11861038208007812, 4.883941650390625, -0.3890666961669922, -0.5937900543212891, 0.3960094451904297, 3.2162933349609375, 0.35221099853515625, 1.6971435546875, 1.7010478973388672, 1.9567718505859375, 0.65118408203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000355.npy"} +{"epoch": 0.5366591080876795, "step": 356, "batch_size": 64, "mean": 1.1650193929672241, "std": 2.128269910812378, "min": -4.122718811035156, "p10": -1.7487182617187498, "median": 1.002030372619629, "p90": 4.042596054077149, "max": 6.824333190917969, "pos_frac": 0.765625, "sample": [4.648294448852539, 4.171398162841797, 1.8017425537109375, -2.8852996826171875, 3.386566162109375, 1.6249542236328125, 0.45459747314453125, 2.76458740234375, 0.692626953125, 0.19188499450683594, -2.5884265899658203, -2.39727783203125, -0.600830078125, -1.46844482421875, 1.9526710510253906, 3.9900741577148438, 1.3391036987304688, 0.35113525390625, 4.402013778686523, -0.05039787292480469, 2.6599273681640625, -4.122718811035156, 1.004364013671875, 2.2859954833984375, 1.05853271484375, 3.3208770751953125, -1.86883544921875, -1.252532958984375, 1.9561882019042969, 1.3914108276367188, 0.7841567993164062, 0.623504638671875, -0.8277912139892578, -0.5651283264160156, -2.2803421020507812, 1.3605194091796875, 2.0613021850585938, 0.4699993133544922, 1.7175216674804688, 3.8799514770507812, 0.9773406982421875, 0.143463134765625, 3.8766632080078125, 0.4550933837890625, 0.25388336181640625, 0.8252811431884766, 4.065105438232422, 0.8545608520507812, -0.4489269256591797, 2.2099685668945312, 0.5720176696777344, 1.6342105865478516, 5.288259506225586, 2.0674514770507812, 0.602691650390625, 1.5863265991210938, 0.9996967315673828, 0.4126014709472656, 2.36328125, -0.6699676513671875, -2.445903778076172, 6.824333190917969, 2.026948928833008, 4.64898681640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000356.npy"} +{"epoch": 0.5381708238851096, "step": 357, "batch_size": 64, "mean": 1.0884283781051636, "std": 1.893683910369873, "min": -3.3736724853515625, "p10": -1.3288890838623044, "median": 0.8508148193359375, "p90": 3.596414184570313, "max": 5.136634826660156, "pos_frac": 0.75, "sample": [1.8677215576171875, -0.09914779663085938, 2.4470367431640625, 0.30823516845703125, -3.3736724853515625, 4.0990447998046875, -2.1251678466796875, 0.2878074645996094, 0.8216323852539062, 0.9173431396484375, -2.1467742919921875, 5.136634826660156, -0.6517257690429688, 1.0956764221191406, 4.889415740966797, 0.8617706298828125, 3.9782962799072266, 0.37409210205078125, 3.25146484375, 0.3135490417480469, 1.4865188598632812, 0.8648529052734375, 0.8398590087890625, 0.7920913696289062, 0.4378166198730469, 2.1785507202148438, 0.7823905944824219, -0.3364753723144531, -0.954010009765625, 1.6546249389648438, -1.6119613647460938, 0.23000717163085938, -0.4658164978027344, 0.8398170471191406, 3.379199981689453, 4.964946746826172, 2.4784317016601562, 3.0310821533203125, 3.47723388671875, 3.9907798767089844, 0.8864822387695312, 1.9749526977539062, -0.022796630859375, 1.562469482421875, 2.84466552734375, 0.5657577514648438, 0.5736465454101562, -0.4505767822265625, -2.3647499084472656, 1.0864715576171875, 0.3708152770996094, 1.3554229736328125, 2.0213699340820312, 1.2275810241699219, 3.647491455078125, -0.3052215576171875, 0.08946990966796875, 2.1730728149414062, 3.283395767211914, -1.4895515441894531, -0.489044189453125, -2.8942298889160156, 3.2549667358398438, 0.4443817138671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000357.npy"} +{"epoch": 0.5396825396825397, "step": 358, "batch_size": 64, "mean": 1.5866005420684814, "std": 1.7414193153381348, "min": -1.8234024047851562, "p10": -0.5925819396972656, "median": 1.4951457977294922, "p90": 4.080558586120606, "max": 5.930694580078125, "pos_frac": 0.84375, "sample": [1.6531333923339844, 1.9366836547851562, 2.3001136779785156, 1.9034347534179688, -0.6099929809570312, 3.4261474609375, 1.9150810241699219, -0.6606216430664062, 0.6977367401123047, 1.4951171875, -1.2864933013916016, 1.7365303039550781, 0.6192626953125, 2.012847900390625, 2.758808135986328, 0.36144256591796875, -0.96832275390625, 3.425048828125, 2.3941802978515625, 2.0900001525878906, 4.3238372802734375, 0.7494182586669922, 0.1916656494140625, 2.6084671020507812, 2.649538040161133, 0.5251007080078125, 1.4678115844726562, 0.28325653076171875, -0.776275634765625, 0.8067550659179688, -1.4982414245605469, 2.304729461669922, -0.51416015625, 0.05080604553222656, 0.005859375, 3.477874755859375, 2.184711456298828, 3.41363525390625, 0.7388839721679688, 0.12134361267089844, 4.1840972900390625, 4.374847412109375, 5.930694580078125, 0.5062522888183594, 4.982795715332031, 5.770631790161133, 2.4212646484375, -0.21409988403320312, 1.0411300659179688, 2.42291259765625, 0.5777645111083984, -0.5519561767578125, -1.8234024047851562, 3.838968276977539, 0.7922744750976562, 1.4951744079589844, 1.7336196899414062, 1.1199932098388672, 0.83697509765625, 1.8597068786621094, 1.3315315246582031, 3.3777847290039062, 0.6591987609863281, 4.5591278076171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000358.npy"} +{"epoch": 0.5411942554799698, "step": 359, "batch_size": 64, "mean": 1.1224000453948975, "std": 1.8322715759277344, "min": -3.092845916748047, "p10": -1.0896692276000974, "median": 1.2471914291381836, "p90": 3.3565711975097656, "max": 5.173248291015625, "pos_frac": 0.703125, "sample": [-0.474273681640625, 3.3577728271484375, -1.2085762023925781, 0.2208099365234375, 2.04840087890625, -1.171051025390625, -0.406005859375, 0.9357357025146484, -0.5828971862792969, 5.15069580078125, 4.105888366699219, 0.8377265930175781, 0.3868560791015625, 2.2029876708984375, -0.352630615234375, 2.5545196533203125, 2.193349838256836, -2.01751708984375, 2.3664512634277344, -2.463146209716797, 0.3092155456542969, 2.169445037841797, 0.912750244140625, -2.2973480224609375, 0.3929595947265625, 2.0342750549316406, 1.1880931854248047, 1.6192455291748047, 1.96697998046875, -3.092845916748047, 3.35382080078125, 5.173248291015625, -0.4323272705078125, 3.3577499389648438, -0.257232666015625, 1.8758773803710938, 1.7821388244628906, 2.3247604370117188, 0.6341400146484375, 2.4487228393554688, -0.7738876342773438, 3.325387954711914, 2.4903392791748047, 2.4132137298583984, 1.3952560424804688, -1.614593505859375, 0.18494415283203125, -0.7252197265625, -0.7919692993164062, 1.3228378295898438, 1.0608177185058594, -0.8997783660888672, 1.5814895629882812, -0.38483238220214844, 1.992034912109375, -0.10281753540039062, 2.2388057708740234, 0.6261997222900391, 2.02313232421875, 4.9156951904296875, 2.4456253051757812, 4.145473480224609, 0.5103950500488281, 1.3062896728515625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000359.npy"} +{"epoch": 0.5427059712773998, "step": 360, "batch_size": 64, "mean": 1.2418781518936157, "std": 1.9668306112289429, "min": -3.012664794921875, "p10": -0.8690448760986328, "median": 0.9543771743774414, "p90": 3.8859729766845708, "max": 7.002922058105469, "pos_frac": 0.78125, "sample": [1.533172607421875, 6.490211486816406, -3.012664794921875, 0.34076690673828125, 2.0396881103515625, 2.3419227600097656, -1.59979248046875, 1.1959171295166016, -0.7991981506347656, 0.542633056640625, -0.37714576721191406, -0.1283111572265625, 0.9933700561523438, -1.1676368713378906, 2.5623817443847656, -1.864105224609375, 4.439208984375, -0.3441314697265625, 1.855844497680664, 1.241729736328125, 0.3073463439941406, 0.9153842926025391, -2.9257354736328125, -1.2373809814453125, 1.1079254150390625, 0.808685302734375, 2.0621337890625, 1.3634796142578125, 2.50067138671875, 2.7506103515625, 1.5111312866210938, 0.8812885284423828, 1.2387199401855469, -0.342071533203125, 0.9137229919433594, 0.05243682861328125, -0.8989791870117188, 1.8272533416748047, 3.3839149475097656, 0.5010604858398438, 7.002922058105469, 0.8253402709960938, 1.9380416870117188, -0.4822406768798828, 5.3980255126953125, 1.3184356689453125, 3.217233657836914, 1.3809432983398438, 0.8588638305664062, 5.580757141113281, 4.012947082519531, 2.0122833251953125, 3.8159446716308594, -0.057186126708984375, 0.08443260192871094, 2.1262359619140625, 0.43204498291015625, 0.01556396484375, 0.6390571594238281, 3.915985107421875, 0.20696258544921875, 0.12276458740234375, 1.4218673706054688, 0.6875152587890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000360.npy"} +{"epoch": 0.54421768707483, "step": 361, "batch_size": 64, "mean": 1.2811236381530762, "std": 1.9114253520965576, "min": -2.5985260009765625, "p10": -0.8927284240722655, "median": 1.2348098754882812, "p90": 3.225467300415039, "max": 8.032478332519531, "pos_frac": 0.796875, "sample": [1.56005859375, 2.3985671997070312, 0.10081672668457031, -2.511045455932617, 4.027168273925781, 0.42900848388671875, -1.0411643981933594, -1.2698440551757812, 0.9086074829101562, 1.3906402587890625, 0.0253143310546875, -1.7412872314453125, -0.3783988952636719, 2.5268936157226562, 3.2465438842773438, 1.7278976440429688, -0.5732994079589844, -0.6700706481933594, -0.08686065673828125, 1.0789794921875, 0.34613037109375, 2.1236495971679688, -0.9516677856445312, 2.9866943359375, 1.775299072265625, 1.4967041015625, 1.4943809509277344, -2.5985260009765625, 3.4266433715820312, 0.5686836242675781, 1.79754638671875, 0.4191436767578125, 1.8893165588378906, 7.976356506347656, 1.4735794067382812, 3.579193115234375, 1.4699039459228516, 2.055074691772461, 0.8739242553710938, -0.941253662109375, -0.2313079833984375, 0.311248779296875, 3.0312843322753906, 2.110790252685547, 3.411518096923828, 3.134441375732422, 0.9381027221679688, 0.0608978271484375, -0.7795028686523438, 2.936767578125, 2.444061279296875, 0.6331100463867188, 2.110370635986328, 0.4271392822265625, 8.032478332519531, 1.4431819915771484, 3.176288604736328, 0.903472900390625, 0.4892120361328125, 2.4122886657714844, 0.2540931701660156, 1.6415767669677734, 0.4169578552246094, 0.274139404296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000361.npy"} +{"epoch": 0.54572940287226, "step": 362, "batch_size": 64, "mean": 1.3991665840148926, "std": 1.772331714630127, "min": -1.5653266906738281, "p10": -0.433624267578125, "median": 0.9574432373046875, "p90": 3.715401077270508, "max": 6.782615661621094, "pos_frac": 0.78125, "sample": [-0.6826610565185547, 2.0017967224121094, 0.2029571533203125, 0.13281631469726562, 1.1546344757080078, -1.2274589538574219, 3.0203933715820312, 0.762969970703125, 4.77099609375, 2.6288909912109375, 0.6511993408203125, -1.5653266906738281, 1.5777206420898438, 0.708465576171875, 0.6774005889892578, -0.010812759399414062, 1.23809814453125, 2.9850387573242188, 0.4860057830810547, 2.181243896484375, 0.2288532257080078, 0.1314849853515625, 0.5984725952148438, 0.317626953125, 2.7210845947265625, 0.6823921203613281, 2.8029842376708984, 1.036285400390625, -0.22855758666992188, 2.757526397705078, 4.673431396484375, 0.20784950256347656, 0.6510963439941406, 2.2768287658691406, -0.5169754028320312, 3.929351806640625, 1.0435752868652344, 0.0141448974609375, 1.1656341552734375, -0.36054229736328125, -0.06696701049804688, 4.276611328125, 3.2680130004882812, 3.587566375732422, 2.8073501586914062, 0.073089599609375, 3.522052764892578, 2.89532470703125, 2.935548782348633, 0.87860107421875, -1.5564346313476562, 2.1419677734375, 1.6094741821289062, -1.4575042724609375, 0.4011039733886719, -0.1409149169921875, 5.038105010986328, -0.4264678955078125, -0.4366912841796875, 2.3773345947265625, 6.782615661621094, -0.4147377014160156, 3.7701873779296875, 1.8545150756835938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000362.npy"} +{"epoch": 0.54724111866969, "step": 363, "batch_size": 64, "mean": 1.512410044670105, "std": 2.125594139099121, "min": -5.41534423828125, "p10": -0.6721841812133785, "median": 1.1315231323242188, "p90": 4.805748748779298, "max": 6.3863525390625, "pos_frac": 0.8125, "sample": [2.686624526977539, -1.26953125, 4.366844177246094, 1.1445159912109375, 1.68829345703125, 1.0772476196289062, 1.0705642700195312, 6.3863525390625, 2.1088104248046875, -0.33161163330078125, -1.0904693603515625, 2.74420166015625, 5.571502685546875, 0.872039794921875, 0.5295562744140625, 5.177589416503906, 0.6925754547119141, 0.5649394989013672, 1.1054153442382812, 0.856719970703125, 0.016307830810546875, 2.224691390991211, 1.6738624572753906, 4.897216796875, 4.592323303222656, 1.8708248138427734, -0.13262939453125, 0.8162269592285156, -0.32810211181640625, 0.48154640197753906, -1.6617507934570312, -2.679790496826172, 1.1185302734375, -1.2699165344238281, 5.578937530517578, 2.7687435150146484, 0.5074443817138672, 2.0475082397460938, 1.2871723175048828, -0.09755325317382812, 1.7013130187988281, 0.8633804321289062, 3.011749267578125, 4.1886749267578125, 2.680868148803711, 3.3739700317382812, 2.13238525390625, 1.0505256652832031, 1.4300460815429688, 0.5932388305664062, 5.352365493774414, 0.9703178405761719, 0.015125274658203125, 2.9362926483154297, -0.8181438446044922, -5.41534423828125, 2.6576385498046875, -0.15067672729492188, 5.622528076171875, 0.4822578430175781, 0.10515785217285156, 1.4306488037109375, 1.681640625, 1.2345085144042969], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000363.npy"} +{"epoch": 0.5487528344671202, "step": 364, "batch_size": 64, "mean": 1.5479564666748047, "std": 2.035463333129883, "min": -2.8247604370117188, "p10": -0.7385772705078125, "median": 1.4189071655273438, "p90": 4.299823760986329, "max": 6.400177001953125, "pos_frac": 0.796875, "sample": [4.133705139160156, -0.5009536743164062, 0.1333160400390625, 5.649101257324219, 4.3976287841796875, 1.0406875610351562, 2.5475921630859375, 1.70599365234375, -0.954071044921875, 3.214691162109375, 6.400177001953125, 0.9770317077636719, -0.31775665283203125, -0.7190628051757812, 4.1063232421875, 4.497770309448242, 0.89141845703125, 0.1900177001953125, 5.4842376708984375, -0.29402923583984375, 3.7687530517578125, 1.0914840698242188, 1.6960601806640625, 2.28240966796875, -1.9538192749023438, -1.3141403198242188, 5.125297546386719, -0.7469406127929688, 0.12213516235351562, 2.0270004272460938, 2.702177047729492, 2.6134872436523438, 0.09789276123046875, 1.1651687622070312, 1.3873977661132812, 0.9922637939453125, 3.834857940673828, 2.3294677734375, 0.2423858642578125, -0.03320121765136719, 1.8512344360351562, 1.4504165649414062, 0.9689407348632812, 2.025604248046875, 1.51904296875, 0.3028068542480469, 0.27165794372558594, 2.0556716918945312, 0.004970550537109375, 3.793743133544922, -0.588775634765625, 3.6736087799072266, -2.502695083618164, 4.051536560058594, -2.8247604370117188, 0.5346832275390625, 1.7854537963867188, 2.4792938232421875, 0.30763816833496094, 0.565582275390625, 1.6489982604980469, -1.59234619140625, 4.3710174560546875, 2.9019317626953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000364.npy"} +{"epoch": 0.5502645502645502, "step": 365, "batch_size": 64, "mean": 1.4665677547454834, "std": 1.7151868343353271, "min": -2.8736534118652344, "p10": -0.7246948242187499, "median": 1.5673789978027344, "p90": 3.6925930023193367, "max": 5.704010009765625, "pos_frac": 0.796875, "sample": [0.30591583251953125, 3.005228042602539, -0.2949066162109375, 2.2560176849365234, 1.8186492919921875, 1.5324783325195312, 4.0152435302734375, 1.318563461303711, 3.4232940673828125, 1.191314697265625, 1.0119094848632812, 0.6309089660644531, -2.8736534118652344, 1.957763671875, 1.6133193969726562, 2.1332969665527344, -0.4646148681640625, 2.4034156799316406, 1.0579147338867188, 0.4931468963623047, 3.0656967163085938, -0.8148574829101562, 2.7820663452148438, 0.6099090576171875, 0.6733570098876953, -2.359283447265625, 0.829376220703125, 5.704010009765625, 1.8223190307617188, 3.276020050048828, 1.5875396728515625, 1.6234245300292969, 2.2597427368164062, 1.6968879699707031, 3.8979339599609375, 0.9995384216308594, 2.150390625, -0.01264190673828125, 0.24931716918945312, 1.645050048828125, 1.5472183227539062, 3.4504165649414062, 2.5764617919921875, -1.54052734375, 2.3359527587890625, 5.611089706420898, 1.0587539672851562, -1.019296646118164, 4.75054931640625, 1.1858367919921875, 4.0509796142578125, -0.967041015625, 2.368946075439453, 2.028839111328125, 0.8138408660888672, 1.1286544799804688, -0.2673187255859375, -0.7758407592773438, -0.0486297607421875, -0.6053543090820312, 3.7963829040527344, 2.27301025390625, 0.25028228759765625, 1.6361236572265625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000365.npy"} +{"epoch": 0.5517762660619804, "step": 366, "batch_size": 64, "mean": 1.0714315176010132, "std": 1.9981151819229126, "min": -4.15380859375, "p10": -1.1252281188964843, "median": 0.9817323684692383, "p90": 3.393235778808594, "max": 5.917015075683594, "pos_frac": 0.71875, "sample": [-0.45839881896972656, 3.633392333984375, 1.9605712890625, 0.5017051696777344, -0.2538604736328125, 3.7953243255615234, -3.3480682373046875, -0.9258766174316406, 0.7460784912109375, -1.1328544616699219, 1.3149261474609375, 1.0087356567382812, 2.111858367919922, 2.4701499938964844, 2.259002685546875, -0.28314208984375, -4.15380859375, 2.525177001953125, 0.7248439788818359, 2.169605255126953, 1.77178955078125, 0.7824554443359375, 1.1846237182617188, -1.1074333190917969, -0.6484870910644531, 1.4274845123291016, 1.2164154052734375, 2.322551727294922, -2.0457077026367188, 0.07420539855957031, 5.917015075683594, -0.5271224975585938, -1.4302330017089844, 2.5100250244140625, -2.91156005859375, 0.44208335876464844, 0.034145355224609375, -0.8463211059570312, 1.2152633666992188, 0.8382129669189453, 0.025163650512695312, -0.28530311584472656, 0.29428672790527344, 3.090576171875, -0.221649169921875, -0.26229095458984375, 0.4716682434082031, 3.1289520263671875, 5.2815093994140625, 0.34517669677734375, 1.6945877075195312, 2.6960105895996094, 3.1623992919921875, -1.8167266845703125, 2.213134765625, 0.05689048767089844, 5.434440612792969, 3.3455657958984375, 3.1447296142578125, 3.413665771484375, 3.6434173583984375, 2.422222137451172, 1.4536914825439453, 0.9547290802001953], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000366.npy"} +{"epoch": 0.5532879818594104, "step": 367, "batch_size": 64, "mean": 0.971848726272583, "std": 1.5732076168060303, "min": -3.250102996826172, "p10": -0.8116191864013672, "median": 1.0865631103515625, "p90": 2.6530906677246096, "max": 4.5101776123046875, "pos_frac": 0.734375, "sample": [0.6242256164550781, 4.104949951171875, 1.4960365295410156, -0.14094924926757812, 2.3072891235351562, 0.38814544677734375, 4.296329498291016, 2.6432266235351562, 1.7412185668945312, 0.4156665802001953, -1.4341812133789062, 0.13352203369140625, 0.1020965576171875, 0.3592529296875, 4.5101776123046875, 1.7932586669921875, 0.6320114135742188, -0.28218841552734375, -0.7268505096435547, 0.8118915557861328, 1.67987060546875, 2.3230743408203125, 0.46649932861328125, -1.6930618286132812, -0.0201263427734375, -3.250102996826172, -0.7074317932128906, 1.2991485595703125, 1.278778076171875, 2.657318115234375, 2.41900634765625, 1.2132072448730469, -0.16504287719726562, 2.96087646484375, 0.9485931396484375, 0.9980125427246094, 2.4454574584960938, -0.8311080932617188, 1.2078571319580078, 2.22198486328125, 0.0358734130859375, -2.6347579956054688, 0.4377899169921875, 4.108795166015625, 1.2753486633300781, -0.3626708984375, 1.0967826843261719, 1.0763435363769531, 1.8823394775390625, -0.800689697265625, 1.9170684814453125, 1.1021404266357422, -1.2270889282226562, 1.1864471435546875, 2.4804458618164062, 2.292877197265625, -0.8163032531738281, -0.4941234588623047, 1.8470458984375, 3.1226730346679688, -0.571441650390625, 1.7019577026367188, 0.05052947998046875, 2.2629966735839844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000367.npy"} +{"epoch": 0.5547996976568406, "step": 368, "batch_size": 64, "mean": 1.052868127822876, "std": 1.8827828168869019, "min": -2.746490478515625, "p10": -1.4377872467041015, "median": 1.0003814697265625, "p90": 3.488349723815919, "max": 5.5762786865234375, "pos_frac": 0.6875, "sample": [-1.6603469848632812, 2.1827011108398438, -1.1300201416015625, 0.4617481231689453, 0.909454345703125, 1.0256805419921875, 3.593027114868164, 2.5326995849609375, -0.7916412353515625, -0.1612091064453125, 2.046051025390625, 0.5452308654785156, -0.458831787109375, 1.7065887451171875, 4.075187683105469, 0.7681350708007812, -1.1419525146484375, -1.103057861328125, -1.4556732177734375, 2.033660888671875, 0.5982437133789062, 2.0543861389160156, 1.7187957763671875, -1.5615921020507812, 2.513347625732422, 0.4289531707763672, -1.66802978515625, 0.9812469482421875, 3.081787109375, 4.082050323486328, -0.4939689636230469, -1.6339263916015625, 3.137887954711914, -0.5757541656494141, 0.6776313781738281, 2.2207260131835938, 0.5371932983398438, 1.0195159912109375, 2.5564422607421875, 1.9279403686523438, 1.4878692626953125, 0.5750389099121094, 3.6602096557617188, 5.5762786865234375, -1.3960533142089844, -1.1092300415039062, 1.1398086547851562, -0.47414398193359375, 5.2027740478515625, -2.3759765625, 3.2441024780273438, 2.4343032836914062, 2.8779296875, 2.480621337890625, 0.4712867736816406, 2.3898468017578125, 2.6015167236328125, -1.10015869140625, -0.5478000640869141, 1.866241455078125, 3.67388916015625, 1.678762435913086, -2.746490478515625, 0.192626953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000368.npy"} +{"epoch": 0.5563114134542706, "step": 369, "batch_size": 64, "mean": 1.0951848030090332, "std": 1.7061223983764648, "min": -2.7868194580078125, "p10": -1.180701446533203, "median": 1.128448486328125, "p90": 3.4079002380371097, "max": 4.752540588378906, "pos_frac": 0.71875, "sample": [3.2223663330078125, -2.6139602661132812, 1.4485397338867188, 1.2620849609375, -2.7868194580078125, 4.1753997802734375, 3.4303054809570312, 0.6793365478515625, 0.8522109985351562, 1.3208198547363281, 1.1119766235351562, 2.644306182861328, 1.797964096069336, 4.4936676025390625, 2.407968521118164, 1.1449203491210938, 0.8561973571777344, 1.2674102783203125, 2.0610084533691406, -0.03488922119140625, -0.3371124267578125, -2.6300735473632812, 1.4348068237304688, -1.561025619506836, 0.8655548095703125, 1.9515151977539062, -0.25110435485839844, 2.0016002655029297, 1.0232353210449219, -0.4847126007080078, 4.291778564453125, 1.037139892578125, 0.9917964935302734, 2.7300491333007812, 2.2269344329833984, 3.1985740661621094, 4.752540588378906, -0.4668121337890625, 0.983673095703125, -1.5228195190429688, 1.92840576171875, 0.6594429016113281, -0.33265113830566406, 0.87554931640625, -0.3314018249511719, -0.06511688232421875, 3.355621337890625, 1.2158012390136719, 1.422454833984375, 0.6803817749023438, 1.1850814819335938, 1.199798583984375, -1.1846427917480469, 3.5185890197753906, -0.08988189697265625, -0.47362518310546875, 0.2808513641357422, 3.5742015838623047, 0.3439178466796875, 1.4610748291015625, -1.1715049743652344, 2.6101303100585938, -1.4506893157958984, 1.9036827087402344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000369.npy"} +{"epoch": 0.5578231292517006, "step": 370, "batch_size": 64, "mean": 1.3421647548675537, "std": 2.0777971744537354, "min": -4.027378082275391, "p10": -1.133794403076172, "median": 1.1753807067871094, "p90": 3.551274108886719, "max": 5.625518798828125, "pos_frac": 0.765625, "sample": [0.4505443572998047, 4.386283874511719, 1.7158374786376953, 0.9649181365966797, 3.1330413818359375, 0.5351123809814453, 5.4393768310546875, 2.534332275390625, 5.60540771484375, 0.5151252746582031, 0.920196533203125, -0.5342941284179688, -0.7519626617431641, 3.348297119140625, 5.625518798828125, 0.8603858947753906, 0.1967010498046875, -1.1085586547851562, -1.9760284423828125, 3.1746826171875, 2.58331298828125, 3.5359039306640625, 1.2688522338867188, 3.40283203125, 2.5208969116210938, 1.0819091796875, 1.5831985473632812, -0.28124237060546875, 0.51513671875, -1.211151123046875, -0.8724250793457031, 0.1660614013671875, -0.059967041015625, -1.9945831298828125, 0.2530059814453125, 4.5630035400390625, 4.784881591796875, 1.756744384765625, 3.557861328125, 1.3547134399414062, 3.1674041748046875, -3.9231414794921875, 2.4610042572021484, 0.8429183959960938, 0.6802101135253906, 3.2121353149414062, 1.8163833618164062, 2.2403202056884766, 3.0852737426757812, 1.9734039306640625, -4.027378082275391, 1.4600467681884766, 3.2326278686523438, -1.602407455444336, -0.11983489990234375, 0.7138519287109375, 2.2364959716796875, 0.635498046875, -1.1150703430175781, 0.253631591796875, 2.2859420776367188, 2.943603515625, -1.1418190002441406, 1.0435810089111328], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000370.npy"} +{"epoch": 0.5593348450491308, "step": 371, "batch_size": 64, "mean": 1.3074922561645508, "std": 2.133824586868286, "min": -3.8772430419921875, "p10": -1.465044593811035, "median": 1.1717414855957031, "p90": 4.217235565185549, "max": 6.7530670166015625, "pos_frac": 0.75, "sample": [3.3300323486328125, -3.8772430419921875, 4.520271301269531, 0.150360107421875, 2.6694869995117188, -1.949554443359375, 2.777294158935547, -0.6318016052246094, 0.006378173828125, 0.4114952087402344, -0.537750244140625, 2.461322784423828, -1.9170799255371094, 6.067939758300781, -0.11500740051269531, 3.8168258666992188, 0.03843498229980469, 2.3245925903320312, 2.1646175384521484, 5.086009979248047, 1.0176849365234375, -0.812469482421875, -0.08222007751464844, 0.4314231872558594, 3.0538330078125, 3.5565872192382812, 1.991302490234375, 6.7530670166015625, 3.0829925537109375, 2.741617202758789, 3.7704925537109375, 1.912506103515625, -1.6497268676757812, 0.16626358032226562, 1.3965702056884766, 1.0258636474609375, 0.9655914306640625, -1.6781597137451172, -1.0392799377441406, 4.3888397216796875, -1.357940673828125, 4.964923858642578, 0.15304946899414062, 1.4474906921386719, 1.6679611206054688, -0.99591064453125, 1.7361736297607422, 0.32762908935546875, 0.5804443359375, 1.2064476013183594, 1.1370353698730469, -1.510946273803711, 0.03294563293457031, 2.2450485229492188, 4.736968994140625, 0.12783432006835938, 1.8287582397460938, 1.459503173828125, 3.5455322265625, 1.13677978515625, 1.45904541015625, 2.4521484375, -1.7557029724121094, -0.7351226806640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000371.npy"} +{"epoch": 0.5608465608465608, "step": 372, "batch_size": 64, "mean": 0.8514436483383179, "std": 2.1201021671295166, "min": -4.1782989501953125, "p10": -1.9177392959594726, "median": 1.0117378234863281, "p90": 3.422832489013672, "max": 7.229389190673828, "pos_frac": 0.6875, "sample": [-0.807342529296875, 1.8200263977050781, 1.7049598693847656, -0.204345703125, 1.5417556762695312, 1.0979499816894531, 3.3978118896484375, -0.88226318359375, 3.8417205810546875, 0.9186019897460938, 3.4335556030273438, 0.04726409912109375, 0.8116378784179688, 3.9356765747070312, 1.6383514404296875, 0.7211227416992188, 2.6918411254882812, -3.074676513671875, 0.9760589599609375, 3.1152496337890625, 0.08178901672363281, -0.9347267150878906, 1.4511566162109375, 3.5771026611328125, -0.3109855651855469, -1.0293807983398438, -2.2721099853515625, 1.7120590209960938, 6.455326080322266, -1.405792236328125, 2.158599853515625, 3.94580078125, 0.7951850891113281, -0.7359504699707031, -1.9069499969482422, 1.1949043273925781, -2.1044445037841797, 1.2677650451660156, 2.40179443359375, 1.709035873413086, 2.37469482421875, 1.4581718444824219, -0.55108642578125, 2.6232528686523438, 0.08118057250976562, 1.6669597625732422, -4.1782989501953125, 0.5409984588623047, 0.3509368896484375, 2.5663604736328125, 0.3636627197265625, -2.799121856689453, 1.5242080688476562, 0.5640945434570312, -0.6800727844238281, -1.8975048065185547, -2.7510719299316406, 1.3514328002929688, 1.5576457977294922, 7.229389190673828, -0.03847694396972656, -1.92236328125, 1.0474166870117188, 1.2348480224609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000372.npy"} +{"epoch": 0.562358276643991, "step": 373, "batch_size": 64, "mean": 1.2532098293304443, "std": 2.069075107574463, "min": -2.833984375, "p10": -1.4519264221191406, "median": 1.0840530395507812, "p90": 3.5961307525634765, "max": 6.4892730712890625, "pos_frac": 0.71875, "sample": [-1.4705047607421875, 3.623851776123047, 3.5973129272460938, 0.516143798828125, 2.269824981689453, -1.4085769653320312, -0.003063201904296875, 2.932098388671875, 0.5803451538085938, 0.6589946746826172, 2.789186477661133, -2.7952957153320312, 3.1974639892578125, -0.9122161865234375, -1.6927757263183594, 0.204498291015625, 1.2904911041259766, 1.5197830200195312, -2.2955474853515625, -0.96484375, 1.6871795654296875, 0.23431777954101562, 2.8905563354492188, -0.09664154052734375, 1.0135498046875, 3.4820632934570312, -0.336822509765625, -0.441680908203125, 3.8714828491210938, 0.8913955688476562, 4.074497222900391, 2.2561187744140625, 0.4147796630859375, 3.593372344970703, -1.0964736938476562, -0.07503890991210938, -1.657257080078125, 2.3577880859375, -0.38675689697265625, 3.4156341552734375, 1.6240653991699219, -2.833984375, -1.3495025634765625, 0.9849739074707031, 0.6612510681152344, 1.4582633972167969, -1.7996463775634766, 3.454936981201172, 1.1274166107177734, 0.8480987548828125, 1.4940719604492188, 6.4892730712890625, 1.2758731842041016, 5.859169006347656, 1.1717090606689453, 3.415088653564453, 2.6177215576171875, 3.0474700927734375, 0.40545654296875, 6.0148468017578125, 1.040689468383789, 0.9297828674316406, 1.2971649169921875, 3.24200439453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000373.npy"} +{"epoch": 0.563869992441421, "step": 374, "batch_size": 64, "mean": 1.665249228477478, "std": 1.7466661930084229, "min": -1.82666015625, "p10": -0.404688262939453, "median": 1.4951953887939453, "p90": 4.12651596069336, "max": 5.79046630859375, "pos_frac": 0.828125, "sample": [4.5682373046875, 1.2617263793945312, 2.1270751953125, -0.6068649291992188, 0.6731739044189453, 2.7119903564453125, 2.946147918701172, 2.431121826171875, 5.45166015625, 0.4862232208251953, 4.519453048706055, -0.6656646728515625, 0.8794326782226562, 4.059700012207031, 2.3381576538085938, -0.0947265625, 1.9229354858398438, 0.9281158447265625, -0.27475738525390625, 0.5041275024414062, 1.7506027221679688, 1.8893260955810547, 0.45556068420410156, 0.1776123046875, 0.365447998046875, 2.7221145629882812, 0.2311553955078125, 3.2036972045898438, 0.6931381225585938, 1.9292678833007812, 1.1895065307617188, 3.3878707885742188, 0.48293304443359375, -0.10321617126464844, 0.60174560546875, 3.3118057250976562, 2.2950515747070312, 0.1387939453125, 2.1651763916015625, 3.052967071533203, 1.5919685363769531, -0.5805416107177734, -0.0017223358154296875, 4.4908294677734375, 3.625701904296875, 0.9048843383789062, -1.282257080078125, 2.2656707763671875, 0.2656688690185547, 4.0471343994140625, 5.79046630859375, 4.583892822265625, 4.1551513671875, 0.3327484130859375, 2.486377716064453, 0.8562068939208984, 4.0040435791015625, 1.779022216796875, 1.3984222412109375, 2.5298538208007812, -1.1246509552001953, -0.4603729248046875, 0.63629150390625, -1.82666015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000374.npy"} +{"epoch": 0.5653817082388511, "step": 375, "batch_size": 64, "mean": 0.8576098084449768, "std": 1.7321844100952148, "min": -3.198019027709961, "p10": -1.4510753631591795, "median": 0.8530483245849609, "p90": 3.053749084472657, "max": 5.05859375, "pos_frac": 0.703125, "sample": [-0.31798553466796875, -2.0436248779296875, -0.03292083740234375, 1.2206611633300781, 2.1874008178710938, 0.8651123046875, 2.593505859375, 1.5374641418457031, 0.03515625, 1.0164680480957031, -0.6348114013671875, 2.653900146484375, -0.21795654296875, 3.3795089721679688, 1.3932781219482422, -1.0751304626464844, 0.7619915008544922, 0.4580230712890625, 1.6265182495117188, -0.7402858734130859, 0.8409843444824219, 1.80572509765625, 2.82232666015625, -1.498260498046875, 1.1272354125976562, 2.8227691650390625, 0.36365509033203125, -0.6815338134765625, 0.47893524169921875, 2.2131710052490234, 0.27701568603515625, 0.1531848907470703, -0.44409942626953125, -1.8044509887695312, -0.837982177734375, 0.6319828033447266, 1.1825790405273438, 2.0801849365234375, 5.026065826416016, 0.05780029296875, 3.6029052734375, 2.349334716796875, 3.4078330993652344, -3.198019027709961, 0.3374481201171875, -0.540008544921875, 5.05859375, 1.0513229370117188, 3.52337646484375, 0.8247413635253906, 1.50750732421875, 3.152740478515625, -1.614898681640625, 1.477935791015625, 0.7771415710449219, 1.1713485717773438, 1.4254531860351562, -2.7580490112304688, -1.3409767150878906, 1.6749305725097656, -1.5846405029296875, 2.57159423828125, 1.7316055297851562, -1.0057525634765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000375.npy"} +{"epoch": 0.5668934240362812, "step": 376, "batch_size": 64, "mean": 1.7395691871643066, "std": 2.129511594772339, "min": -2.25762939453125, "p10": -0.8740394592285156, "median": 1.4856281280517578, "p90": 4.590167808532715, "max": 9.048736572265625, "pos_frac": 0.796875, "sample": [0.7655105590820312, 1.0691452026367188, 0.8471012115478516, 1.69683837890625, 1.701904296875, -1.0106010437011719, -0.8326339721679688, 4.3535614013671875, 1.1923141479492188, 1.450714111328125, 3.36810302734375, 1.9895744323730469, -1.96588134765625, -0.3269500732421875, -0.9830856323242188, 0.7244167327880859, 5.026573181152344, 2.310028076171875, 1.400970458984375, 9.048736572265625, 1.50909423828125, 5.092132568359375, 0.08240509033203125, 3.322174072265625, 4.259223937988281, -0.89178466796875, 4.5310516357421875, 3.656026840209961, 1.9255294799804688, 3.2257919311523438, 2.249227523803711, 1.1944293975830078, 5.173070907592773, 3.934844970703125, 0.9345626831054688, 2.7960586547851562, 1.48931884765625, -0.09598922729492188, 4.5083465576171875, -2.25762939453125, 0.7666549682617188, -0.5472068786621094, 2.9224281311035156, -0.23709869384765625, 0.956390380859375, 5.01422119140625, -0.4169940948486328, 1.7324485778808594, 0.09639739990234375, 2.515209197998047, -1.5910110473632812, 2.7506446838378906, 0.39446258544921875, 1.4819374084472656, 5.3666229248046875, 0.4495735168457031, 1.0325927734375, 1.5675125122070312, 4.615503311157227, 2.5694351196289062, 1.2068328857421875, 1.5560417175292969, 0.40618133544921875, -1.7405853271484375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000376.npy"} +{"epoch": 0.5684051398337112, "step": 377, "batch_size": 64, "mean": 1.1734893321990967, "std": 1.7417067289352417, "min": -2.4268341064453125, "p10": -1.210672569274902, "median": 1.119832992553711, "p90": 3.719084167480469, "max": 4.944797515869141, "pos_frac": 0.734375, "sample": [1.706298828125, -1.6716384887695312, 2.5439720153808594, 1.8615188598632812, 0.5669403076171875, -0.24614715576171875, -2.4268341064453125, -0.06356048583984375, 0.5825576782226562, 2.932035446166992, 3.2114639282226562, -0.42723846435546875, 4.1549072265625, 0.2514495849609375, 2.0561885833740234, 0.8883743286132812, 0.47705841064453125, 1.7981109619140625, -0.4939537048339844, 0.6074752807617188, 3.5829315185546875, 0.12072372436523438, 2.4449996948242188, 2.947406768798828, 1.428670883178711, -1.677093505859375, 2.2930259704589844, 4.60943603515625, 0.8387260437011719, 0.3313770294189453, 1.9265899658203125, -0.7093753814697266, 2.1840877532958984, 0.20351600646972656, 1.3658981323242188, -0.9841480255126953, 4.644989013671875, 3.777435302734375, 1.2131271362304688, 1.275787353515625, -0.4448814392089844, 4.944797515869141, 1.0874748229980469, 1.1003837585449219, 2.0351409912109375, 4.287395477294922, 1.1392822265625, -0.3765602111816406, -1.3077545166015625, 1.9539203643798828, 2.10784912109375, -1.5237884521484375, 0.15869712829589844, 4.3228759765625, -0.462158203125, -1.4482498168945312, 2.581808090209961, 0.6051616668701172, -1.5897693634033203, 0.632049560546875, 1.9657211303710938, -0.19605636596679688, 1.8499298095703125, 1.5529594421386719], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000377.npy"} +{"epoch": 0.5699168556311414, "step": 378, "batch_size": 64, "mean": 1.1440811157226562, "std": 1.905044674873352, "min": -2.9882850646972656, "p10": -0.6026912689208983, "median": 0.9233865737915039, "p90": 3.839989471435547, "max": 8.449996948242188, "pos_frac": 0.71875, "sample": [1.277557373046875, -0.1407928466796875, 1.115285873413086, 0.38403892517089844, 1.4276580810546875, 2.3759689331054688, -0.474578857421875, -0.4321098327636719, 0.570465087890625, 4.3513336181640625, -2.420074462890625, 0.854736328125, 4.4347991943359375, 1.3046016693115234, 2.2240753173828125, 2.5145034790039062, 0.2735424041748047, 1.7020187377929688, 3.9376258850097656, 0.1449127197265625, 3.4119091033935547, 1.0629653930664062, 0.3748512268066406, 0.5722198486328125, 3.7308921813964844, -0.17041778564453125, 0.5801277160644531, 4.32635498046875, -1.6113739013671875, 0.5080585479736328, 1.810516357421875, 0.23333740234375, -1.3048782348632812, 1.2235374450683594, 2.9476776123046875, -0.17068099975585938, -0.2085113525390625, 0.07698440551757812, 2.6009521484375, -0.7135906219482422, 8.449996948242188, 1.5016937255859375, 1.2176856994628906, 3.8867454528808594, 1.127593994140625, 1.4938201904296875, -0.6575965881347656, 4.191991806030273, 1.5929031372070312, 0.7866382598876953, 1.0481071472167969, -0.35695648193359375, 0.44558143615722656, -2.1556243896484375, -0.075775146484375, 3.3609619140625, 0.32009315490722656, -0.30889892578125, 0.9920368194580078, 2.02117919921875, -0.4288520812988281, -2.9882850646972656, -0.045574188232421875, 3.0952224731445312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000378.npy"} +{"epoch": 0.5714285714285714, "step": 379, "batch_size": 64, "mean": 1.5013036727905273, "std": 1.8955292701721191, "min": -2.5081253051757812, "p10": -0.6615074157714842, "median": 1.235321044921875, "p90": 3.988162994384767, "max": 7.36822509765625, "pos_frac": 0.8125, "sample": [1.20208740234375, -0.22784423828125, 3.44366455078125, -1.5419464111328125, 0.92236328125, 1.2685546875, 2.0513381958007812, -1.2299270629882812, 0.4253349304199219, 1.6472892761230469, 0.7706069946289062, 0.640960693359375, -0.7592926025390625, 2.0685882568359375, -0.43334197998046875, 6.053741455078125, 1.7416610717773438, 0.2653312683105469, 2.4684600830078125, 3.3749160766601562, 6.276580810546875, 2.584320068359375, 4.178684234619141, 3.3867568969726562, -0.88531494140625, -1.151611328125, 2.153728485107422, 0.0257110595703125, 4.135459899902344, -1.217681884765625, 4.928550720214844, 1.7263164520263672, 0.8006210327148438, 0.9697837829589844, 1.2958984375, 2.50048828125, 2.4847335815429688, 0.885650634765625, 0.02727508544921875, -2.5081253051757812, 0.5524368286132812, 2.976776123046875, 1.7603302001953125, 4.649255752563477, 2.0177154541015625, -0.033588409423828125, 7.36822509765625, 1.0785675048828125, 0.9459915161132812, 1.5147705078125, 3.64447021484375, -0.21347808837890625, 1.094635009765625, 1.5039081573486328, 1.7540836334228516, 0.28989219665527344, 0.3453559875488281, 1.89398193359375, 0.4059104919433594, 0.4862709045410156, 0.6475334167480469, 3.2402114868164062, 1.4713134765625, -0.06150054931640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000379.npy"} +{"epoch": 0.5729402872260015, "step": 380, "batch_size": 64, "mean": 1.7422823905944824, "std": 2.145127058029175, "min": -4.0100860595703125, "p10": -0.42914505004882814, "median": 1.6105690002441406, "p90": 4.862982368469239, "max": 6.7131195068359375, "pos_frac": 0.8125, "sample": [1.977203369140625, 3.2770729064941406, 1.4705047607421875, 1.5842208862304688, -0.49527740478515625, -1.9104843139648438, 3.1316795349121094, 2.6453475952148438, -0.2702827453613281, 1.9969100952148438, 1.4376373291015625, -0.4275321960449219, -0.2305755615234375, 3.634002685546875, 1.1580734252929688, -0.575714111328125, 0.6361274719238281, 0.8611068725585938, 2.8905296325683594, 3.3686275482177734, 1.5734939575195312, 1.973663330078125, -0.4298362731933594, -4.0100860595703125, 5.685634613037109, 1.0763130187988281, 3.041595458984375, 4.656410217285156, 0.6382331848144531, 1.5387916564941406, 2.8554458618164062, 3.41168212890625, 2.5219573974609375, 0.8840484619140625, 6.2493133544921875, 0.503570556640625, 5.83154296875, 1.8084869384765625, 0.3822174072265625, 0.43512725830078125, 1.6369171142578125, -3.740642547607422, 4.951513290405273, 6.7131195068359375, 1.8706073760986328, 1.9213542938232422, 6.020133972167969, 0.9754486083984375, -0.2574005126953125, 2.4204044342041016, 1.4207382202148438, 0.5209102630615234, -0.3414154052734375, 5.902717590332031, 3.1583709716796875, 2.3632278442382812, 0.6079959869384766, 0.34063720703125, 2.851123809814453, 1.8433380126953125, 1.8752632141113281, 2.624776840209961, 0.1223602294921875, -1.08221435546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000380.npy"} +{"epoch": 0.5744520030234316, "step": 381, "batch_size": 64, "mean": 0.7539188265800476, "std": 1.7173871994018555, "min": -4.5873870849609375, "p10": -1.037679100036621, "median": 0.6672897338867188, "p90": 2.807337760925294, "max": 6.5968475341796875, "pos_frac": 0.703125, "sample": [-0.696075439453125, 0.003692626953125, 1.2947921752929688, -1.6930255889892578, -4.5873870849609375, 0.08917617797851562, 0.0745086669921875, 0.9603157043457031, 4.53326416015625, 0.9839515686035156, 3.091033935546875, -0.3026905059814453, 0.4394187927246094, 1.4652671813964844, -1.0707855224609375, 6.5968475341796875, 1.969512939453125, 0.6465415954589844, -0.9291610717773438, -0.6279563903808594, 0.03524589538574219, 0.016845703125, 1.6203804016113281, 1.1282501220703125, -1.776336669921875, 1.5424957275390625, -1.0465087890625, -1.1980667114257812, 1.7992668151855469, 0.4278144836425781, 2.0701751708984375, 1.087982177734375, 1.6720924377441406, 0.34578704833984375, -0.7442493438720703, 2.9052505493164062, 1.3193588256835938, 0.033512115478515625, 0.0395355224609375, 1.3990478515625, 1.9020004272460938, 0.6688079833984375, 2.577526092529297, 0.665771484375, 1.3318862915039062, 3.7330322265625, 2.5788745880126953, 1.6587238311767578, -0.12443351745605469, -1.0170764923095703, 0.9606704711914062, 2.467357635498047, 1.6023406982421875, -1.014617919921875, -0.06576919555664062, -0.7108917236328125, 3.5761489868164062, -1.7830352783203125, -0.3017120361328125, 0.9740333557128906, 0.8197441101074219, -0.7821121215820312, 0.27608489990234375, 3.338329315185547], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000381.npy"} +{"epoch": 0.5759637188208617, "step": 382, "batch_size": 64, "mean": 1.0126454830169678, "std": 1.6284832954406738, "min": -1.82720947265625, "p10": -0.9703830718994139, "median": 0.7821063995361328, "p90": 2.9452823638916015, "max": 6.062843322753906, "pos_frac": 0.734375, "sample": [-1.0699615478515625, 0.31513404846191406, 2.041259765625, 3.094280242919922, 2.913604736328125, -1.3206558227539062, 0.7704925537109375, 0.7184524536132812, 1.7919464111328125, 2.4230289459228516, 0.4478302001953125, 0.5373382568359375, -0.7298965454101562, 1.5839080810546875, 1.8004817962646484, -0.645538330078125, -1.3248348236083984, 2.3195343017578125, -1.82720947265625, 0.5572662353515625, -0.4486503601074219, 1.0846481323242188, 1.3157730102539062, -0.6147537231445312, 0.34282684326171875, 0.8835220336914062, 1.8442821502685547, 0.191009521484375, -0.25742340087890625, 5.496074676513672, 2.0696563720703125, 1.4497909545898438, -1.3410682678222656, -1.5093536376953125, 1.7429523468017578, -0.2363739013671875, -1.7930107116699219, 0.7844390869140625, 0.0619354248046875, 1.7676048278808594, 0.43392181396484375, 6.062843322753906, 0.64715576171875, 2.7522430419921875, 0.16593551635742188, 2.4447784423828125, -0.43570709228515625, -0.27202606201171875, 0.0410614013671875, 2.9588584899902344, -0.579620361328125, 2.002239227294922, -0.7380332946777344, 0.44574546813964844, 1.1423263549804688, 0.7797737121582031, 3.2731781005859375, 1.6109180450439453, 2.89093017578125, 3.9026565551757812, 1.0122032165527344, 3.4896697998046875, 1.7692794799804688, 1.7786407470703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000382.npy"} +{"epoch": 0.5774754346182918, "step": 383, "batch_size": 64, "mean": 1.4475116729736328, "std": 1.8969968557357788, "min": -3.473907470703125, "p10": -0.5161159515380858, "median": 1.3211593627929688, "p90": 4.018383216857911, "max": 6.2035675048828125, "pos_frac": 0.765625, "sample": [0.03795051574707031, 1.93048095703125, 1.3055419921875, -0.0491485595703125, 1.9284286499023438, -0.2946796417236328, 2.303028106689453, 1.414377212524414, -0.12774658203125, -2.031635284423828, 0.4703216552734375, 0.00506591796875, 2.0234031677246094, 1.9561786651611328, 3.825956344604492, 0.60723876953125, 2.2230377197265625, 0.002513885498046875, 4.1036224365234375, 1.5238227844238281, 1.53277587890625, 0.3232269287109375, 3.8403244018554688, 3.64251708984375, 4.094694137573242, -2.136993408203125, 1.2859878540039062, 0.7551422119140625, 2.1277618408203125, 3.1451263427734375, 4.289094924926758, -0.9987354278564453, 0.8040390014648438, 6.2035675048828125, 0.13030624389648438, 1.8368148803710938, 2.770214080810547, -3.473907470703125, 1.3367767333984375, 2.5188751220703125, -0.5679702758789062, 0.8917999267578125, 2.3336029052734375, 0.4264411926269531, 0.6519012451171875, -0.008932113647460938, 0.8896255493164062, 1.5408706665039062, -0.20616531372070312, 5.483772277832031, -0.3951225280761719, 4.879066467285156, 2.3211593627929688, 0.6376876831054688, -0.8695831298828125, 3.3301620483398438, 4.963905334472656, 1.250762939453125, -0.0552520751953125, -0.8914642333984375, 3.2788467407226562, -0.22086334228515625, 3.7924041748046875, 1.9987258911132812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000383.npy"} +{"epoch": 0.5789871504157218, "step": 384, "batch_size": 64, "mean": 1.0682566165924072, "std": 1.614412784576416, "min": -4.397064208984375, "p10": -0.8088220596313475, "median": 0.9327239990234375, "p90": 2.8498039245605478, "max": 5.216911315917969, "pos_frac": 0.78125, "sample": [0.798797607421875, 2.3226394653320312, 0.7638740539550781, 0.6991844177246094, -0.19488143920898438, 3.4853363037109375, 0.3544158935546875, 2.029815673828125, 1.8398208618164062, -0.2644004821777344, 1.8312835693359375, 0.2269439697265625, -0.28577423095703125, 1.2941665649414062, -0.9828262329101562, 0.46051025390625, 0.12822723388671875, 4.267204284667969, -0.8860931396484375, 3.130535125732422, -1.2744827270507812, 1.288909912109375, 0.854156494140625, -1.98681640625, -0.6285228729248047, 1.9193305969238281, 5.216911315917969, 1.5888442993164062, -0.13917160034179688, 1.395965576171875, 4.617570877075195, 2.3377456665039062, -0.2692413330078125, 0.6215152740478516, 1.7802200317382812, 0.24741744995117188, 4.2045440673828125, 0.7652301788330078, -4.397064208984375, 2.9474563598632812, 0.311431884765625, 2.10894775390625, 2.3157424926757812, -1.6376266479492188, 2.5619430541992188, 2.2374038696289062, 1.7351417541503906, 0.329681396484375, 0.4618263244628906, 1.2533283233642578, 0.682769775390625, 0.80126953125, 1.3766403198242188, 1.4293670654296875, 0.28679656982421875, -0.2414989471435547, 1.4949493408203125, 2.6219482421875, -1.2332687377929688, 1.01129150390625, 2.288848876953125, 0.7623786926269531, 1.7490768432617188, 1.5507354736328125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000384.npy"} +{"epoch": 0.5804988662131519, "step": 385, "batch_size": 64, "mean": 1.3148208856582642, "std": 1.7485291957855225, "min": -2.648284912109375, "p10": -0.7072164535522459, "median": 1.3532915115356445, "p90": 3.235754394531251, "max": 7.819484710693359, "pos_frac": 0.8125, "sample": [-0.1484375, -0.5172309875488281, 1.8562698364257812, 2.337005615234375, 1.3617935180664062, 2.97869873046875, 0.3689117431640625, 1.5641937255859375, 0.44646453857421875, 0.884552001953125, -1.6398048400878906, 0.7832260131835938, 1.0803585052490234, 1.9105472564697266, 1.8089828491210938, 3.024578094482422, 4.004425048828125, 3.6248550415039062, 0.0075855255126953125, 1.3447895050048828, 0.11278533935546875, -2.573272705078125, 2.2123870849609375, 2.2147140502929688, 3.3162612915039062, 1.31658935546875, 2.2265357971191406, 1.52276611328125, 0.22772979736328125, 1.5133628845214844, 1.96209716796875, -2.648284912109375, 2.30999755859375, 0.6967010498046875, 1.2007026672363281, 1.7823104858398438, 0.00112152099609375, 0.6207809448242188, 1.2574481964111328, 1.809762954711914, -0.5317478179931641, -0.7824172973632812, 2.42181396484375, 3.8467483520507812, 0.03903961181640625, 4.974609375, 2.0700912475585938, -0.21262359619140625, -1.4701671600341797, -1.308349609375, 1.56951904296875, 1.2957725524902344, 0.34291839599609375, 1.67877197265625, 1.0521011352539062, 4.0342559814453125, 0.4362373352050781, 1.8095760345458984, -0.03607177734375, 7.819484710693359, -0.9096031188964844, 1.7777328491210938, 3.0186767578125, 3.0479049682617188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000385.npy"} +{"epoch": 0.582010582010582, "step": 386, "batch_size": 64, "mean": 1.2290531396865845, "std": 1.8740922212600708, "min": -2.82183837890625, "p10": -0.6519313812255859, "median": 1.1783380508422852, "p90": 3.5541496276855478, "max": 7.071990966796875, "pos_frac": 0.75, "sample": [7.071990966796875, 0.1464824676513672, -0.36037445068359375, 1.5585498809814453, 3.0402603149414062, 1.17559814453125, -0.38105010986328125, 3.64642333984375, 0.6558952331542969, 1.6175575256347656, 0.0638885498046875, -2.31182861328125, -1.5123043060302734, 0.17433929443359375, -0.664642333984375, 3.6813278198242188, 1.7979583740234375, 4.993398666381836, 2.2645721435546875, 0.624359130859375, 1.9205951690673828, 2.1389541625976562, 2.0364151000976562, -0.8162307739257812, -0.3196563720703125, 1.4086074829101562, 5.00871467590332, 0.2336273193359375, 1.84698486328125, -0.11635971069335938, 1.8780593872070312, 3.0479507446289062, -0.524993896484375, 0.393157958984375, -2.0350189208984375, -0.18672943115234375, 1.3688583374023438, 0.200775146484375, 1.1810779571533203, 2.5587158203125, 0.8444595336914062, -0.23683929443359375, 1.616384506225586, 5.4385223388671875, 2.7746963500976562, -0.6222724914550781, 0.290374755859375, 1.6908035278320312, -0.2519969940185547, 3.9941253662109375, 0.8909683227539062, 1.32421875, -1.460906982421875, 1.5474090576171875, 3.059112548828125, 1.2050857543945312, 0.20290374755859375, 0.9814910888671875, 2.58367919921875, 3.2524948120117188, 3.3388442993164062, 0.20402145385742188, 0.3077545166015625, -2.82183837890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000386.npy"} +{"epoch": 0.5835222978080121, "step": 387, "batch_size": 64, "mean": 1.5613082647323608, "std": 1.4970638751983643, "min": -2.5931396484375, "p10": -0.2578332901000976, "median": 1.3995170593261719, "p90": 3.0964111328125, "max": 5.0045166015625, "pos_frac": 0.84375, "sample": [2.592041015625, 0.7851066589355469, 2.869384765625, -0.010793685913085938, 0.9128265380859375, 3.0641326904296875, 1.9537811279296875, 0.24035263061523438, 1.848581314086914, 5.0045166015625, 1.1693000793457031, -0.3943634033203125, -0.29595184326171875, 0.3702392578125, 3.008220672607422, 1.0422439575195312, 1.415313720703125, 3.1102447509765625, 0.8876762390136719, 1.07073974609375, 2.62640380859375, 2.6116867065429688, 2.2929153442382812, 0.5618858337402344, -1.4442596435546875, 2.1917724609375, 0.6586532592773438, -0.3263225555419922, 2.2888221740722656, 1.1808509826660156, 1.0618820190429688, 0.0882720947265625, 0.8367156982421875, 4.479785919189453, 0.8603515625, 2.7543792724609375, 1.2476119995117188, -0.301727294921875, 1.5957393646240234, 0.40938568115234375, -0.16888999938964844, 3.0272064208984375, 2.0934066772460938, -2.5931396484375, -0.003162384033203125, 1.9529190063476562, 1.3837203979492188, 4.474464416503906, 0.5309944152832031, 2.630901336669922, -1.0282325744628906, 3.0538902282714844, 2.8140106201171875, 2.6997833251953125, 0.1715545654296875, 1.1862335205078125, 2.9537734985351562, 1.7131938934326172, 1.81048583984375, 3.967376708984375, 4.417755126953125, 1.2655620574951172, 3.629121780395508, 1.6224002838134766], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000387.npy"} +{"epoch": 0.5850340136054422, "step": 388, "batch_size": 64, "mean": 1.2679840326309204, "std": 2.0498664379119873, "min": -2.3985252380371094, "p10": -0.995630645751953, "median": 0.7608747482299805, "p90": 4.070112037658692, "max": 6.744720458984375, "pos_frac": 0.734375, "sample": [2.4297332763671875, 5.97515869140625, 2.15789794921875, 5.877475738525391, 0.0119781494140625, 2.9222869873046875, 2.872467041015625, 0.0005340576171875, -1.9209365844726562, -0.6192283630371094, 2.5998764038085938, 2.0723495483398438, -0.9279708862304688, 1.0891151428222656, -1.3042678833007812, 6.744720458984375, -1.4811439514160156, 3.839202880859375, 0.3228607177734375, 1.6767616271972656, 2.550750732421875, 0.3164520263671875, 2.8704376220703125, 0.43235015869140625, 2.9612560272216797, -2.3985252380371094, 0.49957275390625, 1.6166000366210938, 0.01053619384765625, 1.4011859893798828, -0.7554798126220703, -0.7094841003417969, -0.01908111572265625, 4.62469482421875, -0.11223411560058594, 1.4044036865234375, 0.028972625732421875, -1.344268798828125, 1.8136634826660156, -0.6856689453125, 3.1035385131835938, 5.1464996337890625, 0.0219573974609375, 3.7201690673828125, 3.108020782470703, 1.419912338256836, 0.129638671875, 1.071380615234375, 1.77142333984375, 4.169073104858398, 0.7450408935546875, 3.5178375244140625, -0.5679702758789062, 0.662261962890625, -2.123523712158203, -1.024627685546875, 4.4002227783203125, 1.5859832763671875, -0.32567405700683594, -0.031223297119140625, 0.5813941955566406, 0.31854248046875, 0.12938690185546875, 0.7767086029052734], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000388.npy"} +{"epoch": 0.5865457294028723, "step": 389, "batch_size": 64, "mean": 1.5020864009857178, "std": 1.9537131786346436, "min": -3.114459991455078, "p10": -0.7388311386108398, "median": 1.5492630004882812, "p90": 3.8438108444213874, "max": 5.939323425292969, "pos_frac": 0.78125, "sample": [-1.8190841674804688, -1.1168899536132812, 2.0117645263671875, 0.30918121337890625, 0.9440670013427734, 0.7876663208007812, 1.8007240295410156, 5.343086242675781, 4.64532470703125, 4.0846710205078125, 0.20928955078125, -0.8245620727539062, 3.43988037109375, 2.313882827758789, 2.4598159790039062, 3.9340343475341797, 2.23651123046875, 2.008373260498047, 2.5560302734375, 1.7810859680175781, 3.5896224975585938, 1.983673095703125, 0.48839569091796875, 3.2857818603515625, 3.0534210205078125, -0.68951416015625, 1.8232536315917969, 0.2677192687988281, 3.3413543701171875, 1.803152084350586, -0.74505615234375, 1.489065170288086, -0.5678176879882812, 3.633289337158203, 3.4803924560546875, 1.760101318359375, 5.939323425292969, 0.29638671875, -0.6979293823242188, -3.114459991455078, 3.0716400146484375, -0.10909843444824219, 1.0095272064208984, -0.30767822265625, 1.8106250762939453, -0.4965667724609375, 3.0490341186523438, 0.6187782287597656, -2.9099273681640625, 1.539215087890625, 0.9808807373046875, 1.0955562591552734, 3.42974853515625, 0.06973457336425781, -0.9592056274414062, 5.647911071777344, 4.702056884765625, 1.5593109130859375, 0.60321044921875, 3.1381683349609375, -0.7243061065673828, 0.7022018432617188, 0.6046981811523438, 0.48300933837890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000389.npy"} +{"epoch": 0.5880574452003023, "step": 390, "batch_size": 64, "mean": 1.3320645093917847, "std": 1.678733229637146, "min": -1.714599609375, "p10": -0.732942771911621, "median": 1.0116057395935059, "p90": 3.6923004150390626, "max": 5.657508850097656, "pos_frac": 0.75, "sample": [-1.714599609375, 0.6287212371826172, 1.9983901977539062, -0.2503204345703125, 3.7794628143310547, -0.29447174072265625, 1.4107589721679688, 2.2013282775878906, -0.26148223876953125, 1.7744522094726562, 0.0313720703125, -0.6332168579101562, 2.128528594970703, -0.45093727111816406, -0.9520950317382812, 5.657508850097656, 1.3910331726074219, -0.7756824493408203, 0.8741989135742188, -0.8844223022460938, 2.8848419189453125, -0.1205596923828125, 2.302579879760742, 3.6654052734375, 2.64727783203125, 2.549102783203125, 0.8957595825195312, -1.0124130249023438, -0.15288162231445312, 5.1122894287109375, 0.005096435546875, 1.1274518966674805, 3.3453807830810547, 3.1796722412109375, -0.4723491668701172, 0.2600250244140625, -1.4863319396972656, 3.703826904296875, 4.163116455078125, 0.5803737640380859, 4.059539794921875, 0.7586727142333984, 2.3359298706054688, 1.1581554412841797, 0.6891803741455078, 2.6005325317382812, 0.8093490600585938, 1.3598747253417969, 2.686676025390625, 0.6479644775390625, 2.7884521484375, 1.8427696228027344, 0.7406501770019531, 3.3375606536865234, 0.6377239227294922, 0.2664642333984375, 0.39998817443847656, 1.3786468505859375, -0.000743865966796875, 0.2348041534423828, 2.2620468139648438, 2.0641937255859375, -0.8949661254882812, 4.252471923828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000390.npy"} +{"epoch": 0.5895691609977324, "step": 391, "batch_size": 64, "mean": 1.5918623208999634, "std": 2.3532907962799072, "min": -3.6418914794921875, "p10": -1.4535974502563476, "median": 1.4644336700439453, "p90": 4.298081588745117, "max": 9.187911987304688, "pos_frac": 0.75, "sample": [1.0637664794921875, -3.6418914794921875, 4.263690948486328, 1.5386276245117188, 1.3647308349609375, 2.2109146118164062, -0.5253944396972656, 3.6457366943359375, 4.6243743896484375, 6.24078369140625, 2.4363632202148438, -2.636322021484375, 3.60198974609375, -0.4679718017578125, 5.796131134033203, 4.3128204345703125, -1.2744522094726562, 1.3284378051757812, -0.6193084716796875, 0.103485107421875, 1.2875595092773438, 1.9455947875976562, 2.0159149169921875, 2.7748336791992188, 0.7698745727539062, 0.3262004852294922, -1.5733413696289062, -1.4084148406982422, 2.9583282470703125, 2.4994468688964844, 9.187911987304688, 2.2454986572265625, 1.3902397155761719, -1.4081878662109375, 0.16009521484375, -0.45641326904296875, -0.26215171813964844, 2.842803955078125, 5.2194671630859375, 2.1210708618164062, 0.7301254272460938, 5.001930236816406, -1.8815536499023438, -1.8430824279785156, 1.9132003784179688, 1.3772506713867188, 4.058067321777344, 1.8019561767578125, 2.843414306640625, 3.530731201171875, -1.47296142578125, -1.9763412475585938, 3.744709014892578, 3.578601837158203, 3.0378952026367188, 3.798828125, 2.6386775970458984, 1.220560073852539, 0.1813507080078125, 0.6250267028808594, -0.3650054931640625, 1.6176185607910156, 0.6145515441894531, 1.1007919311523438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000391.npy"} +{"epoch": 0.5910808767951625, "step": 392, "batch_size": 64, "mean": 1.761289358139038, "std": 2.3629860877990723, "min": -3.5749988555908203, "p10": -0.9417901992797851, "median": 1.6169509887695312, "p90": 5.059535980224611, "max": 8.90728759765625, "pos_frac": 0.78125, "sample": [-1.9464950561523438, 0.10890960693359375, 2.0499191284179688, 2.2684783935546875, 0.76678466796875, 0.9252777099609375, 5.44898796081543, -0.2424774169921875, 5.438507080078125, 2.787252426147461, 0.4565887451171875, -0.780029296875, 2.031280517578125, 3.2661170959472656, 4.083232879638672, 1.7517242431640625, 0.6628494262695312, 2.0754165649414062, 5.5927886962890625, 3.0161590576171875, 1.101287841796875, 2.9177703857421875, 1.6119232177734375, -3.5749988555908203, 4.608970642089844, 1.570953369140625, 3.2211990356445312, 1.3964920043945312, 2.6752471923828125, -2.6173858642578125, 0.01956939697265625, 4.724494934082031, 2.1374664306640625, 0.4615478515625, -0.96771240234375, 3.3517379760742188, 1.213958740234375, 2.7908973693847656, 1.7888221740722656, 4.165332794189453, 5.606803894042969, 5.203125, -0.9963893890380859, 8.006637573242188, 0.2663135528564453, 0.9177665710449219, -0.9191341400146484, -0.1828460693359375, 4.090583801269531, 1.621978759765625, -0.5328369140625, 8.90728759765625, -0.228973388671875, 2.7098388671875, -0.7230148315429688, 0.1190948486328125, 2.39996337890625, -1.009613037109375, 0.604156494140625, 0.4947357177734375, 0.9796600341796875, 1.9863033294677734, -0.9514999389648438, 1.9937286376953125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000392.npy"} +{"epoch": 0.5925925925925926, "step": 393, "batch_size": 64, "mean": 1.5697075128555298, "std": 1.651499629020691, "min": -2.1165924072265625, "p10": -0.1446670532226562, "median": 1.4100990295410156, "p90": 4.264827728271485, "max": 5.025276184082031, "pos_frac": 0.859375, "sample": [1.3748931884765625, 0.3630180358886719, 4.348320007324219, 2.162151336669922, 4.3248748779296875, 3.0616073608398438, 1.2056503295898438, 0.96771240234375, -0.09340286254882812, 1.0797615051269531, 0.03845405578613281, 1.4453048706054688, 4.774639129638672, 2.1735267639160156, 2.4214038848876953, 0.18785476684570312, 1.5173873901367188, 2.3516769409179688, 0.3299102783203125, 2.567371368408203, 0.6404476165771484, 0.7488632202148438, 0.543701171875, 4.794258117675781, 1.202178955078125, 3.433807373046875, 0.4182472229003906, 5.025276184082031, 2.068817138671875, 2.3806686401367188, 2.7044296264648438, -0.6390228271484375, 3.0276336669921875, 4.632984161376953, 0.04528236389160156, 1.462127685546875, 0.04915046691894531, 3.1455230712890625, 1.45953369140625, -0.08519554138183594, 1.4470634460449219, 0.15264892578125, -0.16663742065429688, -0.9403343200683594, 0.9836349487304688, 1.7494316101074219, 1.931976318359375, 0.6658782958984375, 0.012456893920898438, 3.695556640625, 3.6131057739257812, 4.444709777832031, 1.994058609008789, -1.2013320922851562, 0.9691543579101562, 2.8996353149414062, 1.0091629028320312, -0.7398681640625, -0.409149169921875, -2.1165924072265625, 1.793375015258789, 4.124717712402344, 0.8083648681640625, 0.079437255859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000393.npy"} +{"epoch": 0.5941043083900227, "step": 394, "batch_size": 64, "mean": 1.4036844968795776, "std": 2.0321009159088135, "min": -2.143505096435547, "p10": -1.3275209426879881, "median": 1.1866464614868164, "p90": 4.826760864257814, "max": 6.951774597167969, "pos_frac": 0.78125, "sample": [-1.3933029174804688, 4.9900360107421875, 4.4457855224609375, 1.0937995910644531, 0.7708969116210938, 2.3665924072265625, -0.630615234375, 1.6378402709960938, 5.041404724121094, 0.9795665740966797, 5.82879638671875, 2.16741943359375, -0.5768852233886719, -1.0911293029785156, -1.5524559020996094, 1.3257179260253906, 1.1937789916992188, -2.143505096435547, 1.284423828125, 1.2049217224121094, 1.3086280822753906, -1.7498970031738281, 1.321249008178711, 2.6328964233398438, 2.9735050201416016, 5.367954254150391, 2.36309814453125, 2.251697540283203, 1.179513931274414, 0.08786773681640625, -1.3730907440185547, 0.4221954345703125, 6.951774597167969, 1.7304611206054688, 0.7625961303710938, 2.0856475830078125, 0.011541366577148438, -1.22119140625, 6.033294677734375, 0.19085311889648438, 2.9103145599365234, -2.0088539123535156, 0.499237060546875, 0.9920196533203125, 1.9766845703125, 5.282142639160156, -1.3882198333740234, 2.3245315551757812, -0.5631256103515625, 1.1728591918945312, -1.0114631652832031, 0.970184326171875, 1.1730422973632812, 0.6038055419921875, 2.26239013671875, 1.0750141143798828, 2.6267528533935547, 1.849905014038086, 0.9374961853027344, 0.685821533203125, -0.106536865234375, 2.426553726196289, 2.2740097045898438, 2.597564697265625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000394.npy"} +{"epoch": 0.5956160241874527, "step": 395, "batch_size": 64, "mean": 1.2934160232543945, "std": 1.7405805587768555, "min": -2.490528106689453, "p10": -0.9644105911254882, "median": 1.1672191619873047, "p90": 3.5950607299804687, "max": 5.013586044311523, "pos_frac": 0.765625, "sample": [2.0553836822509766, 2.8480224609375, 2.5460662841796875, -0.07166290283203125, 3.1380844116210938, 2.99945068359375, -0.04943275451660156, 3.5126304626464844, 3.5846786499023438, -1.0372600555419922, 1.1302337646484375, 2.297271728515625, 3.974212646484375, 3.7945022583007812, 0.3291015625, 5.013586044311523, 1.8147125244140625, 2.0357818603515625, -0.894439697265625, 1.2387161254882812, 0.6172103881835938, 0.9351272583007812, 0.7030296325683594, 1.1320762634277344, 2.001150131225586, -0.5594959259033203, 2.6824417114257812, 2.3909988403320312, 1.6335315704345703, -1.438446044921875, 1.4728622436523438, 3.26507568359375, 2.3574981689453125, -0.20708847045898438, -1.2503509521484375, 3.0900039672851562, -2.490528106689453, 0.08374404907226562, 3.1245498657226562, -0.9943981170654297, -0.5912094116210938, 0.2636070251464844, 3.844085693359375, 0.13057708740234375, 4.119758605957031, 3.5995101928710938, 0.8152656555175781, 1.3359527587890625, -0.6858596801757812, -0.30950164794921875, 0.6520519256591797, 4.6678619384765625, 0.082061767578125, 0.9920997619628906, 2.4691314697265625, 1.8790740966796875, 0.16596603393554688, 0.2661151885986328, 1.2687969207763672, -2.466552734375, 1.140420913696289, -1.4014053344726562, 1.1940174102783203, 0.5381698608398438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000395.npy"} +{"epoch": 0.5971277399848829, "step": 396, "batch_size": 64, "mean": 1.4940763711929321, "std": 1.769687294960022, "min": -1.7030029296875, "p10": -0.5379468917846679, "median": 1.4396238327026367, "p90": 3.8821578979492197, "max": 6.031274795532227, "pos_frac": 0.765625, "sample": [4.184539794921875, 4.7670440673828125, 2.1575775146484375, 5.541465759277344, 3.6412830352783203, 6.031274795532227, 0.5334434509277344, 1.5173606872558594, 0.18350982666015625, 0.5993690490722656, 1.7817916870117188, -0.6813430786132812, 1.9290504455566406, 0.2520256042480469, 1.0586166381835938, 0.20236968994140625, 3.5093994140625, 3.2973575592041016, -0.5300655364990234, 1.7037162780761719, -0.41748046875, -0.5135936737060547, 5.715599060058594, 0.9373054504394531, 1.16693115234375, 2.7180938720703125, 1.2119331359863281, -1.7030029296875, 1.520263671875, 1.9766426086425781, 0.8575458526611328, -0.5413246154785156, 0.9679145812988281, 4.091583251953125, 1.94293212890625, 2.1684951782226562, 1.7223777770996094, 1.6900482177734375, 3.9722137451171875, 3.672027587890625, -0.5570106506347656, 1.979635238647461, -0.11947059631347656, 1.880401611328125, 0.23587799072265625, -0.7866668701171875, 0.35881805419921875, 0.0281219482421875, 1.8961029052734375, -0.5798301696777344, 1.7019271850585938, -1.4022789001464844, 1.361886978149414, -0.01587677001953125, -0.2665576934814453, 2.893085479736328, 1.9294281005859375, 2.5223655700683594, 0.9257984161376953, 0.6939239501953125, -0.4774932861328125, 3.6385135650634766, -0.4826698303222656, 3.426563262939453], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000396.npy"} +{"epoch": 0.5986394557823129, "step": 397, "batch_size": 64, "mean": 0.9081062078475952, "std": 2.217355489730835, "min": -7.662967681884766, "p10": -1.761421203613281, "median": 0.9890365600585938, "p90": 3.4814643859863286, "max": 5.506797790527344, "pos_frac": 0.71875, "sample": [0.19399070739746094, 1.534881591796875, -2.520719528198242, 1.5161514282226562, -1.1143417358398438, 0.8344497680664062, 2.505870819091797, 1.3820877075195312, 0.8953018188476562, 1.6871795654296875, 0.8410263061523438, 2.445636749267578, 1.9310302734375, 5.506797790527344, 3.0431137084960938, 2.7658615112304688, -3.684478759765625, -1.89483642578125, 0.22668838500976562, 0.024677276611328125, 3.062936782836914, -2.4712371826171875, 1.6552600860595703, -0.134185791015625, 1.1688385009765625, -2.3982696533203125, -0.849029541015625, 0.7276687622070312, 2.8763046264648438, 3.5249862670898438, 0.29737091064453125, 0.6173648834228516, 4.104461669921875, 3.379913330078125, 4.55748176574707, 1.322021484375, 2.4525489807128906, 4.6993408203125, 3.0322017669677734, 0.5222263336181641, 1.623382568359375, 2.484100341796875, 1.9997005462646484, 1.0203170776367188, -7.662967681884766, 1.354827880859375, 3.767791748046875, -1.0099945068359375, 0.13468551635742188, -1.4501190185546875, 1.2877464294433594, 0.9214706420898438, 0.972320556640625, -0.1797962188720703, -0.258331298828125, -0.12088394165039062, 0.7836112976074219, 1.1072406768798828, -1.1599578857421875, 1.0057525634765625, -2.4354209899902344, -1.052825927734375, -0.28163719177246094, 4.999208450317383], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000397.npy"} +{"epoch": 0.600151171579743, "step": 398, "batch_size": 64, "mean": 1.4355957508087158, "std": 1.6893250942230225, "min": -1.9839744567871094, "p10": -0.38080368041992185, "median": 1.0825481414794922, "p90": 3.840402221679688, "max": 5.475765228271484, "pos_frac": 0.78125, "sample": [0.4805450439453125, 0.6040840148925781, 1.3999176025390625, 0.0964813232421875, 1.2690620422363281, 0.7516975402832031, 4.967071533203125, 4.466339111328125, -0.8778953552246094, 5.475765228271484, 2.5276412963867188, 0.9872283935546875, -0.36322784423828125, -0.26477813720703125, 2.4418487548828125, -0.2479248046875, 2.3216514587402344, -1.467041015625, 1.109039306640625, -1.1277790069580078, 1.68048095703125, -0.10311508178710938, 4.772064208984375, 3.9023284912109375, -0.2552947998046875, 3.2329235076904297, 5.19549560546875, 1.3220443725585938, 1.5840339660644531, 1.1541748046875, 0.8925857543945312, 1.0291481018066406, -0.073638916015625, 3.3755950927734375, 1.880218505859375, -1.0193462371826172, -0.496551513671875, 3.6959075927734375, -0.1446075439453125, 1.0496330261230469, -0.388336181640625, 0.9239425659179688, 2.0555362701416016, 0.6812782287597656, 1.9635200500488281, 2.4904861450195312, 0.8739757537841797, 0.8341140747070312, 0.4359779357910156, 2.635343551635742, 0.14286041259765625, 5.099700927734375, 2.005115509033203, 1.0560569763183594, 1.6441497802734375, 0.39492034912109375, 2.4976959228515625, -1.9839744567871094, 2.3363265991210938, 0.8579120635986328, 2.146129608154297, 2.0404415130615234, 3.1484317779541016, 0.7627182006835938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000398.npy"} +{"epoch": 0.6016628873771731, "step": 399, "batch_size": 64, "mean": 1.6412285566329956, "std": 2.3538200855255127, "min": -2.3163223266601562, "p10": -0.7989486694335936, "median": 1.0020675659179688, "p90": 4.4313606262207035, "max": 9.503326416015625, "pos_frac": 0.78125, "sample": [0.21724700927734375, 0.8843841552734375, 1.1608428955078125, 2.62353515625, -0.942169189453125, 3.1129150390625, 3.2921142578125, 1.6301155090332031, -1.0197563171386719, -2.3163223266601562, -1.2816925048828125, 9.503326416015625, 0.8643360137939453, -1.8236732482910156, 1.5926055908203125, 0.11370849609375, 1.712646484375, 1.8018798828125, 0.925933837890625, 5.53021240234375, -1.41668701171875, 0.9141693115234375, -0.6468276977539062, 2.890899658203125, 4.9404296875, -0.8641433715820312, 1.7682952880859375, 9.312835693359375, -0.4465484619140625, 0.28224945068359375, 1.5218429565429688, 0.042095184326171875, 2.180267333984375, 0.4301261901855469, 0.17791748046875, -0.5008773803710938, -0.12351226806640625, 2.8298110961914062, 0.5370025634765625, 2.3103866577148438, 7.024269104003906, -0.2028331756591797, 3.652496337890625, 1.0782012939453125, 2.97332763671875, 0.6948509216308594, 2.182483673095703, 0.7240257263183594, -0.25316619873046875, 0.731414794921875, 3.9988346099853516, 4.333473205566406, 5.2902069091796875, 0.8324432373046875, 2.4031448364257812, 3.542327880859375, 1.1647720336914062, 4.4733123779296875, -0.0970916748046875, 0.3794746398925781, 4.033733367919922, 0.15831756591796875, 0.08724212646484375, 2.1114425659179688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000399.npy"} +{"epoch": 0.6031746031746031, "step": 400, "batch_size": 64, "mean": 1.0737799406051636, "std": 1.8462156057357788, "min": -2.256378173828125, "p10": -0.7502927780151367, "median": 0.5054397583007812, "p90": 3.6915838241577155, "max": 5.772357940673828, "pos_frac": 0.6875, "sample": [-0.4285736083984375, 0.405731201171875, 0.014301300048828125, 4.9836273193359375, 2.5588760375976562, 3.9744033813476562, -1.1083450317382812, 2.3886146545410156, 1.66119384765625, 3.773691177368164, 0.7338447570800781, -0.5209293365478516, 0.08647537231445312, 0.49016761779785156, -0.2749595642089844, 0.880615234375, 2.528961181640625, 4.303152084350586, 0.8363265991210938, 3.0071372985839844, 0.6534423828125, 0.5140609741210938, 3.1881256103515625, -0.4947185516357422, 0.971343994140625, 2.313070297241211, -0.4607677459716797, 1.2071533203125, 0.14189910888671875, 0.08762550354003906, -1.065338134765625, -0.3921031951904297, 1.73583984375, 1.1603469848632812, 5.772357940673828, -0.4450950622558594, 0.13481903076171875, -0.46481895446777344, 2.7502288818359375, 0.4359016418457031, 3.3736610412597656, 3.5, -2.0588226318359375, -0.360565185546875, 2.0344772338867188, 0.5714340209960938, 1.1500625610351562, -0.7130889892578125, 2.0801544189453125, 4.712852478027344, -0.4935798645019531, 0.26544952392578125, -0.7662372589111328, 0.49681854248046875, -1.2486629486083984, 5.625152587890625, 3.132488250732422, -2.256378173828125, -0.3310546875, 0.0582275390625, -0.9833335876464844, 0.4206123352050781, -0.0389404296875, 2.5135040283203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000400.npy"} +{"epoch": 0.6046863189720333, "step": 401, "batch_size": 64, "mean": 1.8959497213363647, "std": 2.1779561042785645, "min": -2.047283172607422, "p10": -0.39491615295410154, "median": 1.7367706298828125, "p90": 4.731573104858399, "max": 8.464340209960938, "pos_frac": 0.78125, "sample": [-1.1097564697265625, 3.3543014526367188, 3.25067138671875, 1.6927337646484375, -0.7492294311523438, 0.3588829040527344, 1.6391143798828125, 1.9228515625, 0.29911041259765625, -0.29544830322265625, 4.333198547363281, 0.12903594970703125, -1.5981483459472656, -0.33045196533203125, 2.0617752075195312, -0.09647369384765625, 4.4577484130859375, 4.851409912109375, 6.280454635620117, 4.378143310546875, -0.41063690185546875, 1.6548233032226562, 0.3957023620605469, 2.303302764892578, -1.31005859375, -2.047283172607422, 1.831298828125, 2.719257354736328, 1.1153373718261719, 1.7808074951171875, 2.581146240234375, 2.2967300415039062, 1.412343978881836, 4.226654052734375, -0.14354705810546875, 3.096466064453125, 0.027421951293945312, 1.5803184509277344, -0.3582344055175781, 2.8592071533203125, 0.2228546142578125, 4.444520950317383, 3.089263916015625, -0.7041893005371094, 0.734619140625, 2.2458038330078125, -0.289276123046875, 5.7248382568359375, 3.179443359375, 1.9478797912597656, 1.8132266998291016, 0.7012414932250977, 1.1486339569091797, 1.4583740234375, -0.3403892517089844, 1.5588932037353516, 5.305103302001953, 2.1290035247802734, 4.848926544189453, 7.4605560302734375, 2.3430538177490234, 0.23626327514648438, 3.1768150329589844, 8.464340209960938], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000401.npy"} +{"epoch": 0.6061980347694633, "step": 402, "batch_size": 64, "mean": 1.57255220413208, "std": 2.0196876525878906, "min": -2.0258331298828125, "p10": -0.6316221237182615, "median": 1.364212989807129, "p90": 4.470912170410159, "max": 6.971858978271484, "pos_frac": 0.78125, "sample": [3.4653377532958984, 3.5107498168945312, 2.5927085876464844, 2.038654327392578, 1.6973342895507812, 1.8792438507080078, 4.8080596923828125, -0.1341552734375, 1.6079025268554688, 5.887748718261719, 1.3543567657470703, 4.815361022949219, 1.832632064819336, -1.8283843994140625, 3.684234619140625, 1.5323638916015625, 0.78948974609375, 1.9206714630126953, 0.604248046875, 0.8686676025390625, 6.971858978271484, 0.10387802124023438, 0.13753509521484375, -1.558380126953125, 2.920135498046875, -0.08518028259277344, 1.1678543090820312, 5.5047149658203125, -0.3787879943847656, 0.7829551696777344, 5.8758697509765625, 6.2763824462890625, 1.3740692138671875, 0.40320587158203125, 3.0547027587890625, -0.05045318603515625, 2.221893310546875, 3.4997692108154297, 1.217132568359375, 0.7873878479003906, -0.41732215881347656, -1.6592826843261719, -2.0258331298828125, 3.626008987426758, 3.17364501953125, 0.2610645294189453, -0.36585044860839844, 0.97052001953125, 0.2618408203125, 0.5965347290039062, 2.2528514862060547, 1.6084136962890625, 2.315723419189453, 1.9586334228515625, 0.2029876708984375, -0.1915607452392578, 1.0348243713378906, -0.7234649658203125, -1.2022857666015625, 2.0988922119140625, 1.6365623474121094, 2.3397445678710938, -1.40447998046875, 1.1414108276367188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000402.npy"} +{"epoch": 0.6077097505668935, "step": 403, "batch_size": 64, "mean": 1.9954662322998047, "std": 2.306568145751953, "min": -3.130523681640625, "p10": -0.7831130981445312, "median": 1.6912555694580078, "p90": 4.731451797485351, "max": 8.2073974609375, "pos_frac": 0.8125, "sample": [-0.7926483154296875, 2.314971923828125, 3.42535400390625, 2.3434524536132812, 4.200981140136719, -0.36138916015625, 7.282867431640625, 1.8784141540527344, 0.31758880615234375, 2.8680496215820312, 4.260936737060547, -1.054840087890625, 5.61860466003418, 1.3579292297363281, 5.291864395141602, 1.6065216064453125, 1.6014156341552734, 0.9438800811767578, 0.8820877075195312, 0.4369468688964844, -0.7595691680908203, -1.0174064636230469, 8.2073974609375, 6.901237487792969, 0.198944091796875, 3.6981582641601562, 2.0070037841796875, 0.7949047088623047, 1.6866950988769531, 2.5529403686523438, -0.033657073974609375, 1.149383544921875, 1.6958160400390625, 2.0707626342773438, 1.928567886352539, -1.1795692443847656, 0.8856029510498047, 6.658531188964844, 0.6800270080566406, 1.618621826171875, 0.6263351440429688, 0.2741050720214844, 4.710247039794922, 1.3714027404785156, 3.306180953979492, 0.36016845703125, 2.322978973388672, 4.178611755371094, 3.313800811767578, 1.1871337890625, 4.483612060546875, 3.266674041748047, -3.130523681640625, -1.4658164978027344, 3.2666015625, -2.0312671661376953, 4.74053955078125, 3.6414642333984375, -0.588287353515625, 3.3423843383789062, -0.7608642578125, 1.5751800537109375, 3.65289306640625, 1.8989009857177734], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000403.npy"} +{"epoch": 0.6092214663643235, "step": 404, "batch_size": 64, "mean": 1.3852319717407227, "std": 1.9132604598999023, "min": -2.665729522705078, "p10": -1.0562820434570312, "median": 1.3370094299316406, "p90": 4.0245153427124025, "max": 5.999019622802734, "pos_frac": 0.765625, "sample": [0.99969482421875, 0.8114242553710938, 2.54425048828125, -1.12835693359375, 0.27059364318847656, -0.024845123291015625, 4.06982421875, 0.42637062072753906, -0.8164634704589844, -1.2447052001953125, 2.6217041015625, 1.836578369140625, -2.665729522705078, -1.7618675231933594, 0.11721420288085938, 2.1657867431640625, -0.42836570739746094, -2.4165706634521484, 2.9932632446289062, 3.113861083984375, 2.9516143798828125, 5.690757751464844, 2.811798095703125, 4.200325012207031, -0.705780029296875, 1.0764999389648438, 0.019580841064453125, 1.2431640625, 0.6598396301269531, 3.1004981994628906, 0.5164718627929688, 2.4193954467773438, -0.1278057098388672, 3.918794631958008, -1.0623016357421875, -0.261199951171875, 0.3145751953125, 1.3629989624023438, 0.23883819580078125, 0.7805442810058594, 1.7431716918945312, 1.3863983154296875, 1.9386787414550781, 0.753326416015625, 5.200222015380859, 3.203296661376953, 1.3110198974609375, 2.3624496459960938, 5.999019622802734, 0.9748725891113281, 0.163330078125, 5.0733642578125, 4.336578369140625, 1.8426666259765625, 2.1757850646972656, 2.0654296875, 2.865875244140625, -1.0705146789550781, -0.7491607666015625, -1.042236328125, 1.7964248657226562, 2.6078453063964844, 1.5967388153076172, 1.4879913330078125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000404.npy"} +{"epoch": 0.6107331821617535, "step": 405, "batch_size": 64, "mean": 1.5051651000976562, "std": 2.268017053604126, "min": -2.9352340698242188, "p10": -0.8560075759887694, "median": 1.1609439849853516, "p90": 4.1251829147338865, "max": 10.236854553222656, "pos_frac": 0.703125, "sample": [3.43701171875, -0.18260574340820312, -0.8995037078857422, 10.236854553222656, 1.905303955078125, 1.7983779907226562, 4.1314239501953125, 2.7300872802734375, 0.26786041259765625, -2.9352340698242188, -0.3813629150390625, 0.9538555145263672, -0.6373367309570312, -1.2337017059326172, 2.0064239501953125, 0.6282119750976562, 5.38922119140625, -0.2693462371826172, -1.23931884765625, 1.675018310546875, 2.948284149169922, 2.757080078125, 0.9759368896484375, 4.31646728515625, -1.0413150787353516, 0.8344879150390625, 3.288177490234375, 3.5495662689208984, -0.6764144897460938, 1.7965850830078125, 1.0178871154785156, -0.5332984924316406, 0.6761932373046875, 2.0315017700195312, -0.09695053100585938, 2.4388465881347656, 1.1840400695800781, -0.12121772766113281, 1.6602630615234375, 5.225898742675781, 1.068166732788086, -0.2614612579345703, 1.2330093383789062, 1.308563232421875, 0.437957763671875, 0.6331081390380859, 0.558258056640625, 1.6166191101074219, 3.77325439453125, 2.283712387084961, -0.3158378601074219, 1.137847900390625, 3.8077468872070312, 0.7574081420898438, 4.110620498657227, 8.712381362915039, -1.628021240234375, 2.0017852783203125, 1.6175956726074219, -0.22364044189453125, -0.7545166015625, 4.2450408935546875, 1.5119438171386719, -0.9142303466796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000405.npy"} +{"epoch": 0.6122448979591837, "step": 406, "batch_size": 64, "mean": 1.7559325695037842, "std": 1.932299017906189, "min": -2.6197509765625, "p10": -0.48759784698486325, "median": 1.6341629028320312, "p90": 4.275764465332031, "max": 6.4035491943359375, "pos_frac": 0.765625, "sample": [2.4589767456054688, 0.6775665283203125, -0.38375091552734375, -0.9776229858398438, 1.83880615234375, 2.9616546630859375, 4.22222900390625, 4.2777557373046875, -1.8607730865478516, 4.2711181640625, 2.463695526123047, 1.2663688659667969, 1.1898155212402344, -0.266571044921875, 3.2721939086914062, 1.645782470703125, 4.40679931640625, 0.9310588836669922, 1.589141845703125, 6.4035491943359375, 2.7655868530273438, 1.3787384033203125, 2.1225204467773438, 4.415412902832031, 0.8828582763671875, 3.2532882690429688, -0.5600032806396484, 1.6477813720703125, -0.27747154235839844, -0.1958751678466797, -0.2735023498535156, -0.4922161102294922, -2.6197509765625, -0.7548503875732422, 3.0909957885742188, 3.3494300842285156, 0.2888526916503906, 3.3821182250976562, -0.4768218994140625, 4.913116455078125, 2.415740966796875, -0.3758811950683594, 0.9122543334960938, 5.803047180175781, 0.5403213500976562, 2.592205047607422, -0.8041801452636719, 0.7268562316894531, 3.860057830810547, 3.6597671508789062, 3.5869007110595703, -0.22314453125, 4.6649932861328125, 2.5574188232421875, 0.1687946319580078, 0.718292236328125, 1.6225433349609375, 2.7214508056640625, 0.24613571166992188, 1.1911697387695312, 3.5446624755859375, 0.237762451171875, 2.5457000732421875, 3.2388153076171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000406.npy"} +{"epoch": 0.6137566137566137, "step": 407, "batch_size": 64, "mean": 1.224708914756775, "std": 2.0374197959899902, "min": -4.84039306640625, "p10": -0.7366512298583984, "median": 1.0557489395141602, "p90": 3.8641494750976575, "max": 8.822853088378906, "pos_frac": 0.71875, "sample": [0.7423629760742188, 0.7050914764404297, 1.5411376953125, 4.157257080078125, -0.0559234619140625, 2.8477935791015625, 2.5507354736328125, -0.051593780517578125, -0.4764595031738281, 2.230804443359375, 3.3855819702148438, 0.059600830078125, -0.930419921875, -2.9694290161132812, -0.6418304443359375, 0.6291275024414062, 1.5584716796875, 1.944366455078125, 4.364154815673828, 2.877168655395508, -0.2289886474609375, -0.0213775634765625, 2.360095977783203, 1.20806884765625, -0.136962890625, 1.2079010009765625, 1.3939285278320312, 0.9750385284423828, 0.73468017578125, 8.822853088378906, 1.3686904907226562, 0.28244781494140625, 3.0040130615234375, 2.1067047119140625, 4.095550537109375, 0.87005615234375, 1.331695556640625, 4.241912841796875, -1.3645973205566406, 0.4437255859375, -1.4141387939453125, 5.1023101806640625, 2.3486175537109375, 0.11115264892578125, 2.8836822509765625, 3.537017822265625, 2.9065933227539062, 0.26857757568359375, 2.501373291015625, -0.5735206604003906, 0.7679443359375, -4.84039306640625, -0.237823486328125, 0.615875244140625, -0.853240966796875, 1.6427726745605469, 0.21376991271972656, 4.0043487548828125, -0.5941333770751953, -0.6620140075683594, -0.7686386108398438, 1.1364593505859375, 1.4341812133789062, 1.687164306640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000407.npy"} +{"epoch": 0.6152683295540439, "step": 408, "batch_size": 64, "mean": 1.8033206462860107, "std": 2.0076546669006348, "min": -1.786834716796875, "p10": -0.7500326156616208, "median": 1.5024337768554688, "p90": 4.615451812744141, "max": 6.4221954345703125, "pos_frac": 0.8125, "sample": [0.9478473663330078, 3.5525989532470703, 1.545074462890625, 2.463878631591797, -0.3153400421142578, -0.8450603485107422, -1.651885986328125, 1.0720996856689453, 2.6719799041748047, 1.1642227172851562, 2.259929656982422, 4.6021270751953125, -0.5283012390136719, 0.2742462158203125, 4.5374298095703125, 3.18646240234375, 0.5846366882324219, 0.34307861328125, 2.420011520385742, 0.5947113037109375, 0.6188735961914062, 0.10544586181640625, -1.0226173400878906, 2.1084766387939453, 6.4221954345703125, -1.786834716796875, 2.0764312744140625, 6.412986755371094, 4.621162414550781, 2.76177978515625, 3.252147674560547, 0.37448692321777344, 3.314180374145508, -0.07664108276367188, 1.7773818969726562, 4.128211975097656, 2.3709545135498047, -1.3262786865234375, 5.8038330078125, 0.844573974609375, 0.762054443359375, 2.2009658813476562, 2.594512939453125, 4.804416656494141, 5.2043914794921875, 0.8857059478759766, 3.502155303955078, -0.9112663269042969, 0.22038841247558594, 2.9981689453125, 3.014617919921875, 1.2069931030273438, 4.856742858886719, -1.2511310577392578, 1.4597930908203125, 3.7511138916015625, -0.21244430541992188, -0.2269287109375, 2.9758987426757812, 0.4993476867675781, 0.6651687622070312, 0.4129524230957031, 1.1504974365234375, 3.1879043579101562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000408.npy"} +{"epoch": 0.6167800453514739, "step": 409, "batch_size": 64, "mean": 0.9983994960784912, "std": 2.0508670806884766, "min": -3.6161117553710938, "p10": -1.2023141860961912, "median": 0.6788511276245117, "p90": 3.868869400024414, "max": 6.7998809814453125, "pos_frac": 0.71875, "sample": [-0.6085662841796875, 0.7027549743652344, 0.0489501953125, 0.0589599609375, 0.13171005249023438, -0.3215065002441406, 2.160825729370117, -0.7542762756347656, 0.8739471435546875, -1.8003616333007812, 0.3858299255371094, 3.8417205810546875, 0.3939323425292969, 6.7998809814453125, 1.3081703186035156, 0.399932861328125, 3.1726455688476562, -1.0103530883789062, -0.6494426727294922, 4.187107086181641, 3.880504608154297, -1.0267066955566406, 1.5106964111328125, 1.8281707763671875, 1.3961563110351562, -0.43121337890625, 0.49083709716796875, 2.3581771850585938, 1.5529937744140625, -2.4508514404296875, -0.31523895263671875, 1.3514022827148438, 1.9639434814453125, -1.3152618408203125, 2.3396453857421875, 0.6771259307861328, 3.638885498046875, 4.8489227294921875, 0.16779327392578125, 0.6805763244628906, -0.032360076904296875, 0.40618133544921875, 0.31157684326171875, -2.2235183715820312, 1.4742889404296875, 0.10888671875, 1.1516494750976562, 0.7725009918212891, -3.6161117553710938, 4.417871475219727, 0.16864395141601562, 2.0778274536132812, -0.20537948608398438, 2.1509628295898438, -3.272306442260742, 0.6577224731445312, 2.5630340576171875, -0.35990142822265625, 1.0407333374023438, 4.466339111328125, 5.910457611083984, -1.2775745391845703, 3.81292724609375, 0.9246940612792969], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000409.npy"} +{"epoch": 0.618291761148904, "step": 410, "batch_size": 64, "mean": 1.2474339008331299, "std": 1.5462801456451416, "min": -2.8246917724609375, "p10": -0.6908046722412108, "median": 1.3432703018188477, "p90": 2.9652559280395514, "max": 5.724891662597656, "pos_frac": 0.8125, "sample": [0.8938179016113281, 0.8144111633300781, 1.6340599060058594, 0.34442901611328125, 3.8620147705078125, -1.630462646484375, 2.509449005126953, 0.3716239929199219, -1.2506256103515625, -1.8165168762207031, 1.9308891296386719, 1.0150375366210938, 0.4478130340576172, -0.516998291015625, 2.6563148498535156, 2.1964111328125, 3.326366424560547, 0.7988739013671875, 2.7532215118408203, 2.5221710205078125, 2.7721099853515625, 0.8862190246582031, -0.7438812255859375, 1.7310104370117188, 1.6662673950195312, 1.386474609375, -2.8246917724609375, -0.0314483642578125, 1.0522689819335938, -0.05908203125, 1.940887451171875, 0.7124252319335938, 0.6314945220947266, 2.532093048095703, 1.0920486450195312, 5.724891662597656, 2.7020263671875, -0.5669593811035156, 1.4170112609863281, 3.31396484375, 1.1285018920898438, -1.5684967041015625, 0.6140632629394531, 0.2934417724609375, 0.43746185302734375, 1.8901290893554688, 2.5513916015625, 0.53057861328125, 1.5429248809814453, 2.243377685546875, 1.8289947509765625, 1.400238037109375, 4.350227355957031, 2.4646759033203125, -0.27208709716796875, 0.44324493408203125, 1.4844398498535156, 1.338846206665039, 0.9849109649658203, 3.048032760620117, 3.5337905883789062, 1.3476943969726562, -1.3965911865234375, 1.418548583984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000410.npy"} +{"epoch": 0.6198034769463341, "step": 411, "batch_size": 64, "mean": 1.443709135055542, "std": 2.3693110942840576, "min": -3.1055908203125, "p10": -1.309610366821289, "median": 1.4003219604492188, "p90": 4.631036567687989, "max": 7.7900848388671875, "pos_frac": 0.703125, "sample": [-1.7228240966796875, 4.66900634765625, 4.830944061279297, 4.542440414428711, 0.28284454345703125, 0.8526077270507812, -1.00146484375, -2.4096603393554688, 1.3838882446289062, 0.14684677124023438, 2.1150054931640625, 3.446990966796875, 2.72955322265625, 1.4984474182128906, 4.8130950927734375, -0.172027587890625, 1.9131050109863281, -0.9893341064453125, 2.7554855346679688, 1.9201698303222656, 3.4093246459960938, 4.275688171386719, 4.1208343505859375, 0.4085979461669922, -2.1267623901367188, -0.043731689453125, 0.1382274627685547, 4.928947448730469, -0.5705146789550781, -3.1055908203125, -1.33062744140625, -1.920480728149414, 1.4167556762695312, 2.7883377075195312, 0.8627815246582031, 0.7649383544921875, -0.4354228973388672, 7.7900848388671875, 0.227294921875, 7.59466552734375, 2.3570556640625, 2.042102813720703, -1.1298828125, 3.1148452758789062, 2.2181167602539062, 4.276020050048828, -1.2605705261230469, 2.965606689453125, -0.3361072540283203, -0.2320709228515625, 0.2589130401611328, 0.2102508544921875, 2.370838165283203, 2.9139175415039062, -0.10790634155273438, 2.1758651733398438, 1.9406871795654297, 0.3140430450439453, 5.967357635498047, 2.5536575317382812, -0.49423980712890625, 0.4082183837890625, -2.452178955078125, 1.5243816375732422], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000411.npy"} +{"epoch": 0.6213151927437641, "step": 412, "batch_size": 64, "mean": 1.4864020347595215, "std": 1.9418601989746094, "min": -2.293701171875, "p10": -0.8756954193115233, "median": 1.3281211853027344, "p90": 4.257559967041016, "max": 6.14015007019043, "pos_frac": 0.734375, "sample": [-0.6069412231445312, 4.38433837890625, 1.1712646484375, 6.14015007019043, 2.3234405517578125, 1.6265602111816406, 1.7669601440429688, 1.8474254608154297, -1.6637077331542969, 0.7175025939941406, 2.1410789489746094, 0.6274147033691406, 0.578826904296875, 5.172462463378906, -0.39087867736816406, -1.0729598999023438, 2.356548309326172, -0.17721939086914062, -1.6619186401367188, -0.5781326293945312, 2.658447265625, 3.2266845703125, 1.9065322875976562, 0.8932571411132812, -0.2063140869140625, 1.3967666625976562, 3.0886383056640625, 1.7372055053710938, 2.310779571533203, 1.2594757080078125, 1.0267219543457031, 4.212127685546875, 1.1805248260498047, 4.73321533203125, -0.694305419921875, -0.4025459289550781, 2.41571044921875, -0.4401359558105469, 2.061124801635742, 0.8168563842773438, -1.3660755157470703, -2.293701171875, 3.9259414672851562, -0.6797103881835938, 0.47832298278808594, 3.9857101440429688, 4.277030944824219, 3.917327880859375, 5.300865173339844, -0.488616943359375, 2.2333145141601562, 3.167743682861328, 4.648929595947266, -1.5900192260742188, 2.02557373046875, 2.6295242309570312, 0.7026042938232422, 0.4727954864501953, -0.9534339904785156, 1.1806259155273438, 1.8616867065429688, 0.17298316955566406, 2.8626632690429688, 0.774658203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000412.npy"} +{"epoch": 0.6228269085411943, "step": 413, "batch_size": 64, "mean": 1.3262066841125488, "std": 2.304067850112915, "min": -2.976715087890625, "p10": -1.428926086425781, "median": 1.2913055419921875, "p90": 3.910872268676758, "max": 7.1158905029296875, "pos_frac": 0.703125, "sample": [0.014307022094726562, -0.4185333251953125, 2.7761306762695312, 0.846588134765625, -0.9551773071289062, -0.20837020874023438, 0.8502273559570312, 6.184719085693359, -2.0382156372070312, 0.70623779296875, -2.6064910888671875, 2.3375396728515625, 1.7785110473632812, -0.7258453369140625, -1.0994415283203125, 0.5899124145507812, -0.022235870361328125, 7.114902496337891, 0.37305259704589844, 1.7405147552490234, 5.8298187255859375, 1.6892776489257812, 2.206817626953125, 0.3245582580566406, 2.1817550659179688, 0.8727550506591797, 7.1158905029296875, 6.0225372314453125, 0.5421218872070312, 1.8630905151367188, 1.3132553100585938, 2.089662551879883, 1.5380706787109375, 1.757568359375, 5.6009063720703125, -1.5321197509765625, 1.167510986328125, 1.2693557739257812, 3.2435226440429688, -0.1385040283203125, -2.045989990234375, 3.1635665893554688, 2.6623306274414062, -2.0377120971679688, 1.9691314697265625, -0.9165153503417969, -2.976715087890625, -1.169891357421875, 1.2302932739257812, 1.3571815490722656, -0.0802459716796875, -2.6555328369140625, 2.79998779296875, -0.3014945983886719, 0.30512237548828125, 1.314849853515625, 1.5323600769042969, 2.8215789794921875, 2.317729949951172, 3.2567825317382812, -1.188140869140625, 3.560335159301758, 3.8063812255859375, 3.9556541442871094], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000413.npy"} +{"epoch": 0.6243386243386243, "step": 414, "batch_size": 64, "mean": 1.542493462562561, "std": 2.092315673828125, "min": -3.119049072265625, "p10": -0.4509666442871093, "median": 1.3044424057006836, "p90": 4.21023750305176, "max": 8.016983032226562, "pos_frac": 0.796875, "sample": [2.788541793823242, -0.486541748046875, 3.3533706665039062, 4.384918212890625, -0.33464813232421875, 2.508272171020508, -0.1873626708984375, 4.380542755126953, 0.8486557006835938, 0.069488525390625, 1.9590301513671875, 1.3438835144042969, 1.846221923828125, -1.353179931640625, 3.2881317138671875, 1.6378822326660156, -2.744903564453125, -1.7390289306640625, 0.748748779296875, 1.0374908447265625, 2.591367721557617, 1.0907955169677734, 7.710868835449219, 3.8128585815429688, 5.634910583496094, 0.5570220947265625, 1.9170188903808594, 3.116304397583008, 1.3072280883789062, 1.4588203430175781, 3.4344444274902344, 1.0581207275390625, 1.8920249938964844, 1.0146026611328125, 3.08746337890625, 1.301656723022461, 3.2832870483398438, 0.26349639892578125, 1.60687255859375, 3.0385284423828125, 0.7019119262695312, -0.9540176391601562, 0.05530548095703125, 0.11790847778320312, 4.9415740966796875, -0.286590576171875, -0.174774169921875, 2.228425979614258, -3.119049072265625, -0.36795806884765625, 1.228271484375, 0.4163169860839844, -0.8405780792236328, 0.44819068908691406, 4.3881988525390625, 8.016983032226562, 0.06880569458007812, 0.4514923095703125, 1.8105545043945312, 1.7493629455566406, 1.4218292236328125, -0.17770004272460938, 0.5064697265625, 3.5614395141601562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000414.npy"} +{"epoch": 0.6258503401360545, "step": 415, "batch_size": 64, "mean": 1.2547565698623657, "std": 2.2463784217834473, "min": -4.125221252441406, "p10": -1.1360748291015623, "median": 1.0059452056884766, "p90": 4.214898490905765, "max": 6.6389923095703125, "pos_frac": 0.734375, "sample": [-0.142333984375, 2.908230781555176, 2.194131851196289, 2.4575939178466797, 1.4941902160644531, 0.9957790374755859, 3.1737289428710938, 4.66020393371582, 0.110137939453125, 2.166748046875, 2.1982955932617188, -3.3570785522460938, 2.7095565795898438, 3.065387725830078, -0.879150390625, 5.373012542724609, 6.597236633300781, 0.491790771484375, 3.4705734252929688, -0.3673267364501953, 0.862335205078125, 0.290069580078125, 0.5747299194335938, 0.13566017150878906, 2.5396881103515625, 6.6389923095703125, -2.9912338256835938, -0.0848236083984375, 2.178638458251953, 2.046966552734375, -1.8876190185546875, 2.62744140625, 1.0161113739013672, -1.246185302734375, -2.73193359375, 6.3776702880859375, 2.9564895629882812, 5.112878799438477, 2.0437164306640625, -2.1037063598632812, 1.3041763305664062, -0.8102569580078125, -0.6947364807128906, -4.125221252441406, 1.3559646606445312, 1.3832626342773438, 0.019016265869140625, 0.7978973388671875, 0.5444602966308594, 0.8491268157958984, -0.23213958740234375, 1.0854949951171875, -0.11916351318359375, 0.4858207702636719, 0.6644973754882812, 1.639739990234375, -0.020650863647460938, 2.2388763427734375, 0.2008056640625, -0.1949462890625, 0.17487335205078125, 2.034160614013672, 4.502168655395508, 3.5446014404296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000415.npy"} +{"epoch": 0.6273620559334845, "step": 416, "batch_size": 64, "mean": 1.1456834077835083, "std": 2.161367893218994, "min": -3.44012451171875, "p10": -1.2333694458007811, "median": 0.8525447845458984, "p90": 3.677678108215333, "max": 7.906717300415039, "pos_frac": 0.765625, "sample": [0.5066680908203125, -2.6820068359375, 0.4439811706542969, 3.912261962890625, -0.3116302490234375, 4.434700012207031, -0.99017333984375, -1.167327880859375, 2.6261043548583984, 4.468994140625, 0.614501953125, 6.880462646484375, 5.441986083984375, 0.3321418762207031, 0.09773445129394531, 1.502166748046875, 0.8007583618164062, 1.7411060333251953, -2.4301071166992188, -2.9262237548828125, 2.636028289794922, 1.9232139587402344, 0.8434181213378906, 1.0622787475585938, -0.4961566925048828, 2.4386367797851562, 0.249176025390625, 7.906717300415039, 3.7529830932617188, 2.1531829833984375, 0.8616714477539062, 1.258758544921875, 0.6859283447265625, -2.7993106842041016, 0.7831230163574219, -0.15475082397460938, 0.44971466064453125, -2.633626937866211, 0.6068401336669922, 0.6620445251464844, -1.2616729736328125, 2.2034072875976562, -0.5294647216796875, 0.7672824859619141, 0.12242889404296875, -0.19161605834960938, 3.5019664764404297, -3.44012451171875, 0.3511333465576172, 3.2899551391601562, 1.0102767944335938, 1.0362186431884766, -0.5306053161621094, 2.1878700256347656, 0.059539794921875, 1.2029247283935547, 2.015605926513672, 3.015594482421875, 1.9589920043945312, 2.4862136840820312, 2.026580810546875, 2.3805389404296875, 2.8993892669677734, 1.2753334045410156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000416.npy"} +{"epoch": 0.6288737717309146, "step": 417, "batch_size": 64, "mean": 1.6672537326812744, "std": 2.177006959915161, "min": -3.4105148315429688, "p10": -1.018569183349609, "median": 1.7413330078125, "p90": 3.9774856567382817, "max": 8.437660217285156, "pos_frac": 0.78125, "sample": [0.911529541015625, -0.34212684631347656, -0.8307037353515625, -3.4105148315429688, 3.5298538208007812, 2.3609085083007812, -2.5047607421875, 3.618724822998047, 1.7244873046875, 1.9334640502929688, 6.4064178466796875, 0.010406494140625, 1.9939651489257812, 2.479930877685547, 1.8329925537109375, -1.1261768341064453, 3.901782989501953, 1.4478912353515625, 0.8863754272460938, 3.5901660919189453, 1.7581787109375, 2.87762451171875, 2.113912582397461, 0.7074813842773438, 1.47998046875, -1.5773506164550781, 3.757242202758789, 2.058490753173828, 0.3430213928222656, 2.2244949340820312, 1.0503406524658203, 5.7823333740234375, 4.989873886108398, -0.3110504150390625, -1.884796142578125, 0.00547027587890625, -1.0990829467773438, 0.7301597595214844, 4.009929656982422, 1.1439056396484375, 3.609447479248047, 0.9687442779541016, 2.8488807678222656, 2.2571334838867188, -0.712799072265625, 4.390861511230469, 3.411916732788086, 1.2284774780273438, -0.12867355346679688, 2.7951126098632812, 1.1529541015625, 0.9527435302734375, 0.941253662109375, -2.407318115234375, 3.220470428466797, -0.5794296264648438, 2.8029098510742188, 4.3404083251953125, 1.1077804565429688, -0.4972095489501953, 3.856914520263672, 1.9122810363769531, 2.218944549560547, 8.437660217285156], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000417.npy"} +{"epoch": 0.6303854875283447, "step": 418, "batch_size": 64, "mean": 0.9409198760986328, "std": 2.433790445327759, "min": -3.5894393920898438, "p10": -1.7639057159423828, "median": 0.9602699279785156, "p90": 3.076918029785157, "max": 9.257217407226562, "pos_frac": 0.6875, "sample": [-0.30553436279296875, 4.016958236694336, 1.530853271484375, 1.1043701171875, 0.4836158752441406, 1.1604232788085938, 0.6762657165527344, 2.5153274536132812, 6.745708465576172, -1.3960113525390625, 2.5755767822265625, 8.348526000976562, -1.8115997314453125, 0.936248779296875, 1.1587104797363281, -1.0868453979492188, 2.1221923828125, -0.4026679992675781, 3.735431671142578, -0.00446319580078125, -0.4443531036376953, 9.257217407226562, 0.6888504028320312, 1.07635498046875, 2.2183303833007812, 1.1341476440429688, -1.2005615234375, 0.5973377227783203, 1.7403602600097656, 1.3931045532226562, -2.4322509765625, -3.3184814453125, 0.9842910766601562, 2.5972557067871094, 5.62645149230957, 0.1241912841796875, 0.00160980224609375, -1.7987060546875, 2.1508731842041016, 0.5478935241699219, -3.5894393920898438, -1.2368927001953125, -3.0338573455810547, -0.2572822570800781, 0.6215629577636719, -1.2889213562011719, 2.7093887329101562, 1.2740325927734375, -1.5507068634033203, 0.3591766357421875, 1.1213531494140625, -1.095855712890625, 2.484283447265625, 1.7787246704101562, 2.8662109375, -3.199798583984375, -1.6827049255371094, 3.1672210693359375, 1.3358688354492188, 2.7133560180664062, 1.231597900390625, 0.4221000671386719, 1.4018783569335938, 0.620574951171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000418.npy"} +{"epoch": 0.6318972033257747, "step": 419, "batch_size": 64, "mean": 1.5704445838928223, "std": 2.163996696472168, "min": -2.6255645751953125, "p10": -0.8725692749023437, "median": 1.3432292938232422, "p90": 3.828343963623047, "max": 10.014699935913086, "pos_frac": 0.796875, "sample": [0.7465019226074219, 4.2325286865234375, 3.4537887573242188, 0.2613029479980469, -2.1199588775634766, 0.8283348083496094, 2.972503662109375, 0.80731201171875, -0.466827392578125, 0.7100734710693359, -0.893218994140625, 3.028606414794922, 3.930633544921875, 0.44664764404296875, -0.3489990234375, -1.2612152099609375, 2.6135635375976562, 0.450897216796875, 3.3276748657226562, -1.4266738891601562, 1.5961380004882812, 1.3753242492675781, -0.6339950561523438, 1.8147087097167969, 1.4626846313476562, -0.08106613159179688, 3.0296630859375, -2.6255645751953125, 1.3988800048828125, 6.006568908691406, -0.6212501525878906, -0.8243865966796875, -1.2718925476074219, 3.75592041015625, 6.4905548095703125, 2.973165512084961, 2.885387420654297, 1.3927345275878906, 0.38224029541015625, 1.2435455322265625, 2.3431434631347656, 3.84686279296875, 1.3111343383789062, 1.6908798217773438, 2.9848976135253906, 0.65594482421875, 3.7851333618164062, 1.4229278564453125, 0.2959251403808594, 10.014699935913086, 3.682056427001953, 0.3969860076904297, 2.1464157104492188, 1.0698089599609375, 2.5743789672851562, 0.501983642578125, 2.390430450439453, 1.0826339721679688, 4.154754638671875, 0.192657470703125, 1.2139530181884766, 0.569427490234375, -2.276092529296875, 3.4146728515625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000419.npy"} +{"epoch": 0.6334089191232048, "step": 420, "batch_size": 64, "mean": 1.126215934753418, "std": 2.3678033351898193, "min": -4.570281982421875, "p10": -1.5765804290771481, "median": 0.9549407958984375, "p90": 3.99971923828125, "max": 8.22137451171875, "pos_frac": 0.640625, "sample": [0.7646026611328125, -2.2265281677246094, 6.039613723754883, -0.035068511962890625, 5.4384765625, -0.8299980163574219, 1.6347503662109375, 0.9049167633056641, 1.4881134033203125, -3.307271957397461, -0.48773193359375, 2.338970184326172, -0.14892578125, 1.8097305297851562, -0.8509635925292969, 1.7911529541015625, 5.2359466552734375, 1.4093208312988281, 6.27783203125, 2.6079025268554688, 0.7912273406982422, -0.22365951538085938, 0.1386871337890625, 2.3017330169677734, -0.018873214721679688, 3.955413818359375, -1.111867904663086, -2.523904800415039, 1.0965709686279297, 0.9971294403076172, -1.784423828125, 0.5650253295898438, -0.09603118896484375, 0.12494277954101562, -1.1057205200195312, -0.3857688903808594, -1.6880264282226562, -2.0447006225585938, 3.1644058227539062, 1.8624496459960938, -0.3468132019042969, -4.570281982421875, -0.9434738159179688, 1.2012176513671875, 3.4876708984375, 0.9127521514892578, 1.0953292846679688, 4.087713241577148, 1.9864349365234375, 3.3790283203125, 2.6270828247070312, 1.7605361938476562, 2.4926223754882812, 0.3860454559326172, 1.0088920593261719, 1.5972785949707031, 4.018707275390625, -0.7370071411132812, 3.8133087158203125, 3.89874267578125, 0.48720550537109375, 8.22137451171875, -0.33945465087890625, -1.3165397644042969], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000420.npy"} +{"epoch": 0.6349206349206349, "step": 421, "batch_size": 64, "mean": 1.0763235092163086, "std": 2.4556055068969727, "min": -5.097818374633789, "p10": -1.6186941146850584, "median": 0.9532852172851562, "p90": 4.408104705810548, "max": 8.4154052734375, "pos_frac": 0.640625, "sample": [-2.0072193145751953, 1.77447509765625, -1.1981678009033203, 1.6638202667236328, 1.3415069580078125, 2.1119232177734375, 3.5374984741210938, -0.267181396484375, -0.09265899658203125, 8.4154052734375, 5.5081634521484375, -1.156524658203125, 3.8150177001953125, 1.234100341796875, -1.7064170837402344, 0.2615070343017578, 1.2563247680664062, -1.36810302734375, 4.271156311035156, 1.0786514282226562, 4.023082733154297, 1.064971923828125, 0.5465984344482422, -0.4472808837890625, 1.2563152313232422, -1.4140071868896484, 3.15643310546875, -5.097818374633789, -1.2746505737304688, -2.2077980041503906, 0.5196876525878906, 5.912092208862305, 4.466796875, -0.2500629425048828, 1.2227096557617188, 2.6746368408203125, 4.6395111083984375, 2.120532989501953, -0.15875244140625, -0.09462356567382812, 4.9369659423828125, -0.05378150939941406, 0.6110763549804688, 0.8415985107421875, -2.124217987060547, 1.9278011322021484, 0.07134246826171875, 0.6468429565429688, -4.835845947265625, -0.065673828125, 2.7919921875, 3.573345184326172, -0.29302215576171875, 2.4633636474609375, 0.6314849853515625, -1.0407562255859375, 1.95648193359375, 0.7048473358154297, 1.6518592834472656, -1.3017234802246094, 2.4534835815429688, 5.019565582275391, 1.2713165283203125, -2.0852928161621094], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000421.npy"} +{"epoch": 0.636432350718065, "step": 422, "batch_size": 64, "mean": 1.4837113618850708, "std": 2.3597402572631836, "min": -2.979705810546875, "p10": -1.0032974243164061, "median": 1.180135726928711, "p90": 4.5900339126586935, "max": 9.827468872070312, "pos_frac": 0.75, "sample": [5.222419738769531, 6.357147216796875, -1.1824016571044922, -0.787994384765625, 2.7178726196289062, -0.6303691864013672, 1.6476287841796875, -2.979705810546875, 1.3831443786621094, 9.827468872070312, 2.4882354736328125, 1.4871540069580078, -0.8377761840820312, 1.057525634765625, 6.51617431640625, -0.058254241943359375, 1.9949264526367188, 3.352825164794922, 0.1925506591796875, 1.079315185546875, 1.7582550048828125, 0.894287109375, -0.7575817108154297, 0.9386520385742188, 1.17095947265625, 1.34344482421875, 1.88250732421875, 2.3236541748046875, 5.993560791015625, 1.8688507080078125, 0.44170379638671875, 3.6675033569335938, 2.639434814453125, -1.3833122253417969, 0.8103065490722656, 1.1893119812011719, 4.818967819213867, 0.4736061096191406, 4.055854797363281, 0.570892333984375, -1.0584945678710938, -0.011989593505859375, 1.8588790893554688, 3.8145408630371094, 3.6907424926757812, -2.0885467529296875, -0.7520751953125, 1.5729293823242188, 0.5620574951171875, 1.7581863403320312, -0.7360954284667969, 6.121429443359375, 0.16971588134765625, -0.8745040893554688, 0.22018814086914062, 1.514129638671875, 0.49555397033691406, -2.5809783935546875, 0.9259872436523438, 3.82049560546875, 1.9086856842041016, 1.0506343841552734, -1.8659286499023438, 1.8932361602783203], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000422.npy"} +{"epoch": 0.6379440665154951, "step": 423, "batch_size": 64, "mean": 1.4967900514602661, "std": 2.2601211071014404, "min": -3.6981201171875, "p10": -0.9620056152343749, "median": 1.6773004531860352, "p90": 4.429719543457032, "max": 6.8578338623046875, "pos_frac": 0.71875, "sample": [0.2136688232421875, -1.468048095703125, 1.8280410766601562, -0.21503067016601562, 6.624866485595703, 0.2584552764892578, 1.0320396423339844, -3.6981201171875, 6.8348846435546875, 1.0026397705078125, -0.18597412109375, 3.5086841583251953, -0.404388427734375, -1.021331787109375, 1.670684814453125, 0.7499237060546875, -0.5176925659179688, -2.3043212890625, 4.446533203125, -0.823577880859375, 0.9405937194824219, 2.1147384643554688, -1.902069091796875, -0.5791110992431641, -0.6726703643798828, 4.2892303466796875, 2.5929183959960938, 0.43922996520996094, -2.73541259765625, -0.027202606201171875, 3.83905029296875, 6.8578338623046875, 1.8314590454101562, 3.4190521240234375, 3.312589645385742, 2.1065750122070312, 5.227447509765625, 1.9687347412109375, 5.071147918701172, 3.1024856567382812, 2.0768165588378906, 0.9498062133789062, -0.3560943603515625, 2.248138427734375, 1.7818832397460938, 2.512767791748047, 3.0765609741210938, 1.114980697631836, 0.10342025756835938, 1.6839160919189453, 2.956695556640625, 1.7845535278320312, 1.1045207977294922, 1.7959938049316406, 1.7758102416992188, 5.6444854736328125, 1.7441177368164062, -0.28314208984375, -1.3829574584960938, 4.3904876708984375, 2.197265625, -0.1216888427734375, 0.24166488647460938, 0.026002883911132812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000423.npy"} +{"epoch": 0.6394557823129252, "step": 424, "batch_size": 64, "mean": 1.554780125617981, "std": 2.1484878063201904, "min": -2.8357162475585938, "p10": -0.6772521972656249, "median": 1.1289710998535156, "p90": 4.693726348876955, "max": 7.6572265625, "pos_frac": 0.796875, "sample": [-0.17003250122070312, 4.321968078613281, 0.107330322265625, 3.5976428985595703, 1.3660659790039062, 3.4034690856933594, 7.6572265625, 1.460540771484375, -1.399078369140625, -2.105010986328125, -0.8478679656982422, 1.6433944702148438, 1.0834712982177734, -2.8357162475585938, 6.0015106201171875, 1.3394851684570312, 3.2980728149414062, -0.696533203125, 0.3943023681640625, 0.6320266723632812, 0.5669498443603516, 2.9830360412597656, 2.0676651000976562, 0.34392547607421875, -0.4890785217285156, 1.50421142578125, 5.206756591796875, -0.63226318359375, 5.975624084472656, 1.175100326538086, 2.48992919921875, 0.444427490234375, -1.6323928833007812, 0.5548229217529297, 4.351005554199219, 0.036834716796875, 0.921142578125, 4.840606689453125, 3.1186447143554688, 1.1221237182617188, 0.2161121368408203, 3.67230224609375, 0.8668327331542969, 3.58551025390625, 1.1358184814453125, 5.1443634033203125, 2.4147109985351562, 0.3319721221923828, -1.3964920043945312, 0.07623100280761719, 2.2146034240722656, 1.6881141662597656, 0.377166748046875, 3.4648876190185547, -0.38603973388671875, 0.38849639892578125, 0.631744384765625, -0.5330810546875, 4.0731353759765625, 4.953126907348633, -0.28148651123046875, 2.184906005859375, 0.2565956115722656, 1.2250595092773438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000424.npy"} +{"epoch": 0.6409674981103552, "step": 425, "batch_size": 64, "mean": 1.4651916027069092, "std": 1.974876046180725, "min": -2.2912063598632812, "p10": -1.1175262451171872, "median": 1.159088134765625, "p90": 4.535622024536133, "max": 7.630882263183594, "pos_frac": 0.828125, "sample": [2.9785919189453125, 1.3268585205078125, 0.45728302001953125, 1.3691558837890625, 4.788185119628906, -0.4099769592285156, 4.8417205810546875, 1.0164451599121094, 1.3799591064453125, 0.7814178466796875, 0.8856201171875, -1.5291881561279297, 2.3067493438720703, 1.159271240234375, -1.2497673034667969, 1.6074752807617188, 0.5170822143554688, 3.1505126953125, 1.158905029296875, 0.3362312316894531, -0.8089637756347656, 2.4695053100585938, 0.7276649475097656, 0.230682373046875, 2.6802444458007812, -0.13002395629882812, 1.6955451965332031, 4.73321533203125, 0.29156494140625, 7.630882263183594, 2.7400970458984375, 0.4799003601074219, 2.2290496826171875, 0.6525726318359375, 1.9868316650390625, 0.9372291564941406, 0.11029815673828125, 4.875299453735352, 2.2247352600097656, 0.46446990966796875, 2.6949539184570312, -1.8445816040039062, 1.4172744750976562, 0.985107421875, 0.9958724975585938, 0.5837039947509766, 1.3643646240234375, 0.2326984405517578, 2.8646163940429688, 1.97869873046875, 3.241954803466797, 0.4751758575439453, -1.5876693725585938, 2.3966827392578125, -2.2912063598632812, 0.5653209686279297, 4.440498352050781, 4.576389312744141, -1.3945388793945312, 3.9749526977539062, -0.37337493896484375, 1.51220703125, 5.9148712158203125, -2.0450439453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000425.npy"} +{"epoch": 0.6424792139077853, "step": 426, "batch_size": 64, "mean": 0.6476625800132751, "std": 2.0228476524353027, "min": -7.11004638671875, "p10": -1.4242818832397461, "median": 0.7129669189453125, "p90": 2.621413993835451, "max": 6.224185943603516, "pos_frac": 0.671875, "sample": [0.6065940856933594, 0.3271598815917969, 1.6500701904296875, 3.15576171875, 1.5792312622070312, 2.8071212768554688, -0.1294708251953125, 6.224185943603516, 1.1166915893554688, 1.236602783203125, -1.3639774322509766, -0.6784820556640625, 0.680450439453125, -1.9505538940429688, -0.077178955078125, 0.3980712890625, 1.7509536743164062, -1.509674072265625, 1.3964805603027344, -0.22933578491210938, -1.0916595458984375, 2.1669082641601562, -0.4473876953125, -0.701873779296875, -1.4501266479492188, 0.2896575927734375, 0.8062343597412109, -7.11004638671875, 1.9843692779541016, 1.1422576904296875, 1.9740753173828125, 3.606586456298828, 1.248189926147461, -0.2159271240234375, -3.5264434814453125, 0.7290267944335938, -1.0860671997070312, 0.6599788665771484, 0.6969070434570312, 1.1220703125, 5.645811080932617, 1.922698974609375, -2.3217201232910156, -2.584808349609375, -0.9992523193359375, 4.835773468017578, 0.499908447265625, 0.6021156311035156, -0.3951911926269531, 0.90167236328125, 0.8210315704345703, 1.8294601440429688, 0.3356170654296875, 1.8778839111328125, 0.7871246337890625, -1.1729354858398438, 2.1880970001220703, 2.137725830078125, 0.069610595703125, 1.7754364013671875, 1.928436279296875, 0.87664794921875, 2.9272003173828125, -0.8253707885742188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000426.npy"} +{"epoch": 0.6439909297052154, "step": 427, "batch_size": 64, "mean": 1.350417137145996, "std": 2.198519468307495, "min": -3.331501007080078, "p10": -1.1092742919921874, "median": 1.2488536834716797, "p90": 3.9670160293579113, "max": 8.370361328125, "pos_frac": 0.703125, "sample": [2.006999969482422, -0.11249542236328125, -0.4544715881347656, -2.4333953857421875, -0.897247314453125, 2.55731201171875, 1.9265251159667969, -0.8473091125488281, 4.093162536621094, -1.1587905883789062, 4.388236999511719, -0.8843193054199219, -2.191883087158203, -0.9937362670898438, 1.4408187866210938, 1.27947998046875, 1.7402572631835938, 2.6034698486328125, 0.2584228515625, 1.5403594970703125, -0.07934188842773438, 1.2182273864746094, 3.576446533203125, 1.8621902465820312, 1.2032470703125, 2.136270523071289, 0.8494606018066406, 3.0853195190429688, 0.5548629760742188, 1.9878082275390625, 3.500448226928711, 0.9364891052246094, 1.202789306640625, 0.6537265777587891, 2.512603759765625, 5.018283843994141, -1.3044967651367188, -3.331501007080078, 5.410896301269531, 2.9127349853515625, 0.23492431640625, -0.27957916259765625, 1.9521484375, -1.4030303955078125, -0.176422119140625, -0.8349895477294922, 1.8996238708496094, 2.1463623046875, 0.23095703125, -1.5126571655273438, 3.6726741790771484, 4.51348876953125, 0.5421905517578125, 1.5394515991210938, 1.3447494506835938, 1.9332199096679688, 1.1674728393554688, 2.129901885986328, 3.5451812744140625, -0.005584716796875, 0.44384765625, -0.5782966613769531, 8.370361328125, 7.7828369140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000427.npy"} +{"epoch": 0.6455026455026455, "step": 428, "batch_size": 64, "mean": 1.6753690242767334, "std": 2.0527849197387695, "min": -2.65374755859375, "p10": -0.6407749176025389, "median": 1.1108942031860352, "p90": 4.185004043579101, "max": 7.12322998046875, "pos_frac": 0.796875, "sample": [-2.65374755859375, 4.075813293457031, 2.9591751098632812, -0.5131244659423828, 0.012523651123046875, 0.8664817810058594, 1.9559288024902344, -0.8499221801757812, -2.0152549743652344, 1.644622802734375, 4.15191650390625, -1.118133544921875, -0.20359039306640625, -0.35457611083984375, 3.8753795623779297, 2.42291259765625, 5.7926483154296875, 0.4915504455566406, 0.33187103271484375, 0.71923828125, 2.4976959228515625, 0.9489288330078125, 0.16373443603515625, 3.256214141845703, 2.9269866943359375, 1.1019630432128906, 0.8173332214355469, -0.7363433837890625, 1.8675994873046875, 1.2267608642578125, 3.0015106201171875, 0.2216949462890625, 0.26138877868652344, 1.7931175231933594, 1.296875, 6.624687194824219, 2.7968063354492188, 4.545234680175781, 0.701904296875, -0.16220855712890625, 3.68011474609375, 7.12322998046875, 2.2942962646484375, 2.3198165893554688, 0.592529296875, -0.972808837890625, 3.4018707275390625, 0.9713058471679688, 0.865234375, -0.20355224609375, 0.6618232727050781, 0.83416748046875, 0.3211517333984375, -0.23682403564453125, 4.199184417724609, 3.5304203033447266, 4.088222503662109, 4.733545303344727, 2.9041061401367188, 1.1198253631591797, 5.67303466796875, 2.685831069946289, 0.5889739990234375, -0.6954822540283203], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000428.npy"} +{"epoch": 0.6470143613000756, "step": 429, "batch_size": 64, "mean": 1.4457752704620361, "std": 2.0933351516723633, "min": -3.7006301879882812, "p10": -0.9405498504638672, "median": 1.4070463180541992, "p90": 3.8593084335327177, "max": 8.83038330078125, "pos_frac": 0.796875, "sample": [2.113525390625, -1.17999267578125, 0.32315826416015625, 0.5772705078125, 0.14800262451171875, 1.00537109375, 1.56884765625, 4.39971923828125, -0.9522056579589844, 0.6339797973632812, 1.6469192504882812, -2.043182373046875, 0.98638916015625, 2.538249969482422, 0.4127235412597656, 1.068939208984375, -3.7006301879882812, 1.0126190185546875, 2.5316009521484375, 1.78582763671875, 3.10357666015625, 0.6239433288574219, 2.1006011962890625, 2.7615909576416016, 1.7846145629882812, 4.176368713378906, 4.353799819946289, 3.1195011138916016, 2.2750930786132812, -2.7435684204101562, -0.5563430786132812, -0.9133529663085938, 0.3595542907714844, -0.4280357360839844, 2.7938804626464844, 1.1740303039550781, -0.7929229736328125, 4.429294586181641, -0.4761810302734375, 0.21527099609375, 2.0074195861816406, 2.8043365478515625, 0.7311439514160156, 2.013853073120117, 4.992794036865234, -1.0003738403320312, -1.6827735900878906, 0.798797607421875, 1.2452449798583984, 1.9561767578125, 0.5746269226074219, 6.6728057861328125, 3.0387802124023438, -0.899139404296875, 2.1335411071777344, 1.7321624755859375, 3.0968894958496094, 0.40752410888671875, 8.83038330078125, 2.5813217163085938, 0.6204948425292969, 1.9243011474609375, 2.6394500732421875, 3.0720138549804688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000429.npy"} +{"epoch": 0.6485260770975056, "step": 430, "batch_size": 64, "mean": 1.3011209964752197, "std": 2.009727954864502, "min": -2.655181884765625, "p10": -0.7468582153320311, "median": 0.961090087890625, "p90": 3.6129875183105487, "max": 8.255626678466797, "pos_frac": 0.765625, "sample": [-1.4739227294921875, 0.03592681884765625, -0.088348388671875, 0.12044525146484375, -1.5672607421875, 2.536113739013672, 0.029005050659179688, 5.612392425537109, 0.8154850006103516, 1.0330581665039062, -0.5359973907470703, 3.7959213256835938, 3.1597824096679688, 0.7359180450439453, 5.080467224121094, 1.8330612182617188, 2.5831146240234375, 0.9752349853515625, 4.0999603271484375, 0.10258102416992188, 0.067047119140625, 5.4352264404296875, 0.9469451904296875, 0.16097259521484375, -0.82757568359375, 5.680509567260742, -0.07086944580078125, 3.1861419677734375, 1.2197723388671875, 1.9778289794921875, 1.5583972930908203, 1.4750823974609375, 1.7112960815429688, 1.945892333984375, -0.5568904876708984, 8.255626678466797, 2.8590354919433594, 2.5500030517578125, -1.8921890258789062, 2.73870849609375, 1.9626121520996094, 2.1917495727539062, 0.6795444488525391, 0.4378395080566406, 0.114837646484375, 2.7509021759033203, -0.1810150146484375, 0.09151649475097656, -2.655181884765625, -1.6890792846679688, -0.05548095703125, 1.5533084869384766, -0.25327301025390625, 2.4766616821289062, 0.5073471069335938, 2.1861190795898438, 2.5009307861328125, 1.0671024322509766, 0.7940330505371094, 2.5106201171875, 0.682952880859375, -1.7823200225830078, -0.5585174560546875, 0.6346359252929688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000430.npy"} +{"epoch": 0.6500377928949358, "step": 431, "batch_size": 64, "mean": 1.3228061199188232, "std": 2.06062650680542, "min": -2.667938232421875, "p10": -0.7479446411132813, "median": 1.1413021087646484, "p90": 4.105998992919924, "max": 8.491592407226562, "pos_frac": 0.75, "sample": [-2.309764862060547, 2.151052474975586, 1.410675048828125, -0.3723888397216797, 0.162322998046875, 2.8385543823242188, 5.2503814697265625, 0.9997406005859375, 1.417877197265625, -0.7520828247070312, -0.5886631011962891, 5.519039154052734, 1.702545166015625, 0.19977951049804688, 0.7129840850830078, 1.9710979461669922, 4.708641052246094, -0.24843788146972656, 3.665802001953125, 1.7627410888671875, 1.2348251342773438, -0.8889236450195312, 0.8130569458007812, 6.17344856262207, -2.1812820434570312, 4.369380950927734, 4.294654846191406, -0.43284034729003906, 0.8133392333984375, -0.48783111572265625, 0.9475021362304688, 1.0477790832519531, 2.201904296875, 3.087890625, -1.037017822265625, 0.31603240966796875, 2.9266281127929688, -0.6328201293945312, -2.667938232421875, 0.2747993469238281, 1.2945709228515625, -0.7382888793945312, -2.3338851928710938, 1.510711669921875, 0.6069221496582031, 0.5679130554199219, 2.338743209838867, 2.2345123291015625, 1.3328876495361328, 8.491592407226562, 0.706268310546875, 1.2351760864257812, 1.6675567626953125, 2.0063743591308594, 1.3721790313720703, 2.1518821716308594, 0.2631072998046875, -0.12437629699707031, -0.20853805541992188, 3.2249908447265625, 3.284717559814453, 0.9346179962158203, 2.1725616455078125, 0.29291534423828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000431.npy"} +{"epoch": 0.6515495086923658, "step": 432, "batch_size": 64, "mean": 1.5187186002731323, "std": 2.1048715114593506, "min": -2.5944366455078125, "p10": -0.7440109252929688, "median": 1.2841300964355469, "p90": 4.240437316894531, "max": 7.034027099609375, "pos_frac": 0.734375, "sample": [0.41025543212890625, -1.2615547180175781, 0.6912002563476562, -2.4818344116210938, 0.7248249053955078, 1.0541934967041016, -0.6309432983398438, 1.53515625, -0.6796188354492188, 3.9126758575439453, 1.1469745635986328, -2.4731178283691406, -0.441986083984375, 0.85406494140625, 2.7911834716796875, -2.5944366455078125, 2.5873870849609375, -0.1624755859375, 5.194427490234375, 7.034027099609375, -1.1070480346679688, 2.4299850463867188, 4.262725830078125, 2.000213623046875, 0.7841053009033203, 3.6004867553710938, 0.6853561401367188, 4.268341064453125, 1.6226959228515625, -0.696746826171875, -0.5566825866699219, -0.4266929626464844, 0.279815673828125, -0.5251541137695312, 1.91229248046875, 1.2619476318359375, 4.492378234863281, 2.5133514404296875, 1.348236083984375, 2.3814163208007812, -1.0171623229980469, 2.6705284118652344, 3.6369781494140625, 0.244659423828125, 1.983795166015625, -0.7642669677734375, -0.3662853240966797, 3.8270931243896484, 0.5441474914550781, 5.859580993652344, 2.970510482788086, 1.1541023254394531, 2.939849853515625, 2.8278961181640625, 3.559051513671875, 5.531272888183594, 1.4305038452148438, 2.7412681579589844, 4.1884307861328125, -0.6710700988769531, 0.1812286376953125, 4.085548400878906, 0.59259033203125, 1.3063125610351562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000432.npy"} +{"epoch": 0.6530612244897959, "step": 433, "batch_size": 64, "mean": 1.802809476852417, "std": 2.481127977371216, "min": -2.472503662109375, "p10": -0.7046930313110351, "median": 0.9535026550292969, "p90": 5.5784700393676765, "max": 9.377645492553711, "pos_frac": 0.765625, "sample": [1.0625839233398438, 3.1961612701416016, -0.095550537109375, 0.6212196350097656, 2.1264610290527344, 0.13824462890625, 0.5232620239257812, 4.5826568603515625, -0.06801605224609375, 1.3895263671875, 0.74285888671875, 0.8257789611816406, 0.1751251220703125, -1.9920654296875, 6.065555572509766, -0.9701919555664062, 1.4675102233886719, 3.6962013244628906, 1.8861236572265625, -0.739715576171875, 3.9525699615478516, 0.15631866455078125, 0.21537017822265625, 2.2999324798583984, -2.472503662109375, 0.48581886291503906, 3.7863922119140625, 0.03028106689453125, 3.085824966430664, 3.2850799560546875, 0.2440166473388672, 0.85491943359375, -0.24654579162597656, 4.392845153808594, 2.1215171813964844, -0.6974391937255859, -0.8612136840820312, 0.8136749267578125, 2.08367919921875, 9.377645492553711, 5.414451599121094, 3.1815147399902344, 6.338329315185547, -0.1541290283203125, 3.2454452514648438, -1.9545059204101562, -0.011976242065429688, 3.3513259887695312, 0.19704437255859375, 6.385227203369141, 0.9310073852539062, 1.1677932739257812, 0.9759979248046875, 0.01683807373046875, -0.09798431396484375, 5.648763656616211, 1.6853446960449219, -0.1568145751953125, 4.123340606689453, 4.285003662109375, 6.007377624511719, 0.037952423095703125, 7.928348541259766, -0.7078018188476562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000433.npy"} +{"epoch": 0.654572940287226, "step": 434, "batch_size": 64, "mean": 1.6889312267303467, "std": 2.4596903324127197, "min": -3.6135482788085938, "p10": -1.5737174987792968, "median": 1.8131017684936523, "p90": 4.4789430618286135, "max": 10.983898162841797, "pos_frac": 0.8125, "sample": [-3.6135482788085938, 1.7436141967773438, 3.0848541259765625, 1.5442047119140625, 2.230987548828125, 4.508396148681641, -1.5937423706054688, 2.1136932373046875, -0.9501190185546875, 1.2529983520507812, 0.7604560852050781, -2.915863037109375, -3.3547134399414062, 2.4202117919921875, 2.443164825439453, 1.0048751831054688, 3.906421661376953, 2.0188426971435547, 4.89471435546875, 4.278553009033203, 1.21514892578125, -0.1553821563720703, 5.917236328125, 0.8154983520507812, -2.4459781646728516, 2.393646240234375, 4.410219192504883, 2.1856536865234375, 0.6499137878417969, 2.157167434692383, -0.4107780456542969, 2.521038055419922, 1.5735931396484375, 10.983898162841797, 6.08380126953125, 1.8519973754882812, -1.5269927978515625, 2.1806793212890625, 2.290252685546875, 0.24033737182617188, 4.008533477783203, -1.82720947265625, 1.7742061614990234, 2.0875587463378906, 2.4673080444335938, 0.5578460693359375, 0.1194305419921875, 5.042304992675781, 4.313148498535156, 2.4627685546875, 0.09108924865722656, 5.114799499511719, -1.6908226013183594, 0.34102630615234375, 0.02075958251953125, 0.594146728515625, 2.485157012939453, 3.830047607421875, 1.4551372528076172, -1.0805397033691406, 2.2729454040527344, 1.2631912231445312, 2.5317134857177734, 1.1481075286865234], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000434.npy"} +{"epoch": 0.656084656084656, "step": 435, "batch_size": 64, "mean": 1.4080252647399902, "std": 2.026890516281128, "min": -2.9368896484375, "p10": -0.9880626678466796, "median": 1.3154783248901367, "p90": 3.8169502258300785, "max": 7.697536468505859, "pos_frac": 0.8125, "sample": [3.5597381591796875, 0.46142005920410156, 0.06509017944335938, 0.6736850738525391, -1.26275634765625, 0.6684951782226562, 1.9408721923828125, 0.31687355041503906, 2.66290283203125, -0.25431060791015625, 1.3535995483398438, 2.8788490295410156, -1.0360488891601562, 2.614574432373047, 3.4970836639404297, 4.186367034912109, 1.3441543579101562, -2.9368896484375, 0.16526031494140625, 4.620231628417969, 1.569580078125, 0.3039531707763672, 2.1049938201904297, 4.8439483642578125, 0.8413238525390625, 0.5523185729980469, -0.5923957824707031, 0.427032470703125, 3.8380508422851562, 2.7728042602539062, 1.10406494140625, 3.2874908447265625, -2.6423072814941406, 1.3593902587890625, 1.1991996765136719, 2.5451812744140625, 0.108734130859375, 2.563018798828125, 1.7472476959228516, 1.4127349853515625, 2.1996002197265625, 5.2957763671875, 7.697536468505859, 3.7677154541015625, 0.07695388793945312, 1.3984756469726562, 0.5690269470214844, 0.013916015625, -0.8760948181152344, 5.693023681640625, -1.5548553466796875, 1.5331039428710938, 1.2868022918701172, 0.13668060302734375, 1.9343948364257812, 1.2031974792480469, 3.5028610229492188, -1.262420654296875, -2.3623275756835938, 2.0675201416015625, -0.020725250244140625, 0.358184814453125, 3.3404159545898438, -0.750701904296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000435.npy"} +{"epoch": 0.6575963718820862, "step": 436, "batch_size": 64, "mean": 1.4969241619110107, "std": 2.0142571926116943, "min": -2.4546356201171875, "p10": -1.1089580535888672, "median": 1.4562616348266602, "p90": 4.467567825317383, "max": 5.861175537109375, "pos_frac": 0.765625, "sample": [1.2099075317382812, 1.1564979553222656, 4.712677001953125, 0.32125282287597656, 1.125711441040039, 0.8057174682617188, 1.2120513916015625, 3.6179580688476562, -1.0170822143554688, 1.2345352172851562, 2.1713523864746094, 3.489900588989258, 1.0585517883300781, 2.70367431640625, 1.0324897766113281, 4.1257476806640625, 5.861175537109375, 4.649955749511719, -2.0965728759765625, 4.501537322998047, 1.9639320373535156, 2.4855117797851562, -1.107086181640625, 1.7423171997070312, 1.8394851684570312, -1.0181045532226562, 5.2462615966796875, 0.7817478179931641, 2.4013824462890625, 4.076719284057617, 0.4749946594238281, -0.404449462890625, -2.2151432037353516, -0.93829345703125, 0.20995521545410156, 1.4998550415039062, 2.029083251953125, 0.17423248291015625, -0.36505126953125, -0.639923095703125, 2.5849609375, 1.5977020263671875, 2.220531463623047, -0.25946807861328125, -1.1627616882324219, 1.4239826202392578, -1.1449737548828125, 5.590850830078125, 1.4885406494140625, 4.7587890625, 2.280773162841797, 0.9010658264160156, 1.628326416015625, 1.5126495361328125, 0.19079208374023438, 3.238445281982422, 3.100004196166992, -1.3191375732421875, 2.1073455810546875, 0.459747314453125, -2.4546356201171875, 4.3883056640625, -1.1097602844238281, 3.6666030883789062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000436.npy"} +{"epoch": 0.6591080876795162, "step": 437, "batch_size": 64, "mean": 1.4886364936828613, "std": 2.755743980407715, "min": -5.3017425537109375, "p10": -1.5642585754394531, "median": 0.8312397003173828, "p90": 5.535349464416504, "max": 9.058181762695312, "pos_frac": 0.703125, "sample": [5.4041595458984375, 4.052452087402344, 2.280029296875, 1.9079818725585938, 0.2158203125, -1.6029853820800781, 4.793510437011719, 2.98480224609375, 9.058181762695312, 0.7294502258300781, 2.9628753662109375, 1.0380477905273438, -5.3017425537109375, 2.161529541015625, -0.3946533203125, -0.8878173828125, 3.57427978515625, 1.6692695617675781, 0.4539756774902344, -0.73236083984375, 6.021520614624023, 7.294677734375, 4.329319000244141, -1.6925621032714844, -1.4738960266113281, 1.5422744750976562, 2.564422607421875, -1.2753772735595703, -1.9279251098632812, -2.6742095947265625, -1.33416748046875, -0.7069282531738281, -0.459869384765625, -0.732391357421875, 1.7624034881591797, 5.761964797973633, 2.9740142822265625, -0.48477935791015625, 0.6384429931640625, 0.5755214691162109, 1.449951171875, 0.40773582458496094, -0.6491622924804688, 4.1923370361328125, 4.085044860839844, 3.666597366333008, 0.16327667236328125, 0.9239463806152344, 6.504539489746094, -1.9042587280273438, 0.09587669372558594, 5.591573715209961, 0.7385330200195312, 6.155555725097656, 2.599254608154297, 0.5982112884521484, 1.3955001831054688, 2.6811065673828125, -3.0735626220703125, 0.033008575439453125, -0.6306686401367188, 0.44974517822265625, 0.34539794921875, 4.383941650390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000437.npy"} +{"epoch": 0.6606198034769464, "step": 438, "batch_size": 64, "mean": 1.4018330574035645, "std": 1.985513687133789, "min": -2.8400325775146484, "p10": -0.9044551849365233, "median": 1.1741275787353516, "p90": 4.3783143997192395, "max": 5.428131103515625, "pos_frac": 0.75, "sample": [5.047874450683594, -0.7650909423828125, 5.428131103515625, -1.7413921356201172, 0.10033416748046875, -1.4109001159667969, -0.13779830932617188, 1.6732444763183594, 0.5224857330322266, 2.320037841796875, 1.8067588806152344, 4.714988708496094, -0.9297981262207031, -0.8453216552734375, 1.6989669799804688, 0.3201274871826172, 3.042348861694336, -2.8400325775146484, 1.978994369506836, 1.142547607421875, 3.909404754638672, 2.31768798828125, -0.6296615600585938, 3.1346588134765625, 2.7686519622802734, 1.9734516143798828, 1.797637939453125, -0.3255119323730469, 4.477643966674805, 5.2695159912109375, 1.98284912109375, -0.20880126953125, 3.2374420166015625, 0.9364757537841797, 0.34665870666503906, 3.0495681762695312, 0.9529571533203125, -1.5357913970947266, 0.7502956390380859, 2.2297439575195312, 3.7512950897216797, 1.0426177978515625, 1.3095417022705078, 3.222320556640625, 2.4671783447265625, 0.9763336181640625, 2.9379730224609375, 0.8655548095703125, 0.2340087890625, 0.3977088928222656, -0.6138687133789062, -0.023691177368164062, 1.6959114074707031, 4.843921661376953, 1.57000732421875, -1.7352104187011719, 5.1141204833984375, 0.6253662109375, -2.6219635009765625, 4.14654541015625, 0.6804656982421875, -0.5441246032714844, 1.2057075500488281, 0.6082077026367188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000438.npy"} +{"epoch": 0.6621315192743764, "step": 439, "batch_size": 64, "mean": 1.5173695087432861, "std": 1.9992190599441528, "min": -3.6185302734375, "p10": -0.8619056701660154, "median": 1.4781570434570312, "p90": 3.685220336914063, "max": 6.6800384521484375, "pos_frac": 0.796875, "sample": [1.0409622192382812, -0.018825531005859375, 0.7935771942138672, -1.1391754150390625, 2.7882442474365234, 1.739339828491211, 0.6390209197998047, 1.0381927490234375, 1.4928512573242188, 1.860198974609375, 5.598072052001953, -0.6239776611328125, 3.2686824798583984, 0.9446029663085938, 1.11474609375, 3.3636322021484375, 6.6800384521484375, -0.04184722900390625, 2.16204833984375, -1.0710906982421875, 1.0693893432617188, 2.588888168334961, 2.6862335205078125, -1.2099056243896484, 2.4364280700683594, 2.4548416137695312, 5.1943359375, -0.9638748168945312, 0.47237205505371094, 3.521007537841797, -0.5938243865966797, 3.7298660278320312, -0.46463775634765625, -0.135223388671875, 2.150339126586914, 3.3579177856445312, 0.47107696533203125, 1.3827133178710938, -3.6185302734375, 0.7494411468505859, -2.9508056640625, 2.8950042724609375, 4.138427734375, 6.152862548828125, 2.119943618774414, 3.5810470581054688, 1.5350341796875, 3.057403564453125, 0.0714111328125, 0.8355503082275391, 1.7167701721191406, 1.3256378173828125, 1.4634628295898438, 2.0915985107421875, 1.9340019226074219, 4.2890472412109375, 1.7197265625, 0.3799591064453125, -3.194732666015625, 0.38700103759765625, 1.8068275451660156, 2.408203125, 1.2193679809570312, 1.2207565307617188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000439.npy"} +{"epoch": 0.6636432350718064, "step": 440, "batch_size": 64, "mean": 1.744425654411316, "std": 2.122314691543579, "min": -2.707489013671875, "p10": -0.6567296981811523, "median": 1.3361434936523438, "p90": 4.97602653503418, "max": 6.9001007080078125, "pos_frac": 0.796875, "sample": [-0.6278705596923828, 2.4269866943359375, 4.692344665527344, 5.0074462890625, 5.116228103637695, 6.9001007080078125, 5.317256927490234, -0.13548851013183594, 4.902713775634766, -0.669097900390625, 0.1236419677734375, 6.496118545532227, -0.6884956359863281, 4.1623992919921875, -0.6051979064941406, -0.057834625244140625, 3.1641159057617188, 1.4008712768554688, -1.0485572814941406, 0.9996566772460938, 2.705951690673828, -0.15219497680664062, 0.47936248779296875, 3.1355514526367188, 2.6239967346191406, 2.627239227294922, 1.3703689575195312, 2.0765533447265625, 0.3727893829345703, 1.3019180297851562, 0.9402084350585938, 1.4746246337890625, 0.759857177734375, 0.741973876953125, 0.3420391082763672, 0.67950439453125, 1.5427322387695312, -2.707489013671875, 1.1618080139160156, 4.243509292602539, 1.8810310363769531, 2.734384536743164, 1.7826004028320312, 3.3848066329956055, -2.2340660095214844, 0.5395126342773438, 5.399452209472656, 1.194061279296875, 1.8255043029785156, 0.22855377197265625, 0.710662841796875, -0.9566211700439453, 3.2009735107421875, 0.30436134338378906, 1.5304622650146484, 0.32864952087402344, -0.5028343200683594, -0.7159023284912109, 3.3746719360351562, 0.9764480590820312, 0.7530479431152344, 5.89283561706543, 3.1619338989257812, 4.251068115234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000440.npy"} +{"epoch": 0.6651549508692366, "step": 441, "batch_size": 64, "mean": 1.786143183708191, "std": 1.8317595720291138, "min": -2.0777015686035156, "p10": -0.38713397979736325, "median": 1.2961673736572266, "p90": 4.6025630950927745, "max": 5.964000701904297, "pos_frac": 0.84375, "sample": [2.3558502197265625, 3.3635730743408203, 5.850433349609375, 2.9345779418945312, 0.4955615997314453, -0.17202377319335938, 3.751039505004883, 0.8494796752929688, 2.8036041259765625, 0.36716461181640625, -1.1280479431152344, 2.2250213623046875, 1.9439239501953125, 5.043693542480469, 0.7509899139404297, 1.28643798828125, 1.9841957092285156, -0.0401611328125, -2.0777015686035156, 5.212165832519531, 1.1257476806640625, 2.714221954345703, 0.931976318359375, 4.157051086425781, 1.0703582763671875, 5.833944320678711, 1.01507568359375, 1.280740737915039, 5.964000701904297, 0.872100830078125, -0.7394790649414062, 1.9121246337890625, -0.5573310852050781, 1.3058967590332031, 0.5883445739746094, -0.9642906188964844, 1.212921142578125, 3.8911190032958984, 4.781160354614258, 0.742706298828125, 0.966094970703125, 3.5115966796875, 0.51361083984375, 1.9284706115722656, 1.7832489013671875, 1.70892333984375, 2.3882827758789062, 0.44368553161621094, 0.5424957275390625, -0.40103912353515625, 3.8288726806640625, 0.5106563568115234, 4.67578125, 4.431720733642578, -0.3546886444091797, 2.0121383666992188, 0.8670730590820312, 1.6732940673828125, 1.6822967529296875, 1.2437515258789062, -0.8885288238525391, 3.2744064331054688, 1.8699722290039062, 1.1628780364990234], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000441.npy"} +{"epoch": 0.6666666666666666, "step": 442, "batch_size": 64, "mean": 1.418702483177185, "std": 2.0952389240264893, "min": -2.8916854858398438, "p10": -1.0279359817504883, "median": 1.2157907485961914, "p90": 4.107723236083985, "max": 8.036203384399414, "pos_frac": 0.75, "sample": [0.4397125244140625, 2.61187744140625, 2.0467071533203125, -0.23917388916015625, 0.4614524841308594, 6.8447723388671875, 4.924560546875, 3.5568885803222656, 1.2584037780761719, 2.716400146484375, 2.135570526123047, 3.5428543090820312, 5.5228424072265625, 0.669342041015625, 1.9010276794433594, 2.2233810424804688, -0.13959121704101562, 0.8856658935546875, 0.7986526489257812, -0.9798908233642578, 1.01025390625, 1.8697471618652344, 2.488372802734375, 1.4088058471679688, 2.0459747314453125, 4.186378479003906, -2.20526123046875, 1.3162002563476562, -0.8151760101318359, 3.9241943359375, 1.173177719116211, 0.6423110961914062, 0.7398910522460938, 1.541168212890625, 1.09759521484375, -0.20166015625, -0.49701690673828125, 0.9561576843261719, 8.036203384399414, 4.913417816162109, 1.4808673858642578, -1.0485267639160156, 0.35153961181640625, 1.6396903991699219, -1.122488021850586, -1.1613273620605469, -1.8993606567382812, 4.299324035644531, -1.92425537109375, -0.16259765625, 3.349578857421875, 2.3722457885742188, -2.8916854858398438, 1.4694595336914062, -0.8370208740234375, -0.5884532928466797, 1.045419692993164, 0.2283172607421875, 0.4847259521484375, 3.052478790283203, 2.2870635986328125, 3.215421676635742, 0.2518463134765625, 2.0925064086914062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000442.npy"} +{"epoch": 0.6681783824640968, "step": 443, "batch_size": 64, "mean": 1.946547269821167, "std": 2.488295793533325, "min": -3.633411407470703, "p10": -0.7216308593749998, "median": 1.6841182708740234, "p90": 5.139032554626465, "max": 9.076278686523438, "pos_frac": 0.828125, "sample": [0.190032958984375, 4.266975402832031, 3.208221435546875, 3.96759033203125, 0.3998565673828125, 5.0167694091796875, 2.14007568359375, -3.633411407470703, 1.1156158447265625, 0.09607696533203125, -0.783905029296875, -0.9418830871582031, 1.049966812133789, 1.652444839477539, 0.8422698974609375, 2.796051025390625, -0.1981658935546875, 2.2904129028320312, 2.4295654296875, 3.0501785278320312, 6.497222900390625, -3.0495223999023438, -1.4658355712890625, 4.010765075683594, 0.4533824920654297, 6.253746032714844, 9.076278686523438, 0.9811038970947266, 6.533851623535156, 5.2432403564453125, 2.9868011474609375, 1.8267326354980469, 1.3852119445800781, 2.8622474670410156, 3.39178466796875, 4.1184844970703125, 4.294925689697266, 3.3573226928710938, 4.645389556884766, 3.6137123107910156, 0.5589523315429688, 2.1880264282226562, 0.51116943359375, 2.5990219116210938, 5.191431045532227, 3.830738067626953, 0.90130615234375, 0.8113975524902344, 1.0679473876953125, -0.43306541442871094, -0.24560165405273438, -2.7892990112304688, 1.152313232421875, 1.3819808959960938, 0.2668266296386719, 0.4288749694824219, -2.8974761962890625, 2.293548583984375, 4.675666809082031, 1.7157917022705078, 0.21755027770996094, -0.576324462890625, 0.16686248779296875, 5.5897979736328125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000443.npy"} +{"epoch": 0.6696900982615268, "step": 444, "batch_size": 64, "mean": 0.7623450756072998, "std": 2.309938907623291, "min": -4.743339538574219, "p10": -2.0441370010375977, "median": 0.4503774642944336, "p90": 3.905819892883301, "max": 7.095703125, "pos_frac": 0.625, "sample": [4.485897064208984, 0.7057228088378906, -0.6756744384765625, -0.05912971496582031, 1.943450927734375, 2.636554718017578, 1.9536685943603516, -4.041534423828125, -1.4684867858886719, 3.8540096282958984, 0.43364715576171875, 4.0274810791015625, -0.399505615234375, -2.1376190185546875, -0.3000335693359375, 2.619140625, 4.0259552001953125, 0.8475627899169922, 0.5779323577880859, -3.310272216796875, 3.259979248046875, 2.5297775268554688, 0.4425506591796875, 1.1132965087890625, -2.4417266845703125, 0.33376312255859375, 0.5403614044189453, -0.35578346252441406, -0.427215576171875, -1.6796112060546875, 3.9280242919921875, 0.4582042694091797, 2.1937484741210938, 0.9386062622070312, 1.0779266357421875, 2.4656829833984375, 0.053638458251953125, 1.9787788391113281, -0.4868621826171875, -3.4369049072265625, 0.42345428466796875, 7.095703125, -2.074695587158203, -0.8248329162597656, 0.3264179229736328, 2.857481002807617, 1.078216552734375, 6.909515380859375, 2.9868850708007812, 0.24117279052734375, 2.0251502990722656, -0.6349639892578125, 0.5680503845214844, -0.3555145263671875, 2.3947296142578125, -0.5174942016601562, -0.7104949951171875, 0.3982429504394531, 2.1604385375976562, -1.9728336334228516, -4.743339538574219, 3.9523468017578125, -0.9705162048339844, -0.02803802490234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000444.npy"} +{"epoch": 0.671201814058957, "step": 445, "batch_size": 64, "mean": 1.4694093465805054, "std": 2.159475564956665, "min": -2.9917755126953125, "p10": -1.0625158309936524, "median": 1.3101310729980469, "p90": 4.325223541259766, "max": 7.656715393066406, "pos_frac": 0.75, "sample": [2.4177322387695312, 4.112823486328125, 2.2971878051757812, -1.0671844482421875, -2.8621292114257812, 1.1315383911132812, 5.177501678466797, 1.5100555419921875, 1.9840087890625, 1.3199005126953125, -0.92181396484375, -1.0516223907470703, -0.42792510986328125, -0.68408203125, -2.41668701171875, 4.458032608032227, -1.0781402587890625, 0.6711654663085938, -0.2705230712890625, 0.4699134826660156, 0.1060791015625, 2.7546005249023438, -1.9654006958007812, -0.41925048828125, -2.9917755126953125, 2.7132415771484375, 1.0281486511230469, 4.023481369018555, 0.8634109497070312, 0.28212738037109375, -1.6188468933105469, -0.9645938873291016, 1.6803665161132812, 2.929790496826172, 7.656715393066406, 3.099700927734375, 2.2455825805664062, 4.495271682739258, 0.5903778076171875, 0.7403564453125, 1.9842453002929688, 4.416252136230469, 0.7951145172119141, 3.78369140625, 2.769186019897461, 3.242218017578125, 3.70892333984375, 0.10589790344238281, 2.425230026245117, -0.34665870666503906, 1.260833740234375, 1.2188949584960938, 0.0706787109375, 0.5045166015625, 2.6573944091796875, 3.1897430419921875, 5.639928817749023, 2.663921356201172, -0.3684215545654297, 1.6591720581054688, 1.3003616333007812, 2.142200469970703, 5.4351959228515625, 1.7645416259765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000445.npy"} +{"epoch": 0.672713529856387, "step": 446, "batch_size": 64, "mean": 2.1193606853485107, "std": 2.0408482551574707, "min": -1.762481689453125, "p10": -0.34830055236816404, "median": 1.845285415649414, "p90": 4.4722942352294925, "max": 8.991317749023438, "pos_frac": 0.859375, "sample": [0.4069862365722656, 0.07056427001953125, -1.1856765747070312, 2.032470703125, 1.1844863891601562, 3.6487274169921875, 0.6868247985839844, -1.762481689453125, 8.991317749023438, 5.7345123291015625, 1.7487659454345703, 1.4211959838867188, 1.8092269897460938, 2.174671173095703, 3.095804214477539, 7.002967834472656, 2.7247695922851562, 2.1901779174804688, 4.471778869628906, 3.0137405395507812, 0.1457653045654297, 2.91290283203125, 0.7696685791015625, 2.5859527587890625, 3.8416805267333984, 2.1867198944091797, 1.4397850036621094, 3.160266876220703, 3.76025390625, -0.9884567260742188, 3.924304962158203, 0.8303623199462891, -0.308868408203125, 1.6161422729492188, -0.3652000427246094, -0.7823944091796875, 0.8126430511474609, -0.8548870086669922, 3.2070465087890625, 0.9073715209960938, 1.7661285400390625, 6.2713165283203125, 3.260089874267578, 1.3289756774902344, 4.0196075439453125, 0.6140537261962891, 0.6467437744140625, 4.472515106201172, -0.7454166412353516, 1.627614974975586, 2.571636199951172, 0.5812568664550781, 2.6516571044921875, 3.8358421325683594, 2.775541305541992, 4.762407302856445, 1.8813438415527344, 2.047016143798828, 1.3094863891601562, 5.4075164794921875, -0.03930473327636719, 1.1775360107421875, 3.3868789672851562, 1.766754150390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000446.npy"} +{"epoch": 0.674225245653817, "step": 447, "batch_size": 64, "mean": 2.084477424621582, "std": 2.5167579650878906, "min": -7.748954772949219, "p10": -0.4261245727539061, "median": 2.110675811767578, "p90": 4.989966011047365, "max": 10.014030456542969, "pos_frac": 0.84375, "sample": [5.126579284667969, 2.6560592651367188, 2.7142887115478516, 0.4720916748046875, 0.9236221313476562, -1.35040283203125, 1.4807357788085938, -2.2501907348632812, -2.086944580078125, 1.3404693603515625, 1.5662307739257812, 0.2713165283203125, 1.4730300903320312, 5.309967041015625, 1.9504928588867188, 6.874916076660156, 1.2956581115722656, 2.3521728515625, 1.435098648071289, 1.6661453247070312, 2.2935714721679688, -0.1753387451171875, 2.202239990234375, -2.1566619873046875, 0.568878173828125, 4.405242919921875, 4.3774566650390625, 4.5119781494140625, 3.2235107421875, 3.39556884765625, 3.3532638549804688, 3.041910171508789, 1.8643550872802734, 2.619720458984375, 2.615001678466797, 2.288726806640625, 6.2521514892578125, 2.8710174560546875, 4.708051681518555, -0.4949951171875, 3.3582077026367188, 1.6871185302734375, 4.231781005859375, -0.2654266357421875, 0.7457122802734375, 0.335693359375, 0.46379852294921875, 5.36016845703125, 3.0911788940429688, 2.0246124267578125, -0.01660919189453125, 3.8594837188720703, -0.7318000793457031, 3.5549163818359375, 1.4484291076660156, 3.43359375, 2.1967391967773438, -7.748954772949219, 0.1521587371826172, 10.014030456542969, 5.110786437988281, 1.5203857421875, 2.708221435546875, 1.885354995727539], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000447.npy"} +{"epoch": 0.6757369614512472, "step": 448, "batch_size": 64, "mean": 1.6992576122283936, "std": 2.1200873851776123, "min": -2.8913707733154297, "p10": -0.7124692916870111, "median": 1.6550064086914062, "p90": 4.129475212097168, "max": 8.055896759033203, "pos_frac": 0.84375, "sample": [3.5746307373046875, -2.369729995727539, 4.138383865356445, 0.14044189453125, 3.1176681518554688, 0.936370849609375, 0.1860198974609375, 2.6290512084960938, 2.0560302734375, 0.444305419921875, 3.412750244140625, 2.483245849609375, 0.66876220703125, 1.0775146484375, 0.216033935546875, 3.6583023071289062, 0.03241539001464844, 4.1086883544921875, 3.0766677856445312, 0.6318836212158203, -0.1519317626953125, 8.055896759033203, 2.3267440795898438, 0.6038665771484375, 0.9441318511962891, -0.060009002685546875, 2.5390453338623047, 1.9836463928222656, 2.396209716796875, 4.323760986328125, -1.7600250244140625, 0.0337066650390625, 1.3350372314453125, 1.4764404296875, 0.12700653076171875, 2.8672256469726562, -1.1489944458007812, 2.2861499786376953, 4.9035491943359375, -1.6458320617675781, 2.723550796508789, -2.8913707733154297, 1.8716506958007812, 0.3247222900390625, 1.9493560791015625, 2.67523193359375, 1.8335723876953125, 3.9746322631835938, 2.2992305755615234, -0.13010597229003906, -0.9526996612548828, 0.97027587890625, 0.5888748168945312, 4.055686950683594, 0.1643524169921875, 5.41033935546875, 0.319580078125, -1.7568283081054688, 4.825643539428711, 2.449371337890625, 3.5927886962890625, 1.0654449462890625, 0.6135406494140625, 7.120582580566406], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000448.npy"} +{"epoch": 0.6772486772486772, "step": 449, "batch_size": 64, "mean": 0.9630191326141357, "std": 1.7466691732406616, "min": -3.4180145263671875, "p10": -0.9418544769287108, "median": 1.0497989654541016, "p90": 2.8272148132324224, "max": 6.6576385498046875, "pos_frac": 0.71875, "sample": [-0.12088203430175781, 0.749053955078125, -3.4180145263671875, 3.1730194091796875, -0.10581207275390625, -0.820343017578125, 1.5925331115722656, 1.2620162963867188, -0.8139419555664062, 2.2499542236328125, 1.0635833740234375, 0.7007904052734375, 1.6794166564941406, -0.06610679626464844, 0.4351348876953125, 3.4265289306640625, 5.79052734375, 1.2752971649169922, 0.3983421325683594, 1.0740776062011719, 1.1991081237792969, 1.7818279266357422, 4.04156494140625, 3.959339141845703, -0.9939308166503906, 1.4709434509277344, 1.9346923828125, 1.8103790283203125, 1.4468002319335938, 0.8181991577148438, 2.8995208740234375, 0.1468963623046875, 0.36299896240234375, -2.722867965698242, 2.3324661254882812, 0.835418701171875, 6.6576385498046875, -0.34732818603515625, 1.5643730163574219, 1.5759048461914062, 0.2054424285888672, 0.9777069091796875, -0.17484664916992188, 2.5954132080078125, -0.1641998291015625, 1.5658302307128906, -0.4701824188232422, 1.33837890625, 1.0360145568847656, 0.3184814453125, 2.3580169677734375, -1.60260009765625, -0.25787353515625, 0.7759208679199219, -2.0831451416015625, 1.31280517578125, 2.6585006713867188, -2.0936508178710938, 1.9358482360839844, 1.1547603607177734, 1.2117843627929688, -1.1942291259765625, 0.22121238708496094, -0.29128265380859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000449.npy"} +{"epoch": 0.6787603930461074, "step": 450, "batch_size": 64, "mean": 1.3581254482269287, "std": 2.1181087493896484, "min": -2.739664077758789, "p10": -0.9499786376953125, "median": 0.9300823211669922, "p90": 4.2313476562500005, "max": 7.5927886962890625, "pos_frac": 0.734375, "sample": [0.4995555877685547, -0.20080947875976562, -1.1555824279785156, 0.24658203125, 4.081512451171875, 2.82818603515625, 1.1572418212890625, 0.573883056640625, 3.334930419921875, 5.5459136962890625, 2.6460189819335938, 0.7780494689941406, 3.208406448364258, 0.6344375610351562, -0.017602920532226562, 5.0489349365234375, -0.4034576416015625, 0.5022239685058594, 1.8949508666992188, 7.5927886962890625, -0.8475570678710938, 1.9093742370605469, 1.3934326171875, -0.43433380126953125, -0.9413909912109375, -1.3366279602050781, 5.169166564941406, 4.8770751953125, 1.4378509521484375, -1.3775615692138672, 0.9871673583984375, 3.574371337890625, 0.7337360382080078, 2.5278377532958984, -2.739664077758789, 3.6934585571289062, 2.4091567993164062, -0.2495250701904297, 3.91748046875, 1.1179656982421875, 1.3079795837402344, 5.8394622802734375, 0.645538330078125, -2.6756153106689453, 0.6928787231445312, 0.8729972839355469, 1.1607208251953125, 1.3061752319335938, 0.54229736328125, -0.3715858459472656, 0.47296714782714844, -0.9536590576171875, 0.9988346099853516, 1.3334369659423828, 4.295562744140625, 1.9268875122070312, -0.4575347900390625, 2.8814525604248047, 0.3203620910644531, 3.8309288024902344, -0.3270835876464844, 0.20825958251953125, -2.015230178833008, 0.46642303466796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000450.npy"} +{"epoch": 0.6802721088435374, "step": 451, "batch_size": 64, "mean": 1.251347541809082, "std": 2.142186164855957, "min": -3.348541259765625, "p10": -1.101588439941406, "median": 1.1685914993286133, "p90": 3.920891571044922, "max": 6.19146728515625, "pos_frac": 0.71875, "sample": [3.7438316345214844, 0.3110809326171875, 2.2157363891601562, 1.1583404541015625, 2.288360595703125, -1.1801910400390625, -2.3601303100585938, 1.823028564453125, 1.178842544555664, 3.7757568359375, -1.47906494140625, 1.725250244140625, 3.2008132934570312, 0.6480255126953125, 1.8819923400878906, 1.2114105224609375, 1.2550430297851562, 1.8474578857421875, 0.24387741088867188, 2.809255599975586, 0.5737705230712891, -2.5680885314941406, -0.12711334228515625, 6.19146728515625, 2.633333206176758, -2.8036041259765625, 1.2362823486328125, -3.348541259765625, -2.9927215576171875, -0.1728191375732422, 1.3393993377685547, 3.652740478515625, -0.7282371520996094, -0.2252025604248047, 2.143260955810547, 0.48683929443359375, 4.605510711669922, 3.8116607666015625, 0.3784809112548828, 2.2216262817382812, 0.10518836975097656, 3.9677047729492188, 0.18323898315429688, -0.7075653076171875, 1.244363784790039, 0.505096435546875, 1.9399833679199219, 6.1169891357421875, 5.2093505859375, 0.9242019653320312, 3.1611328125, 0.3567962646484375, 4.06353759765625, -0.6197662353515625, -0.918182373046875, 1.0916519165039062, 3.3540096282958984, 0.7087020874023438, -0.3449592590332031, 3.566509246826172, 4.877874374389648, -0.6054344177246094, -0.59796142578125, -0.1029815673828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000451.npy"} +{"epoch": 0.6817838246409675, "step": 452, "batch_size": 64, "mean": 1.4862233400344849, "std": 2.030317783355713, "min": -3.3724517822265625, "p10": -0.8308303833007811, "median": 1.3602895736694336, "p90": 4.2221611022949235, "max": 5.8158416748046875, "pos_frac": 0.75, "sample": [1.5961990356445312, 0.37882232666015625, 1.361593246459961, 4.3807830810546875, 0.46256256103515625, 3.6322498321533203, 1.8120841979980469, 3.7969131469726562, 2.5892295837402344, 5.174995422363281, -0.10666275024414062, 4.9571075439453125, -3.3724517822265625, 3.8520431518554688, 3.0996170043945312, 0.734344482421875, 5.7283477783203125, 1.8901214599609375, 1.8513545989990234, 3.244903564453125, 1.5217971801757812, -0.46714019775390625, 1.212646484375, 1.1958160400390625, 0.0060253143310546875, -0.5499324798583984, 1.9378166198730469, 1.6851348876953125, 0.5708160400390625, 0.07241058349609375, -0.25739479064941406, 5.8158416748046875, -1.3696784973144531, 3.280773162841797, -0.012760162353515625, 3.8263778686523438, 2.113811492919922, 1.8653793334960938, -0.47283935546875, 0.41856956481933594, 1.0681419372558594, -1.7466583251953125, 3.7489700317382812, 1.2553672790527344, 2.8399810791015625, 0.6539974212646484, 4.3987274169921875, 3.6807937622070312, 2.1200027465820312, -0.37628173828125, 0.8494033813476562, -1.1412773132324219, 1.484354019165039, 2.5094146728515625, -0.7099037170410156, -0.8826560974121094, 0.7908859252929688, 1.3589859008789062, 2.6205272674560547, -1.3607254028320312, -0.11679840087890625, 4.755775451660156, -3.2636642456054688, 1.1232986450195312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000452.npy"} +{"epoch": 0.6832955404383976, "step": 453, "batch_size": 64, "mean": 1.3545395135879517, "std": 2.293470621109009, "min": -2.9840087890625, "p10": -1.0824085235595702, "median": 0.7776575088500977, "p90": 4.642979240417481, "max": 6.480682373046875, "pos_frac": 0.671875, "sample": [1.8921737670898438, 3.057239532470703, -0.9401321411132812, -0.4846458435058594, 6.3917083740234375, -0.0169677734375, 0.18886184692382812, 1.4931678771972656, 3.713958740234375, -1.89788818359375, -2.9840087890625, 0.9487571716308594, 2.827880859375, -1.0394439697265625, 4.659217834472656, 1.8033294677734375, 4.3785247802734375, -1.0993423461914062, 2.2524337768554688, 1.9114723205566406, 0.3330879211425781, -0.9683761596679688, 0.497100830078125, -0.1580810546875, 0.0203857421875, -0.1390228271484375, -1.6339797973632812, 4.60508918762207, -1.0428962707519531, 1.599111557006836, 5.989166259765625, 0.5624027252197266, 1.7739887237548828, -1.6008682250976562, 0.3316230773925781, 0.8692798614501953, 4.191404342651367, 1.908050537109375, -0.40380859375, 6.0995941162109375, 0.4193878173828125, 5.271080017089844, -0.7424163818359375, -1.337310791015625, -0.129425048828125, 1.2024459838867188, -0.8517303466796875, -0.4629096984863281, 0.5737648010253906, 1.96295166015625, 3.7986221313476562, 0.21194076538085938, 0.9293251037597656, 3.254453659057617, 1.1639175415039062, -1.48748779296875, -0.4363059997558594, 6.480682373046875, 3.4841842651367188, 0.114227294921875, 3.523284912109375, 4.001855850219727, 5.170402526855469, 0.68603515625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000453.npy"} +{"epoch": 0.6848072562358276, "step": 454, "batch_size": 64, "mean": 1.7468867301940918, "std": 2.326810836791992, "min": -4.945465087890625, "p10": -0.5289897918701172, "median": 1.3758811950683594, "p90": 4.994063568115235, "max": 6.845386505126953, "pos_frac": 0.765625, "sample": [4.339506149291992, -1.8311138153076172, -0.13274383544921875, 3.0492782592773438, -4.945465087890625, -0.3909149169921875, 2.2052688598632812, -0.492218017578125, 0.9562644958496094, 1.4554214477539062, 0.7712211608886719, 3.901844024658203, 2.336273193359375, -0.15811920166015625, 3.2829132080078125, 2.1628646850585938, 3.4281654357910156, 4.419822692871094, 3.36029052734375, 5.226959228515625, 0.07519149780273438, -1.9528579711914062, -0.4556446075439453, 4.4393157958984375, 4.791297912597656, 5.748321533203125, -1.8933868408203125, 0.790191650390625, 2.3131637573242188, 0.1908721923828125, -0.520111083984375, 2.946319580078125, 4.263496398925781, -0.5327949523925781, 1.5898780822753906, 0.9211483001708984, 5.609123229980469, -0.47223472595214844, 6.845386505126953, 0.9528217315673828, 0.4043312072753906, -0.02425384521484375, 1.0110435485839844, 0.3670072555541992, 0.6605815887451172, 0.87451171875, 4.650932312011719, 3.2738494873046875, 0.8992938995361328, 2.0092811584472656, 1.2963409423828125, 1.8251113891601562, 3.8478736877441406, 6.246061325073242, 1.8386754989624023, -1.4238815307617188, 1.0937175750732422, 5.080963134765625, 2.8615570068359375, 5.166423797607422, 1.8440589904785156, 0.07141494750976562, 0.6259689331054688, -1.2951316833496094], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000454.npy"} +{"epoch": 0.6863189720332578, "step": 455, "batch_size": 64, "mean": 1.6916615962982178, "std": 2.1701319217681885, "min": -3.2919349670410156, "p10": -0.6504756927490233, "median": 1.4846916198730469, "p90": 4.513197326660157, "max": 8.052947998046875, "pos_frac": 0.734375, "sample": [3.78643798828125, 2.2899169921875, 2.842357635498047, 3.001983642578125, 1.2821731567382812, -0.2959728240966797, 5.0071258544921875, 4.698772430419922, 0.0709075927734375, 1.0152511596679688, 0.48693275451660156, -0.47992706298828125, 1.7317085266113281, 0.9438743591308594, 0.832305908203125, 7.7038421630859375, 1.6555404663085938, 0.019134521484375, 2.1983413696289062, -0.0815887451171875, 4.174247741699219, 0.9862918853759766, 2.3620834350585938, -0.3509693145751953, -0.0173187255859375, 4.1175537109375, 1.0392913818359375, 4.130882263183594, 1.1568374633789062, -0.29561614990234375, 2.3801193237304688, 0.44994544982910156, 4.53662109375, 1.4278182983398438, 1.0328350067138672, -0.7235679626464844, -3.2919349670410156, 2.227567672729492, 3.9357681274414062, 4.4585418701171875, -1.5373611450195312, -1.0496292114257812, 1.661651611328125, -1.9289703369140625, 2.0132389068603516, 4.9242706298828125, 1.54156494140625, -0.00102996826171875, 2.0970687866210938, -0.2169952392578125, 8.052947998046875, 2.2001266479492188, -1.1331520080566406, 3.434152603149414, -0.85052490234375, 0.8917083740234375, 3.852275848388672, 1.5434341430664062, -0.095947265625, 1.1018314361572266, -0.15409088134765625, 1.8281211853027344, 2.4582786560058594, 5.187255859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000455.npy"} +{"epoch": 0.6878306878306878, "step": 456, "batch_size": 64, "mean": 1.507042646408081, "std": 1.9617642164230347, "min": -3.9775238037109375, "p10": -0.6913459777832031, "median": 1.3410224914550781, "p90": 3.7332290649414066, "max": 6.577770233154297, "pos_frac": 0.78125, "sample": [3.616943359375, -0.6040115356445312, -0.08975982666015625, 1.8352279663085938, 5.2159423828125, -0.5067291259765625, -0.4971809387207031, 2.224414825439453, -0.1553192138671875, 6.577770233154297, 1.987518310546875, 1.1713676452636719, 1.9884796142578125, 6.123313903808594, 3.4442291259765625, 1.3934783935546875, -1.9127655029296875, 2.3976669311523438, 0.8993721008300781, 1.2885665893554688, -0.7872467041015625, 1.016815185546875, 1.8700790405273438, 0.8574028015136719, 2.709897994995117, 0.31322288513183594, -1.8703079223632812, -0.427001953125, 1.4577102661132812, -0.7287750244140625, 2.434673309326172, 0.5366363525390625, 0.9315967559814453, 0.666778564453125, 2.0295257568359375, 2.9698410034179688, 0.7436065673828125, -1.0222396850585938, 2.5954513549804688, 0.28478431701660156, 0.2777862548828125, 1.1179008483886719, -0.008970260620117188, 3.094146728515625, 0.1491680145263672, -3.9775238037109375, 5.067028045654297, 1.8037452697753906, -0.8508834838867188, 0.16321182250976562, 3.7830657958984375, 4.809856414794922, 0.7793807983398438, 3.2504730224609375, 2.1098098754882812, 3.4557838439941406, 3.1509246826171875, 0.7235031127929688, 2.346019744873047, 5.117835998535156, 1.600442886352539, 1.0121078491210938, 1.6917877197265625, 2.8031539916992188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000456.npy"} +{"epoch": 0.6893424036281179, "step": 457, "batch_size": 64, "mean": 1.2110450267791748, "std": 1.8252229690551758, "min": -2.9617462158203125, "p10": -0.8312353134155274, "median": 1.0589408874511719, "p90": 3.4765052795410165, "max": 6.44915771484375, "pos_frac": 0.71875, "sample": [0.275390625, -1.32952880859375, 0.1793193817138672, 2.3360366821289062, 1.431814193725586, -0.7319450378417969, 1.5157623291015625, 0.216339111328125, -0.7481117248535156, 0.675262451171875, 0.4056510925292969, 5.5499267578125, 1.8155632019042969, 2.6561126708984375, -1.1676502227783203, 0.23049163818359375, 2.665874481201172, 2.3388290405273438, 0.3858013153076172, 3.2136154174804688, 2.4163665771484375, 1.1191787719726562, 4.454608917236328, -1.70166015625, 0.3217124938964844, 2.6285343170166016, -0.8445911407470703, 2.13275146484375, -2.9617462158203125, -0.09798812866210938, -1.7853164672851562, -0.8000717163085938, 1.0805587768554688, 2.1299972534179688, 1.0053443908691406, 1.037322998046875, -0.0272064208984375, 3.1932830810546875, -1.0499801635742188, 0.9335060119628906, 3.791881561279297, -0.7504348754882812, 2.2651309967041016, 4.7160491943359375, -0.5988731384277344, -0.113128662109375, 3.58917236328125, -0.2733325958251953, 2.012645721435547, 2.7588977813720703, 3.840343475341797, 0.7654514312744141, 6.44915771484375, -0.3828086853027344, 2.174306869506836, 2.814088821411133, 1.0922164916992188, 0.9468307495117188, 1.2025928497314453, 0.07160568237304688, 2.4697418212890625, 1.4617881774902344, 2.1290035247802734, -0.024602890014648438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000457.npy"} +{"epoch": 0.690854119425548, "step": 458, "batch_size": 64, "mean": 1.7431399822235107, "std": 2.147977590560913, "min": -4.973846435546875, "p10": -0.5054288864135742, "median": 1.7498207092285156, "p90": 4.1565589904785165, "max": 6.636577606201172, "pos_frac": 0.796875, "sample": [1.0642356872558594, 2.3531723022460938, 0.33228492736816406, 3.9334869384765625, 1.3248405456542969, 5.576343536376953, -0.06261444091796875, -4.973846435546875, 0.05522346496582031, -0.052577972412109375, -0.48687171936035156, -0.5133819580078125, -2.7842559814453125, 6.1374969482421875, 3.6537933349609375, 0.7935600280761719, 2.167938232421875, 3.1743316650390625, 1.6282424926757812, 3.1627197265625, 1.5508441925048828, 3.5112228393554688, 1.9554061889648438, -1.1878585815429688, 2.8709716796875, 2.8187255859375, 3.1078643798828125, 2.26190185546875, 1.256500244140625, 1.7446746826171875, 3.5863037109375, 0.7279224395751953, 1.0155448913574219, -0.9092025756835938, 0.910308837890625, -0.3917579650878906, 2.07342529296875, 1.7549667358398438, 3.8578338623046875, 6.636577606201172, 1.8034343719482422, 0.5852165222167969, -2.5286636352539062, 0.22948646545410156, 4.29302978515625, 1.1268749237060547, 1.4136104583740234, -0.12104988098144531, 5.066591262817383, 4.0345001220703125, -1.3175296783447266, 3.0690765380859375, 2.9080810546875, 2.422119140625, -0.1495380401611328, 5.892721176147461, 0.44654083251953125, 3.987110137939453, 0.5974578857421875, 2.0508880615234375, 1.3649864196777344, 2.277374267578125, 4.208869934082031, 2.2634735107421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000458.npy"} +{"epoch": 0.6923658352229781, "step": 459, "batch_size": 64, "mean": 1.8602561950683594, "std": 2.2425215244293213, "min": -1.8162078857421875, "p10": -1.0073205947875974, "median": 1.5994758605957031, "p90": 4.856965637207032, "max": 8.642234802246094, "pos_frac": 0.828125, "sample": [1.5436172485351562, 3.127941131591797, -1.8162078857421875, 1.0769500732421875, -1.5136604309082031, 0.964508056640625, 2.011495590209961, 3.148468017578125, 0.256988525390625, 1.740762710571289, 5.464111328125, -0.29685211181640625, 3.397113800048828, -1.1168708801269531, 3.7289962768554688, 7.562591552734375, 1.0251007080078125, 3.300628662109375, 2.1705856323242188, 1.9211502075195312, 2.3456039428710938, 1.8765411376953125, 1.17974853515625, 1.1505584716796875, 0.4027729034423828, 3.336669921875, 0.0586395263671875, 1.65533447265625, 1.9990177154541016, 4.227695465087891, 5.2018280029296875, 0.10544586181640625, -0.7517032623291016, 1.2222480773925781, -0.32515716552734375, 1.6682357788085938, 0.6710624694824219, -0.7140884399414062, 4.7034454345703125, -1.2405052185058594, 0.9072952270507812, 3.6771697998046875, 2.2105941772460938, 0.5716495513916016, 5.0176544189453125, 0.27753448486328125, 3.9067306518554688, -1.5999183654785156, -1.68646240234375, 5.099094390869141, 0.18864822387695312, 1.0553321838378906, -1.37677001953125, 4.236289978027344, 2.2149810791015625, 0.2532196044921875, 2.7203445434570312, 0.5357208251953125, 4.694709777832031, 4.922760009765625, 4.458049774169922, 0.2731361389160156, 8.642234802246094, 1.3855819702148438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000459.npy"} +{"epoch": 0.6938775510204082, "step": 460, "batch_size": 64, "mean": 1.9620935916900635, "std": 2.3397936820983887, "min": -1.5915756225585938, "p10": -0.522845458984375, "median": 1.4681282043457031, "p90": 5.392320060729981, "max": 8.502792358398438, "pos_frac": 0.8125, "sample": [4.274261474609375, 3.2164993286132812, 8.502792358398438, -0.9179840087890625, 0.1652069091796875, 6.750003814697266, 7.5001678466796875, 1.5252151489257812, 3.5674285888671875, -0.357452392578125, 0.16892242431640625, 1.8003425598144531, 0.6538619995117188, 2.077716827392578, 0.03054046630859375, 0.5859146118164062, 1.7136421203613281, 1.6897296905517578, -0.6296844482421875, 6.246501922607422, 0.7451744079589844, 5.148506164550781, 1.7168197631835938, 2.63043212890625, 6.2840118408203125, 3.3775787353515625, 0.6901969909667969, 0.4404449462890625, 3.468090057373047, 0.8319931030273438, 0.344146728515625, 6.011348724365234, 3.4379730224609375, 0.4812774658203125, 0.37424278259277344, 1.7420196533203125, 1.8633403778076172, -0.12232208251953125, 0.963104248046875, 1.1241779327392578, 5.181028366088867, 3.6591110229492188, -0.42766571044921875, 0.08849525451660156, 3.418987274169922, 3.8425140380859375, -0.5636367797851562, -1.0770721435546875, 0.5429534912109375, -1.5915756225585938, -0.28882598876953125, 1.411041259765625, 3.948699951171875, 0.25811195373535156, -0.7171096801757812, 1.7191162109375, -0.10443878173828125, 5.40484619140625, -0.8638420104980469, 0.1259002685546875, 3.15008544921875, 2.01544189453125, 0.9625396728515625, 5.363092422485352], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000460.npy"} +{"epoch": 0.6953892668178382, "step": 461, "batch_size": 64, "mean": 1.7409393787384033, "std": 2.18622088432312, "min": -2.640167236328125, "p10": -0.5816675186157226, "median": 1.6697978973388672, "p90": 4.569816398620606, "max": 7.110588073730469, "pos_frac": 0.734375, "sample": [2.2791824340820312, 2.110546112060547, 1.4107131958007812, 1.484588623046875, 5.379444122314453, 3.6541290283203125, 2.7525863647460938, 2.5499744415283203, 3.6623306274414062, -0.3059062957763672, -2.640167236328125, 3.7659225463867188, 1.6731109619140625, 1.1332569122314453, 2.4635982513427734, 0.0890045166015625, -0.3394317626953125, 4.358346939086914, 0.3019237518310547, 1.6772346496582031, 1.6664848327636719, 3.5310020446777344, 3.4639968872070312, 2.01263427734375, 0.8283348083496094, 4.297185897827148, -0.9423599243164062, 0.9158897399902344, -1.17333984375, 0.7678852081298828, 1.96392822265625, -1.4309921264648438, -2.5090484619140625, 1.7118244171142578, 1.56890869140625, 2.13623046875, 5.086669921875, -0.38510894775390625, 0.5070877075195312, 6.3611907958984375, 3.50213623046875, 6.171010971069336, 2.5810394287109375, 1.178680419921875, 0.4826087951660156, -0.5053596496582031, 0.9652824401855469, -0.5806541442871094, 2.9222469329833984, 2.4000473022460938, 3.72454833984375, 2.7684803009033203, 0.021875381469726562, -0.07237625122070312, 4.6604461669921875, -0.0792999267578125, 7.110588073730469, -0.4822731018066406, -0.5821018218994141, -1.4251689910888672, 4.115779876708984, -0.5718994140625, 5.613639831542969, -0.33795166015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000461.npy"} +{"epoch": 0.6969009826152683, "step": 462, "batch_size": 64, "mean": 1.9741871356964111, "std": 2.1197919845581055, "min": -2.490509033203125, "p10": -0.2513904571533203, "median": 1.6350936889648438, "p90": 4.191741180419922, "max": 9.666778564453125, "pos_frac": 0.859375, "sample": [5.287481307983398, 4.294033050537109, 4.114950180053711, 2.342742919921875, -0.3958892822265625, 0.8183212280273438, 2.2646255493164062, 2.3115577697753906, 0.671539306640625, 0.4613151550292969, -0.258880615234375, 6.325206756591797, 4.204826354980469, 1.5737991333007812, 1.4670562744140625, 4.159648895263672, 0.3829345703125, 2.872100830078125, 1.4343147277832031, 2.0293807983398438, 1.4631690979003906, 1.44036865234375, 7.5294952392578125, 1.3748626708984375, 1.5307769775390625, 1.0543479919433594, 1.7962570190429688, 2.00665283203125, -0.13863754272460938, 3.465423583984375, 1.3962860107421875, -0.512786865234375, 2.2657470703125, 3.119039535522461, 4.352104187011719, -2.03643798828125, -0.23391342163085938, 3.9505615234375, 2.419584274291992, 1.3317222595214844, 2.4263229370117188, 2.89971923828125, 2.643352508544922, 0.6217422485351562, 0.812164306640625, 2.9580116271972656, 2.4581069946289062, 1.1281242370605469, 3.461864471435547, 2.1592464447021484, 0.5377349853515625, -2.490509033203125, 4.020545959472656, 3.541116714477539, 4.1612091064453125, 0.5615921020507812, 0.7843303680419922, 9.666778564453125, 1.6963882446289062, 0.24061965942382812, -2.392831802368164, -0.9928016662597656, 0.3817253112792969, 1.127737045288086], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000462.npy"} +{"epoch": 0.6984126984126984, "step": 463, "batch_size": 64, "mean": 1.5237159729003906, "std": 1.9406721591949463, "min": -1.8279228210449219, "p10": -0.7266281127929686, "median": 1.056661605834961, "p90": 4.1991975784301765, "max": 6.641639709472656, "pos_frac": 0.734375, "sample": [-0.5055198669433594, 3.0793991088867188, 1.8151473999023438, -1.5179290771484375, 3.5708084106445312, 2.4083690643310547, 0.1333789825439453, 4.610054016113281, 3.059772491455078, 2.157978057861328, -0.3382530212402344, 2.0453948974609375, 0.7305049896240234, 4.361656188964844, 4.558982849121094, -0.79913330078125, -0.36148834228515625, -0.43792724609375, -0.10521888732910156, 0.8037948608398438, -0.8002109527587891, 0.163665771484375, 0.318695068359375, 2.902202606201172, 2.889202117919922, 3.5233154296875, 1.0291213989257812, 3.7074966430664062, -1.6997451782226562, -1.0620555877685547, 0.6972808837890625, -0.06949996948242188, 3.185943603515625, 4.278715133666992, 0.602508544921875, 2.5389251708984375, 1.2600860595703125, 0.698333740234375, 0.9384078979492188, 3.84124755859375, 1.0842018127441406, 0.6203689575195312, 0.009490966796875, -0.5574493408203125, -0.2690410614013672, -1.3451728820800781, 6.641639709472656, 1.2394485473632812, 4.319732666015625, 4.0136566162109375, 2.982086181640625, -0.2635955810546875, 1.5946311950683594, 0.09820175170898438, 0.1155242919921875, 3.777618408203125, 4.49519157409668, 3.451873779296875, 2.462158203125, 0.5156135559082031, -1.8279228210449219, 2.4836273193359375, -0.08076286315917969, 3.7432899475097656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000463.npy"} +{"epoch": 0.6999244142101285, "step": 464, "batch_size": 64, "mean": 1.3953003883361816, "std": 2.3356926441192627, "min": -4.15625, "p10": -1.5909805297851562, "median": 1.3533172607421875, "p90": 4.426829147338868, "max": 6.8247528076171875, "pos_frac": 0.703125, "sample": [-1.7691802978515625, 0.8790435791015625, 1.3299713134765625, -2.5218582153320312, 4.567779541015625, -0.842254638671875, -1.5644378662109375, 4.3141937255859375, 1.5341072082519531, 1.004486083984375, 3.463470458984375, -1.008209228515625, 5.791404724121094, 1.3865299224853516, 1.5832977294921875, -1.60235595703125, 1.98565673828125, -0.2504920959472656, 1.7628860473632812, 2.4519195556640625, 1.98773193359375, 6.8247528076171875, -1.6693878173828125, 2.2987442016601562, -0.04143524169921875, 4.050048828125, 1.0114822387695312, 2.9159774780273438, 2.6078109741210938, 1.5452957153320312, 1.2599945068359375, 0.45989227294921875, 3.6145782470703125, 5.9851531982421875, -4.15625, 1.1205902099609375, 3.940288543701172, 1.8671035766601562, -2.3797149658203125, -0.49359130859375, 4.1689300537109375, 0.46193504333496094, 1.4391326904296875, 4.020729064941406, 0.460662841796875, 1.3766632080078125, 0.5996017456054688, 0.6163921356201172, -1.4357757568359375, 4.6258087158203125, 2.407510757446289, 3.7548904418945312, -3.3619384765625, -0.7073020935058594, -0.29334259033203125, -0.9701347351074219, 0.9510879516601562, 4.475101470947266, 1.2187671661376953, 5.4820098876953125, -0.1467151641845703, 1.9231586456298828, 3.1787490844726562, -0.19171905517578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000464.npy"} +{"epoch": 0.7014361300075586, "step": 465, "batch_size": 64, "mean": 1.4749336242675781, "std": 2.2719950675964355, "min": -2.4767799377441406, "p10": -1.5286901473999024, "median": 1.1440353393554688, "p90": 4.606141662597658, "max": 6.6609649658203125, "pos_frac": 0.75, "sample": [-2.1549224853515625, 4.32391357421875, 1.3696670532226562, 0.51873779296875, 1.913055419921875, 3.2449188232421875, 0.001102447509765625, 6.4877166748046875, -1.5415802001953125, 2.4714889526367188, 0.2213287353515625, 3.6112289428710938, 3.590423583984375, 5.061126708984375, -1.4986133575439453, 3.6782073974609375, -1.1252288818359375, 3.3175010681152344, 2.765960693359375, 0.7295455932617188, 0.9152297973632812, -2.0286026000976562, 2.0233707427978516, 4.7270965576171875, 3.5440216064453125, 3.9459609985351562, -0.046894073486328125, -2.3181495666503906, -1.2217864990234375, 1.1233444213867188, 1.2269210815429688, -2.3981246948242188, 0.308074951171875, 2.4381484985351562, 2.0253753662109375, 3.1550750732421875, -1.0198688507080078, -0.1272411346435547, -0.7881145477294922, 1.0928268432617188, -2.3932418823242188, 0.9485015869140625, 1.0954742431640625, 2.6915664672851562, -0.7346343994140625, -2.4767799377441406, 0.5340156555175781, 1.2085304260253906, 3.2188453674316406, 5.474273681640625, 0.6468353271484375, 5.077301025390625, 1.0793075561523438, 5.673851013183594, 0.8412857055664062, -0.19106674194335938, 0.22599411010742188, 1.1078529357910156, 1.1647262573242188, 1.8515586853027344, 6.6609649658203125, 1.180938720703125, 2.9101943969726562, 3.0372161865234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000465.npy"} +{"epoch": 0.7029478458049887, "step": 466, "batch_size": 64, "mean": 1.096935749053955, "std": 1.8046488761901855, "min": -2.8179931640625, "p10": -0.8743089675903317, "median": 0.845036506652832, "p90": 3.4836605072021487, "max": 5.8575439453125, "pos_frac": 0.71875, "sample": [-0.36285400390625, 1.8165435791015625, -1.76556396484375, -0.472412109375, 1.1395034790039062, 0.41205787658691406, -0.1077728271484375, 0.15567398071289062, 0.9176483154296875, -2.8179931640625, 0.4553680419921875, 1.4956588745117188, 3.6414947509765625, -0.5034713745117188, 0.7840652465820312, -2.230438232421875, 0.8169517517089844, 5.373077392578125, 1.4900054931640625, 5.093452453613281, 2.7177772521972656, 1.6929645538330078, 3.5108985900878906, -0.3427448272705078, 1.486328125, 1.2105712890625, 2.4587783813476562, -0.6152629852294922, 5.8575439453125, 1.5047531127929688, 0.48097801208496094, -0.3974761962890625, -0.9853286743164062, 0.7484054565429688, 1.7625617980957031, 1.6746139526367188, 3.3025970458984375, 0.6729087829589844, -1.2429237365722656, 1.0872573852539062, -0.45160675048828125, 0.43988037109375, 0.9422206878662109, -1.6415252685546875, 3.42010498046875, 0.6730308532714844, -0.564300537109375, 2.3838653564453125, 5.4365386962890625, 0.6997337341308594, -1.3791732788085938, 4.149078369140625, -0.13713836669921875, 2.3451995849609375, 0.35338592529296875, -0.04558372497558594, 1.5657787322998047, 0.8731212615966797, 0.5853519439697266, 2.2679100036621094, 2.283079147338867, 1.4397506713867188, 2.1647262573242188, 0.48426055908203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000466.npy"} +{"epoch": 0.7044595616024187, "step": 467, "batch_size": 64, "mean": 1.7906594276428223, "std": 2.265979766845703, "min": -2.5134429931640625, "p10": -0.6145034790039062, "median": 1.816141128540039, "p90": 4.739654541015627, "max": 8.811870574951172, "pos_frac": 0.765625, "sample": [0.6292839050292969, 8.811870574951172, 2.2827606201171875, 2.7695770263671875, -0.17893218994140625, 0.5623760223388672, -0.5578041076660156, 0.3507499694824219, 3.0705108642578125, 2.4407997131347656, 2.575103759765625, -0.6165313720703125, 2.639087677001953, -0.4578285217285156, 2.5509796142578125, 5.256513595581055, -0.3798942565917969, 1.6704788208007812, 0.9595184326171875, 0.265716552734375, 1.9693450927734375, -1.3332290649414062, -0.23729896545410156, 4.99737548828125, 4.352596282958984, -0.33840179443359375, 4.316558837890625, 1.9142036437988281, -1.8125228881835938, 3.052764892578125, -0.7974643707275391, 3.5665359497070312, 0.27153968811035156, 1.75714111328125, -1.6468048095703125, 1.0042343139648438, 3.1587371826171875, 3.391387939453125, 2.27484130859375, 3.1649169921875, -0.5633506774902344, 0.0279388427734375, 0.9251861572265625, 1.5063858032226562, 1.2679615020751953, 1.529449462890625, -1.3363876342773438, -0.609771728515625, 1.94140625, 4.905536651611328, 7.76812744140625, 1.905181884765625, 2.5317001342773438, 0.2054290771484375, 1.8938522338867188, 2.1914138793945312, -2.5134429931640625, 5.886077880859375, 1.002939224243164, 6.9770965576171875, 1.624481201171875, 1.9951324462890625, 3.9939193725585938, 1.8751411437988281], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000467.npy"} +{"epoch": 0.7059712773998488, "step": 468, "batch_size": 64, "mean": 2.040811061859131, "std": 2.2755420207977295, "min": -2.311494827270508, "p10": -0.5377960205078125, "median": 2.040994644165039, "p90": 5.073587799072266, "max": 9.980976104736328, "pos_frac": 0.796875, "sample": [2.2616233825683594, -0.5781517028808594, 2.169994354248047, 5.110687255859375, 2.170919418334961, -0.010951995849609375, 0.24811744689941406, 2.8499755859375, 1.98260498046875, 0.6766738891601562, 3.244150161743164, 5.537452697753906, 3.8587799072265625, -0.3424701690673828, 1.276153564453125, 3.112957000732422, -0.23991012573242188, -1.5876617431640625, 0.8883056640625, 2.397787094116211, 5.191551208496094, 1.1924591064453125, 3.3780059814453125, 5.1257171630859375, 0.20594406127929688, -0.07233238220214844, 4.792444229125977, 0.9024009704589844, -1.4281768798828125, -1.6987686157226562, 4.961650848388672, -0.546356201171875, -0.517822265625, -2.311494827270508, 2.109220504760742, -1.0064315795898438, 2.6874542236328125, 2.0370712280273438, 0.7951812744140625, 3.1365585327148438, 0.7526397705078125, 0.63751220703125, 2.848909378051758, 4.15277099609375, 2.731830596923828, 1.1191291809082031, 1.1449108123779297, 4.073951721191406, 1.7124347686767578, 1.2412166595458984, 4.448234558105469, 4.987022399902344, 2.0449180603027344, 2.68634033203125, 6.4810638427734375, 0.483001708984375, 0.23213958740234375, 9.980976104736328, 2.0453109741210938, 4.510412216186523, 5.1782684326171875, 2.8624496459960938, -0.1143035888671875, 0.409454345703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000468.npy"} +{"epoch": 0.7074829931972789, "step": 469, "batch_size": 64, "mean": 1.710775375366211, "std": 2.1486048698425293, "min": -2.4246826171875, "p10": -0.3828392028808593, "median": 1.252044677734375, "p90": 4.134150314331055, "max": 7.502590179443359, "pos_frac": 0.8125, "sample": [2.4457550048828125, 2.94024658203125, 3.387725830078125, 1.4271240234375, 1.1060409545898438, 1.0968246459960938, 0.1934814453125, 0.8096103668212891, 3.070314407348633, -0.8303985595703125, 2.2425537109375, 3.7202224731445312, 1.2129058837890625, -2.085235595703125, 0.714599609375, 3.1003170013427734, 0.6515274047851562, 0.8608875274658203, 3.0056114196777344, 4.168872833251953, 4.644832611083984, 1.3560256958007812, 0.6219863891601562, -0.20516204833984375, 3.342498779296875, -0.8194122314453125, 1.0648956298828125, 0.5168952941894531, -0.4132061004638672, 0.02486419677734375, -1.8222236633300781, 0.13465499877929688, 3.317596435546875, 7.502590179443359, 3.8311614990234375, 0.18896484375, -0.014039993286132812, 0.39331817626953125, 3.3874568939208984, 7.429386138916016, 4.053131103515625, 3.0040664672851562, 1.4846324920654297, 2.9014053344726562, 0.4113616943359375, 0.7528152465820312, 2.910125732421875, -0.19866180419921875, 7.017341613769531, 1.5772933959960938, 0.173675537109375, -0.19453048706054688, 5.797367095947266, 0.04716300964355469, 5.781097412109375, -0.8912887573242188, 2.4667701721191406, 1.4137763977050781, 2.3610877990722656, -2.4246826171875, 1.7089080810546875, -0.3119831085205078, 0.635498046875, 1.2911834716796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000469.npy"} +{"epoch": 0.708994708994709, "step": 470, "batch_size": 64, "mean": 1.5718051195144653, "std": 2.1021814346313477, "min": -5.3111724853515625, "p10": -0.8721372604370117, "median": 1.8411359786987305, "p90": 4.075896835327149, "max": 6.969276428222656, "pos_frac": 0.78125, "sample": [2.3457565307617188, 2.516876220703125, 6.969276428222656, -5.3111724853515625, -0.177581787109375, 4.93121337890625, 4.685707092285156, 0.9915924072265625, 3.9073448181152344, 3.9580078125, 1.9566688537597656, -0.8957767486572266, 2.803302764892578, 1.8453445434570312, 2.74725341796875, 0.7242965698242188, 4.106082916259766, 0.0793914794921875, 0.3829231262207031, 1.8369274139404297, 0.4069938659667969, 2.890594482421875, 1.2293033599853516, -1.5772247314453125, 2.6836395263671875, 0.8566627502441406, 6.273191452026367, 2.1573944091796875, 0.259368896484375, -0.8169784545898438, 2.4001827239990234, 0.5147666931152344, 4.214439392089844, 0.918609619140625, -0.35293006896972656, -0.33716583251953125, -0.2579193115234375, -0.05930900573730469, 2.15045166015625, -1.9583702087402344, 2.2082061767578125, 2.776622772216797, 3.5168609619140625, 0.3914375305175781, -1.4698944091796875, -1.1777725219726562, 2.1907730102539062, 2.4503860473632812, 2.276611328125, 3.5635757446289062, 0.6044120788574219, 2.7582855224609375, -1.3859786987304688, 1.3566627502441406, 0.7863616943359375, 2.1954269409179688, 0.46333885192871094, 4.846527099609375, 0.7654953002929688, 2.2933712005615234, 4.005462646484375, 0.11433982849121094, 3.3127174377441406, -0.2468414306640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000470.npy"} +{"epoch": 0.7105064247921391, "step": 471, "batch_size": 64, "mean": 1.796998381614685, "std": 2.2461397647857666, "min": -6.263740539550781, "p10": -0.7710716247558593, "median": 1.5982189178466797, "p90": 4.570439910888672, "max": 7.2318115234375, "pos_frac": 0.828125, "sample": [0.6170997619628906, 3.3141708374023438, 3.970539093017578, 2.498760223388672, 0.5359725952148438, -1.2736663818359375, 3.2616424560546875, 2.5273056030273438, 5.773338317871094, 3.3532028198242188, 2.942668914794922, 0.20067596435546875, 4.104438781738281, 1.2423782348632812, -0.9979209899902344, -0.4394397735595703, 1.4138126373291016, 5.453529357910156, 4.4620208740234375, -0.6939697265625, 0.6300773620605469, 3.7899818420410156, 2.2246780395507812, 1.806732177734375, 1.9850502014160156, -6.263740539550781, 3.5775890350341797, 1.3664703369140625, 5.4749298095703125, 0.5912551879882812, 2.4505615234375, 1.3228607177734375, 0.2772712707519531, 0.8372516632080078, 0.8982429504394531, 1.0138320922851562, 4.11029052734375, 1.0248870849609375, 1.0416030883789062, 5.480339050292969, 3.1715850830078125, -0.6867523193359375, 4.616905212402344, 4.6508331298828125, 0.1086883544921875, 1.0951690673828125, 3.639923095703125, 2.248363494873047, -1.7461967468261719, 3.2434005737304688, -1.81671142578125, 2.893695831298828, 0.18294525146484375, 1.0605850219726562, 1.8272857666015625, -0.4108543395996094, 2.042083740234375, 1.5986747741699219, 1.5977630615234375, 7.2318115234375, 1.303466796875, -0.8041152954101562, -1.4997444152832031, 3.5523681640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000471.npy"} +{"epoch": 0.7120181405895691, "step": 472, "batch_size": 64, "mean": 1.6558947563171387, "std": 2.309941053390503, "min": -3.421661376953125, "p10": -1.3246658325195313, "median": 1.5816917419433594, "p90": 4.96196746826172, "max": 7.375102996826172, "pos_frac": 0.78125, "sample": [2.1013717651367188, 2.6306495666503906, 1.3539581298828125, 2.2977142333984375, 3.299701690673828, 3.3701210021972656, 1.5827178955078125, 2.5001373291015625, 2.958740234375, 5.039947509765625, 1.8157310485839844, 6.2646636962890625, 1.3052520751953125, -1.2722320556640625, 1.1688175201416016, 5.38752555847168, 0.9717502593994141, 2.5444793701171875, 7.375102996826172, 0.9788818359375, -2.2065162658691406, 2.335479736328125, -1.4548416137695312, 3.5940093994140625, -1.347137451171875, 1.60882568359375, 0.1131134033203125, 5.109344482421875, 6.198894500732422, -1.0069694519042969, 1.4810943603515625, 1.5806655883789062, 1.1075305938720703, -3.4093246459960938, -0.5956497192382812, 4.7800140380859375, 2.2894515991210938, 2.250457763671875, 0.3136177062988281, 0.8835220336914062, 2.263795852661133, 5.513603210449219, -2.48626708984375, 3.0315208435058594, -3.421661376953125, -1.6742286682128906, 2.8825721740722656, 0.2905006408691406, -1.0677871704101562, 1.3481063842773438, 0.7425174713134766, 0.6936855316162109, 4.421012878417969, -0.191009521484375, 0.7146835327148438, -0.5437545776367188, 2.5955162048339844, 2.2234573364257812, 2.9685115814208984, -0.711181640625, 2.0539016723632812, 1.4832344055175781, 1.0228691101074219, 4.523052215576172], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000472.npy"} +{"epoch": 0.7135298563869993, "step": 473, "batch_size": 64, "mean": 1.826924443244934, "std": 2.637139320373535, "min": -3.3104400634765625, "p10": -1.4329355239868162, "median": 1.3038787841796875, "p90": 5.235687255859376, "max": 8.988487243652344, "pos_frac": 0.765625, "sample": [5.319000244140625, 3.9168853759765625, 0.6229839324951172, -1.6985950469970703, -1.1988162994384766, 7.0077056884765625, 1.0230560302734375, 3.1931915283203125, 0.7398147583007812, 3.8485031127929688, 4.471546173095703, 0.2023601531982422, -0.9165458679199219, 1.769073486328125, 0.055110931396484375, -2.460308074951172, 5.5463409423828125, 3.8670425415039062, 2.1589584350585938, 3.6330184936523438, -1.9421348571777344, -0.4491729736328125, 4.19512939453125, -1.481353759765625, -3.3104400634765625, 2.7096023559570312, 3.035968780517578, 5.041290283203125, 1.2944526672363281, 0.6591949462890625, 3.7216644287109375, 0.44710731506347656, -2.0010929107666016, -1.1534881591796875, -1.3199596405029297, 4.447622299194336, 8.30499267578125, 6.125030517578125, 2.125804901123047, 0.7138195037841797, 1.14166259765625, 1.1533622741699219, 4.5841064453125, 8.988487243652344, 0.3044853210449219, 1.3133049011230469, 0.45714378356933594, 1.0268783569335938, 1.6001052856445312, 2.0748348236083984, 2.2164039611816406, 0.8525543212890625, 3.7091102600097656, 1.4395751953125, -0.002796173095703125, 2.145893096923828, 4.80908203125, -0.24785614013671875, -0.25493621826171875, 1.04925537109375, 0.43902587890625, 5.606157302856445, 2.705352783203125, -2.452392578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000473.npy"} +{"epoch": 0.7150415721844293, "step": 474, "batch_size": 64, "mean": 1.487779140472412, "std": 2.25960111618042, "min": -2.93487548828125, "p10": -1.2340421676635742, "median": 1.0669307708740234, "p90": 4.52098274230957, "max": 6.480438232421875, "pos_frac": 0.71875, "sample": [5.280906677246094, 2.473529815673828, 2.1806411743164062, -1.5635108947753906, -0.98138427734375, -0.4408683776855469, 3.2523345947265625, 4.556385040283203, 2.4365615844726562, -0.3128700256347656, -0.6184310913085938, 3.09576416015625, -2.0043869018554688, 2.1632461547851562, 2.4593276977539062, 5.741703033447266, 0.8498306274414062, 0.24033546447753906, -0.7300262451171875, 2.69256591796875, -1.2433605194091797, -0.8438663482666016, 3.9166908264160156, 2.0847434997558594, 3.0893402099609375, 0.1729259490966797, 0.2632465362548828, -2.93487548828125, 0.8052101135253906, -0.01311492919921875, 6.327724456787109, 1.5670394897460938, -1.8720283508300781, 0.571807861328125, -1.2122993469238281, 4.022911071777344, 0.06669998168945312, 2.6335372924804688, 2.6717071533203125, -0.6639022827148438, 0.6246871948242188, 1.30908203125, 3.597015380859375, 2.320770263671875, 0.41791534423828125, 5.0679168701171875, 6.480438232421875, 0.9552764892578125, 3.7217979431152344, -0.1840991973876953, 4.074563980102539, 4.438377380371094, 0.710113525390625, 0.9984626770019531, 1.1353988647460938, -1.7299461364746094, 4.7631378173828125, -0.9360275268554688, 1.5237922668457031, 3.33599853515625, 3.7513160705566406, -2.4787368774414062, 0.3938751220703125, 0.7449455261230469], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000474.npy"} +{"epoch": 0.7165532879818595, "step": 475, "batch_size": 64, "mean": 2.3050661087036133, "std": 2.287398338317871, "min": -1.8331146240234375, "p10": -0.5489334106445313, "median": 1.9433708190917969, "p90": 5.66020965576172, "max": 8.279594421386719, "pos_frac": 0.84375, "sample": [0.06072425842285156, 1.8970298767089844, 2.7300491333007812, 1.9229507446289062, 2.994709014892578, 1.3078460693359375, 2.7399253845214844, 4.592948913574219, 3.056560516357422, -1.8331146240234375, 0.7695388793945312, 0.28093719482421875, 3.71246337890625, 5.325347900390625, 2.077880859375, 3.0318603515625, 2.864187240600586, 1.4371452331542969, 1.1637077331542969, 2.9328460693359375, 1.6788558959960938, 1.431243896484375, 5.1618194580078125, 0.5481834411621094, 2.1393508911132812, 1.4129753112792969, -0.2501983642578125, 6.6372833251953125, 1.7418212890625, -0.5206069946289062, 4.862724304199219, 4.744667053222656, 5.7565460205078125, 8.279594421386719, 1.2700691223144531, -1.0642166137695312, 1.753122329711914, -0.5610733032226562, 1.0356369018554688, -0.6281299591064453, 6.285186767578125, 4.097686767578125, 0.7231616973876953, 0.3920440673828125, 2.3858184814453125, -0.8877754211425781, -1.1245765686035156, 2.0487709045410156, 7.02490234375, 0.6961746215820312, 5.835277557373047, 6.489959716796875, 3.697296142578125, 1.6983261108398438, 2.907421112060547, 0.5517349243164062, -1.7633285522460938, 3.747467041015625, 5.4354248046875, 2.7458858489990234, 1.4542350769042969, 1.9637908935546875, -0.4898223876953125, 3.113950729370117], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000475.npy"} +{"epoch": 0.7180650037792895, "step": 476, "batch_size": 64, "mean": 1.4578593969345093, "std": 2.4392237663269043, "min": -4.170286178588867, "p10": -1.2385658264160155, "median": 1.3776359558105469, "p90": 4.146881675720215, "max": 8.0914306640625, "pos_frac": 0.71875, "sample": [4.185218811035156, 3.571918487548828, -1.4248981475830078, 1.5631332397460938, -1.6418228149414062, 3.0946731567382812, -1.063232421875, 5.578601837158203, 8.0914306640625, 1.4870071411132812, 3.820220947265625, -0.06570243835449219, 6.035127639770508, 3.569150924682617, 3.2962303161621094, 0.29444122314453125, 0.2075347900390625, 0.67669677734375, 1.9709510803222656, 2.3377227783203125, -0.831268310546875, 0.20475006103515625, 2.6563587188720703, 2.1966514587402344, 3.7449874877929688, -4.170286178588867, -1.2041778564453125, 0.4545555114746094, -0.4878692626953125, 1.0588264465332031, 6.960903167724609, -3.9326019287109375, 1.0089035034179688, -0.3830432891845703, 2.007274627685547, -1.2533035278320312, -0.1202850341796875, -3.248504638671875, 3.1414108276367188, 0.3643035888671875, -0.5640106201171875, 3.2744140625, 3.1664886474609375, 1.288970947265625, 2.3969497680664062, 0.7983169555664062, 1.4663009643554688, 0.6356277465820312, 0.654693603515625, 4.3883514404296875, 2.0707626342773438, -2.839508056640625, 2.779876708984375, 1.189361572265625, 2.951679229736328, 4.309059143066406, -0.9780406951904297, 0.46178436279296875, 3.5409584045410156, 3.187774658203125, -0.4737052917480469, 4.057428359985352, 2.429210662841797, -0.6417350769042969], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000476.npy"} +{"epoch": 0.7195767195767195, "step": 477, "batch_size": 64, "mean": 1.4951432943344116, "std": 2.413088083267212, "min": -3.807079315185547, "p10": -1.9019317626953125, "median": 1.2776317596435547, "p90": 4.433393096923829, "max": 7.141300201416016, "pos_frac": 0.71875, "sample": [-1.8331451416015625, -0.5289688110351562, 1.8427505493164062, 0.59783935546875, 4.3448944091796875, 1.2219200134277344, 0.6825485229492188, 0.17397499084472656, -1.321868896484375, -0.6758880615234375, 3.7391281127929688, 1.0390129089355469, -2.4718990325927734, 1.343027114868164, 2.6091079711914062, 1.0359134674072266, -1.3029632568359375, 4.471321105957031, 4.1297607421875, 0.28333473205566406, 0.3097686767578125, 3.2274112701416016, 1.8662185668945312, 1.333343505859375, 3.664888381958008, -0.3059883117675781, -1.9586257934570312, 2.9028358459472656, 2.2547683715820312, 0.9196910858154297, -2.7332611083984375, -0.0767669677734375, 1.8034954071044922, -0.29471588134765625, -3.807079315185547, 0.17228317260742188, -1.9314117431640625, 1.144287109375, 1.7457122802734375, 1.0351009368896484, 7.141300201416016, -0.03504180908203125, 3.6032867431640625, 5.530601501464844, 2.3920555114746094, 4.588447570800781, -1.1935577392578125, 4.947601318359375, 2.9318923950195312, -1.93707275390625, 4.2723388671875, 0.5880889892578125, 5.90087890625, 3.6966590881347656, 4.5889739990234375, 4.324188232421875, -2.652923583984375, 2.98883056640625, 3.626190185546875, 4.129390716552734, 0.2737274169921875, 3.239917755126953, -0.9833526611328125, 3.074991226196289], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000477.npy"} +{"epoch": 0.7210884353741497, "step": 478, "batch_size": 64, "mean": 1.4085867404937744, "std": 2.122764825820923, "min": -2.448028564453125, "p10": -1.1111791610717772, "median": 1.367691993713379, "p90": 4.641084098815918, "max": 6.065685272216797, "pos_frac": 0.65625, "sample": [-0.07600212097167969, 5.149253845214844, -0.5797882080078125, 3.7377185821533203, 1.6725444793701172, -0.5904998779296875, 4.585914611816406, 4.130378723144531, -1.0338249206542969, 2.7190475463867188, -1.0160770416259766, 0.43843841552734375, 1.5784149169921875, 5.208320617675781, -0.6699981689453125, 1.7603950500488281, 0.6250762939453125, 0.5543365478515625, -0.3756866455078125, 2.0039520263671875, -0.7852096557617188, -0.5606212615966797, 1.781829833984375, 1.734222412109375, 3.8034133911132812, -2.448028564453125, 1.8557415008544922, 2.478302001953125, 3.6508255004882812, -1.5978317260742188, 3.3939590454101562, -0.1680622100830078, -1.5494918823242188, -1.9824676513671875, -0.6206245422363281, 4.7736968994140625, 3.477130889892578, -0.428497314453125, 3.2363967895507812, 1.8647842407226562, 4.664728164672852, -0.0698394775390625, -1.3962249755859375, 1.2951221466064453, 5.251766204833984, 1.6493377685546875, 0.3259429931640625, 3.1092376708984375, 1.4402618408203125, -0.5470619201660156, 1.8806953430175781, 3.3007640838623047, -1.2208747863769531, 1.675750732421875, -0.324676513671875, 5.700599670410156, 6.065685272216797, 1.1193466186523438, 1.2753257751464844, 1.5996017456054688, 0.8088188171386719, -1.1443309783935547, 0.9410171508789062, 1.0171794891357422], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000478.npy"} +{"epoch": 0.7226001511715797, "step": 479, "batch_size": 64, "mean": 1.5997782945632935, "std": 2.6954545974731445, "min": -5.336582183837891, "p10": -1.5123588562011718, "median": 1.3551959991455078, "p90": 4.882189178466798, "max": 8.938098907470703, "pos_frac": 0.71875, "sample": [2.934032440185547, 8.938098907470703, -1.9978790283203125, -1.5941123962402344, 0.2902641296386719, 1.9620113372802734, 7.531078338623047, 1.2925682067871094, 4.489448547363281, -0.0614471435546875, 2.0237865447998047, -1.39990234375, 0.23102569580078125, -0.6692447662353516, 0.30816650390625, 3.2981109619140625, 4.3330230712890625, 3.0211181640625, 0.2529563903808594, 0.5832958221435547, -0.1483917236328125, -0.2758903503417969, 5.050506591796875, 2.9639244079589844, 0.9421463012695312, -1.7917900085449219, 1.4923629760742188, 0.135589599609375, -1.1834487915039062, 1.4178237915039062, 3.342437744140625, 2.2257423400878906, -0.045867919921875, 7.146942138671875, 7.331207275390625, 0.29358863830566406, -2.818714141845703, -0.5460968017578125, 1.4238948822021484, 2.7698974609375, 8.674591064453125, -5.336582183837891, -1.5605545043945312, 1.2584362030029297, 3.583456039428711, 5.659566879272461, 2.386005401611328, -0.889190673828125, 0.6235923767089844, 0.7017135620117188, 1.6810760498046875, 2.8400650024414062, -0.28493309020996094, 3.886688232421875, 0.9516754150390625, 2.266956329345703, 2.836759567260742, 1.4942989349365234, 0.7244167327880859, 2.2627334594726562, -0.05866813659667969, -1.5807037353515625, 1.9451446533203125, 2.8270034790039062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000479.npy"} +{"epoch": 0.7241118669690099, "step": 480, "batch_size": 64, "mean": 1.559873104095459, "std": 2.458871603012085, "min": -6.3629608154296875, "p10": -0.985198974609375, "median": 1.5345096588134766, "p90": 4.708281707763675, "max": 6.148231506347656, "pos_frac": 0.796875, "sample": [2.40863037109375, -1.875762939453125, 2.33074951171875, -1.2862777709960938, 2.111146926879883, 3.0434093475341797, 1.8289470672607422, -5.564022064208984, 1.1818923950195312, 5.889892578125, -0.28949737548828125, 1.4574813842773438, 3.446765899658203, 1.1647491455078125, 1.06256103515625, -0.93927001953125, 0.817962646484375, 1.1108474731445312, -0.683197021484375, 0.5104179382324219, -2.051624298095703, 3.9555435180664062, 1.604522705078125, -0.9180831909179688, 3.972564697265625, 5.006467819213867, 5.30792236328125, 3.7111892700195312, 6.148231506347656, 2.7530441284179688, 3.530834197998047, 0.32708168029785156, 1.4493331909179688, 1.4644966125488281, 1.87164306640625, 1.7799549102783203, 3.7069931030273438, 1.1873321533203125, -6.3629608154296875, 1.0371475219726562, 5.979103088378906, 3.2922286987304688, 2.1283397674560547, 0.18751907348632812, 2.008697509765625, 4.012514114379883, 1.4566459655761719, -0.7779617309570312, 2.3599166870117188, 1.2658958435058594, 1.6624202728271484, -3.4736175537109375, 5.5467376708984375, 2.827606201171875, 1.2728195190429688, 3.0850276947021484, 2.928682327270508, 1.824544906616211, 0.46649932861328125, -0.19738197326660156, 0.1417083740234375, 5.595344543457031, 0.03441619873046875, -1.0048828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000480.npy"} +{"epoch": 0.7256235827664399, "step": 481, "batch_size": 64, "mean": 1.7691867351531982, "std": 2.5597195625305176, "min": -2.876100540161133, "p10": -0.9831123352050781, "median": 1.3527660369873047, "p90": 4.459379196166992, "max": 12.723457336425781, "pos_frac": 0.796875, "sample": [1.7294673919677734, 0.47139739990234375, 7.908840179443359, 0.9696731567382812, -1.8865413665771484, 2.431428909301758, 4.430072784423828, 1.8457565307617188, -1.0041160583496094, -0.7167892456054688, 4.2042236328125, 1.3384475708007812, 0.6584396362304688, 3.36236572265625, 0.8209152221679688, 5.456939697265625, 2.3158645629882812, 1.3405342102050781, 1.8056869506835938, 2.0066909790039062, -2.4293365478515625, -0.17412948608398438, 4.4719390869140625, 1.1519603729248047, 0.4225196838378906, -0.2939643859863281, 1.75164794921875, 2.8653621673583984, 0.8724403381347656, 1.8070755004882812, -0.3180427551269531, 3.3668689727783203, 0.5621414184570312, 1.368316650390625, 0.8968505859375, 1.2079620361328125, 3.2238540649414062, 1.425445556640625, 0.7553882598876953, 1.20330810546875, -0.985870361328125, 1.3649978637695312, 2.335783004760742, 6.500358581542969, 0.4168243408203125, 5.724514007568359, 1.2831573486328125, 1.4031219482421875, 12.723457336425781, 2.3940067291259766, 3.392364501953125, 1.0294189453125, 0.056133270263671875, -0.9766769409179688, -1.341848373413086, 2.8797607421875, -2.876100540161133, 2.6367263793945312, 3.4958248138427734, 6.69036865234375, 3.3720626831054688, 1.176870346069336, -2.434783935546875, -0.6594276428222656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000481.npy"} +{"epoch": 0.72713529856387, "step": 482, "batch_size": 64, "mean": 1.3626246452331543, "std": 2.293668270111084, "min": -4.366905212402344, "p10": -1.8167884826660154, "median": 1.51226806640625, "p90": 4.009041595458985, "max": 8.1248779296875, "pos_frac": 0.75, "sample": [-1.3773040771484375, -0.00213623046875, 0.488739013671875, 2.3240089416503906, 0.912506103515625, -0.7898902893066406, 2.9561233520507812, -2.7666015625, -0.10666275024414062, 4.704864501953125, 1.1685504913330078, 3.3455047607421875, 1.847076416015625, 1.2670745849609375, 1.5054588317871094, 1.7214889526367188, 1.6458740234375, 2.0874786376953125, -2.276599884033203, 2.5637664794921875, 5.6951446533203125, 1.978118896484375, 8.1248779296875, -0.31353759765625, -0.7468605041503906, 2.3461666107177734, 4.5156402587890625, 2.270040512084961, -0.5291633605957031, 0.7338542938232422, -1.7030715942382812, 1.505401611328125, -2.2297420501708984, 1.1072063446044922, 3.8065643310546875, 2.1685638427734375, 4.095817565917969, 0.14335250854492188, 1.6822662353515625, -2.1741180419921875, 5.827606201171875, 0.903106689453125, 1.8457260131835938, 0.6007843017578125, 1.5190773010253906, 2.304044723510742, 3.2744064331054688, 2.3918380737304688, 6.49871826171875, -2.3790435791015625, 2.2956085205078125, 1.8588485717773438, 2.840190887451172, 1.3562469482421875, 2.7824649810791016, 0.6040401458740234, -4.366905212402344, 3.2046051025390625, -1.8655242919921875, -1.365264892578125, 1.963653564453125, 0.4835929870605469, 0.72296142578125, 0.21135330200195312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000482.npy"} +{"epoch": 0.7286470143613001, "step": 483, "batch_size": 64, "mean": 1.850631594657898, "std": 2.539609670639038, "min": -4.108070373535156, "p10": -0.8473583221435547, "median": 1.701822280883789, "p90": 5.057091140747072, "max": 8.856369018554688, "pos_frac": 0.671875, "sample": [-0.9343910217285156, 5.6523284912109375, -0.5684947967529297, 0.5192680358886719, 3.3617210388183594, 7.611328125, 2.8911476135253906, 0.2885894775390625, -0.521240234375, -1.1388015747070312, 1.6678466796875, 2.5689849853515625, 4.741973876953125, 5.552925109863281, -0.8122291564941406, 4.656465530395508, 0.790435791015625, -1.1414260864257812, -0.4387359619140625, 3.7407608032226562, 5.1490478515625, 4.842525482177734, 4.582099914550781, -0.1277618408203125, 3.2213211059570312, 1.7155799865722656, 3.5987167358398438, -0.8551254272460938, 1.2200393676757812, -0.5836029052734375, 1.5679550170898438, -1.9674606323242188, 7.0738525390625, 0.482391357421875, 4.389373779296875, 4.05389404296875, 1.6880645751953125, -4.108070373535156, 2.3738574981689453, -0.7706069946289062, 0.44371795654296875, 1.889303207397461, 1.7473678588867188, -0.4256706237792969, -1.0462265014648438, 2.364513397216797, 8.856369018554688, 4.0396881103515625, 2.3890533447265625, 3.4664363861083984, 1.3557891845703125, 3.6999969482421875, 1.2053985595703125, 5.33660888671875, -0.8166446685791016, -0.21149253845214844, -0.30914306640625, 2.200777053833008, 2.4182510375976562, 2.289276123046875, -0.284332275390625, -0.8292350769042969, -0.62396240234375, 3.2500343322753906], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000483.npy"} +{"epoch": 0.7301587301587301, "step": 484, "batch_size": 64, "mean": 1.2873001098632812, "std": 1.9544739723205566, "min": -3.6701889038085938, "p10": -0.753826904296875, "median": 1.194478988647461, "p90": 3.4138656616210947, "max": 7.591503143310547, "pos_frac": 0.765625, "sample": [0.315185546875, 1.8881378173828125, -3.6701889038085938, 1.6933670043945312, 7.591503143310547, 1.0087642669677734, 0.50836181640625, -1.856292724609375, 4.48431396484375, -1.0580368041992188, 1.209442138671875, 0.4832763671875, 2.314695358276367, 1.2632255554199219, 1.4496173858642578, 3.1370201110839844, -0.173004150390625, -0.7541427612304688, 1.0675506591796875, 2.9116973876953125, -0.21109771728515625, 1.5554351806640625, 3.5221176147460938, 1.939748764038086, 1.2930984497070312, 2.2381420135498047, 1.5540618896484375, 0.15595245361328125, 1.5224952697753906, 0.08876228332519531, 0.25728797912597656, 3.1612777709960938, -0.120635986328125, -0.8958511352539062, 4.472541809082031, -0.605133056640625, 1.8652019500732422, 1.2077140808105469, 1.9863624572753906, 0.11751556396484375, 1.0751991271972656, -0.7140731811523438, 5.643955230712891, -0.347686767578125, 3.6148529052734375, 1.0791759490966797, 0.952911376953125, 0.69403076171875, 1.7471084594726562, 1.181243896484375, 6.568096160888672, 1.6951580047607422, 2.868988037109375, -1.13580322265625, -0.07361602783203125, -0.7530899047851562, 2.8591957092285156, 0.65087890625, 0.7963466644287109, 1.714080810546875, 0.99017333984375, 2.8047332763671875, 2.5153427124023438, -2.9594802856445312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000484.npy"} +{"epoch": 0.7316704459561603, "step": 485, "batch_size": 64, "mean": 1.9372669458389282, "std": 2.373443603515625, "min": -2.9358291625976562, "p10": -0.5460830688476562, "median": 1.5299415588378906, "p90": 5.3059432983398445, "max": 8.572967529296875, "pos_frac": 0.78125, "sample": [2.1044845581054688, 1.1160411834716797, 3.4301795959472656, 3.3381195068359375, 1.5377655029296875, 1.7997360229492188, 1.8072738647460938, 0.5623226165771484, 4.533233642578125, -0.4707469940185547, -2.9358291625976562, 1.1478652954101562, 6.660371780395508, 2.7586593627929688, 0.18260955810546875, 4.3696136474609375, 1.9593391418457031, 5.0831451416015625, 7.1636810302734375, 1.4125442504882812, 1.0494022369384766, 2.306427001953125, 0.2313232421875, 8.572967529296875, -0.2375774383544922, 0.13568878173828125, -1.3761978149414062, 1.374176025390625, 1.1316757202148438, 2.1227340698242188, -0.6928176879882812, -0.08204269409179688, 3.026031494140625, -0.3587760925292969, 2.9800338745117188, -1.10601806640625, 4.9007568359375, 5.40142822265625, 1.3035736083984375, 2.8604736328125, 2.2246780395507812, -2.5322914123535156, 1.5221176147460938, 1.5947799682617188, 1.1449432373046875, -0.4033622741699219, -0.47705078125, 4.5670623779296875, 7.05842399597168, 1.2764434814453125, 0.12958145141601562, 1.9072761535644531, 2.2653656005859375, 5.907258987426758, 2.712635040283203, 4.574085235595703, 1.0598983764648438, 5.669532775878906, -0.5756683349609375, -0.13657569885253906, -0.97393798828125, 0.3945198059082031, 1.3594284057617188, 2.6122665405273438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000485.npy"} +{"epoch": 0.7331821617535903, "step": 486, "batch_size": 64, "mean": 1.6815694570541382, "std": 2.7620608806610107, "min": -4.56690788269043, "p10": -1.0568981170654297, "median": 1.2253608703613281, "p90": 5.538944816589355, "max": 11.560089111328125, "pos_frac": 0.765625, "sample": [1.0181503295898438, 1.4922904968261719, 3.29449462890625, 2.038053512573242, -0.192291259765625, 0.09144973754882812, 2.2230148315429688, 11.560089111328125, 1.9108505249023438, -1.1041107177734375, -0.9467353820800781, 3.245025634765625, 0.8147811889648438, 0.8243694305419922, 3.569293975830078, 1.4325714111328125, 2.6755599975585938, 3.9459056854248047, -0.8105831146240234, 2.8560333251953125, 1.7910881042480469, 0.7048683166503906, -2.0130081176757812, 1.48919677734375, 6.232295989990234, -2.620819091796875, 0.809722900390625, -1.8920745849609375, 2.9786128997802734, -2.4521408081054688, 5.5340728759765625, 0.4121284484863281, 0.5491104125976562, -4.56690788269043, 2.5511951446533203, -0.5792922973632812, 0.8791847229003906, 4.021308898925781, -0.9372692108154297, 0.9254112243652344, 0.49947357177734375, 5.285591125488281, 2.6519241333007812, 6.906768798828125, 5.541032791137695, 0.12869644165039062, 1.939910888671875, 2.779012680053711, 3.13153076171875, 6.67152214050293, 0.3559417724609375, 1.570394515991211, 2.0753326416015625, 6.54888916015625, -0.45944786071777344, 0.9217987060546875, 0.5172939300537109, 0.7146968841552734, 7.07403564453125, -0.5719833374023438, 2.2811660766601562, -2.945127487182617, 0.7259254455566406, -0.4788360595703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000486.npy"} +{"epoch": 0.7346938775510204, "step": 487, "batch_size": 64, "mean": 1.6285200119018555, "std": 2.1511025428771973, "min": -2.542285919189453, "p10": -0.661643600463867, "median": 1.444016456604004, "p90": 4.1244132995605485, "max": 8.937103271484375, "pos_frac": 0.8125, "sample": [4.330230712890625, -1.335784912109375, 0.225860595703125, 1.58148193359375, 3.182689666748047, 0.7875156402587891, -0.5647735595703125, 1.9875030517578125, 7.0255584716796875, 2.4204864501953125, 2.32012939453125, -0.49884796142578125, 0.28351593017578125, -0.74853515625, -0.2549285888671875, 6.574436187744141, 3.354217529296875, 2.442890167236328, 3.25701904296875, 3.7978134155273438, -2.2028732299804688, 2.091400146484375, 0.1846485137939453, 3.489198684692383, 1.5766925811767578, 1.1118221282958984, 0.3726959228515625, -0.39154052734375, 0.0160980224609375, 0.1816253662109375, 6.4581756591796875, 2.5847320556640625, 2.5689620971679688, -1.166769027709961, 4.264244079589844, -0.05615234375, 2.0001564025878906, 0.423828125, 0.4708538055419922, 1.2340126037597656, 3.014129638671875, 0.22179412841796875, 0.0800933837890625, 1.8416862487792969, 0.09906387329101562, 1.3238716125488281, 1.9555931091308594, 0.8076972961425781, -0.7031593322753906, -2.542285919189453, 0.7043094635009766, 2.2064666748046875, 1.795440673828125, 1.0581741333007812, 0.9383697509765625, 3.7981414794921875, 0.7381553649902344, 2.375171661376953, -1.0788726806640625, 3.496307373046875, 4.3020782470703125, 1.9115047454833984, 8.937103271484375, 1.5641613006591797], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000487.npy"} +{"epoch": 0.7362055933484505, "step": 488, "batch_size": 64, "mean": 1.956758737564087, "std": 2.32698917388916, "min": -1.983642578125, "p10": -0.6372182846069334, "median": 1.5506877899169922, "p90": 4.793375778198242, "max": 11.028205871582031, "pos_frac": 0.8125, "sample": [0.8286323547363281, 0.7308273315429688, -0.7218055725097656, -1.0974750518798828, 1.3959884643554688, -0.18516921997070312, -1.2975540161132812, 2.589466094970703, 4.781036376953125, 5.321681976318359, 0.9092426300048828, 1.745941162109375, 4.879150390625, 1.209136962890625, 0.9830169677734375, 11.028205871582031, 2.1838302612304688, 4.5135650634765625, -0.17238807678222656, 0.0549774169921875, 1.6363906860351562, 2.6885604858398438, 0.6405715942382812, 0.6355438232421875, 1.6666183471679688, 3.0581436157226562, -1.757080078125, 4.798664093017578, 0.157989501953125, 3.084156036376953, 0.1323089599609375, -0.233795166015625, -0.4398479461669922, 6.024482727050781, 2.101470947265625, 1.0958442687988281, 3.356546401977539, 0.6509132385253906, 4.762115478515625, 3.5203704833984375, 3.108020782470703, 1.6972198486328125, 3.9248123168945312, 3.0373687744140625, 4.386985778808594, 2.2818450927734375, -0.08617401123046875, 0.9055557250976562, -1.4170074462890625, 0.9637603759765625, 1.489776611328125, 1.6115989685058594, -1.5335102081298828, 1.3950881958007812, 0.511505126953125, 3.3977813720703125, 3.41455078125, 5.637332916259766, 3.702291488647461, 3.781482696533203, 1.2942657470703125, 0.17713165283203125, -1.983642578125, 6.2742462158203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000488.npy"} +{"epoch": 0.7377173091458806, "step": 489, "batch_size": 64, "mean": 1.0620079040527344, "std": 2.2172601222991943, "min": -3.32427978515625, "p10": -1.5631662368774415, "median": 0.7447443008422852, "p90": 4.076732063293459, "max": 5.7210235595703125, "pos_frac": 0.71875, "sample": [-3.32427978515625, 1.5700645446777344, 2.5227622985839844, 2.424163818359375, 4.268501281738281, 3.1078224182128906, 0.232696533203125, 2.5505905151367188, 3.629270553588867, 4.87078857421875, 2.541584014892578, 1.0818634033203125, 4.609161376953125, -2.468097686767578, -0.5836257934570312, 2.7069778442382812, -2.1510848999023438, 0.4277458190917969, 3.6243896484375, 1.368032455444336, 5.7210235595703125, 2.8881759643554688, 0.42584991455078125, -1.3803482055664062, 2.7063446044921875, 3.167896270751953, -1.3380050659179688, 0.5810089111328125, 5.695526123046875, 0.2834053039550781, -1.548614501953125, -1.3699111938476562, 4.9420928955078125, 1.3025436401367188, 0.4338226318359375, -0.7484912872314453, 0.17352294921875, 0.2544231414794922, 3.0352401733398438, 0.8907699584960938, 0.7742290496826172, 0.390869140625, -0.4912261962890625, 2.1788787841796875, 1.6045455932617188, -2.356170654296875, 1.7451515197753906, 0.094879150390625, 1.3152694702148438, 3.2422637939453125, -1.260955810546875, 0.8602066040039062, 0.7152595520019531, 0.030237197875976562, -1.5694026947021484, -2.7714691162109375, -3.040679931640625, 0.06060791015625, -0.9688873291015625, -0.462188720703125, -0.16919517517089844, 5.487144470214844, 0.6480445861816406, 2.785491943359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000489.npy"} +{"epoch": 0.7392290249433107, "step": 490, "batch_size": 64, "mean": 1.4126070737838745, "std": 2.710508108139038, "min": -3.3966598510742188, "p10": -1.8080142974853515, "median": 1.333608627319336, "p90": 5.512855911254883, "max": 7.6497955322265625, "pos_frac": 0.671875, "sample": [0.697662353515625, -1.995330810546875, -3.3966598510742188, 6.114776611328125, -0.7732505798339844, 1.810537338256836, -2.5696868896484375, 3.744384765625, 7.143409729003906, 5.626861572265625, -2.672882080078125, 3.730632781982422, -3.2637100219726562, 1.7016544342041016, 3.7465763092041016, 3.0966796875, 5.109230041503906, 0.9922103881835938, 5.5422821044921875, -0.3526153564453125, 0.7799606323242188, 1.710479736328125, 1.6735248565673828, 3.0601768493652344, -1.481201171875, 2.25213623046875, 6.937978744506836, 0.5154972076416016, -0.008556365966796875, 1.23284912109375, -2.48388671875, 1.63604736328125, 1.2978286743164062, 2.6178855895996094, -1.1940536499023438, -1.6693115234375, 1.2453155517578125, 4.914556503295898, -1.5394287109375, -0.5870590209960938, 1.8111915588378906, 0.12804031372070312, 0.8767662048339844, 4.547746658325195, -1.4714488983154297, 1.3693885803222656, -1.475006103515625, -0.6929435729980469, -1.429107666015625, 0.8386688232421875, -1.370819091796875, 1.5218982696533203, -0.035137176513671875, 2.256031036376953, 7.6497955322265625, 1.5420722961425781, 3.4294261932373047, 1.6503219604492188, 2.1166152954101562, 5.444194793701172, 0.6276397705078125, -1.8674583435058594, 6.1695709228515625, 1.8259048461914062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000490.npy"} +{"epoch": 0.7407407407407407, "step": 491, "batch_size": 64, "mean": 1.4500904083251953, "std": 1.9888302087783813, "min": -3.4908218383789062, "p10": -1.1534164428710936, "median": 1.475778579711914, "p90": 3.6317611694335947, "max": 6.305330276489258, "pos_frac": 0.75, "sample": [6.305330276489258, -0.1487884521484375, 4.99488639831543, 2.869293212890625, 2.0041122436523438, 1.3327484130859375, 0.05623435974121094, 3.1896209716796875, 2.5017967224121094, 0.00334930419921875, 0.3184967041015625, -1.7331695556640625, 2.020702362060547, -0.41370582580566406, 4.6189117431640625, -0.4466705322265625, 0.6402626037597656, 3.229583740234375, 1.4267845153808594, 1.6999359130859375, 0.71234130859375, 3.005767822265625, 2.3447399139404297, 1.9473152160644531, 0.6150226593017578, 4.780670166015625, 1.5247726440429688, 0.5654830932617188, -0.12672042846679688, 0.7807083129882812, 4.157798767089844, 2.78009033203125, -1.2803497314453125, 0.9576034545898438, 5.681427001953125, 1.1056938171386719, -0.7811203002929688, 2.4674224853515625, -1.449615478515625, -0.9483489990234375, 3.1777572631835938, -0.46561431884765625, -0.011213302612304688, 1.1893692016601562, 2.0967025756835938, 3.34344482421875, -1.241302490234375, -3.4908218383789062, 3.7553253173828125, 2.0611801147460938, 2.7801284790039062, 2.550647735595703, 3.0560989379882812, 3.2294578552246094, 1.651906967163086, 0.401397705078125, 2.24945068359375, 0.35596466064453125, 3.177215576171875, -2.657989501953125, -1.3865070343017578, 1.3927974700927734, -0.6882247924804688, 2.96820068359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000491.npy"} +{"epoch": 0.7422524565381708, "step": 492, "batch_size": 64, "mean": 1.3122851848602295, "std": 2.4447882175445557, "min": -4.339363098144531, "p10": -1.986971855163574, "median": 1.153275489807129, "p90": 4.436105346679689, "max": 6.942771911621094, "pos_frac": 0.703125, "sample": [3.8828582763671875, 1.0185546875, -0.5213680267333984, 0.3783302307128906, 2.8046035766601562, -2.0763416290283203, 2.0835342407226562, 1.179494857788086, 2.1884117126464844, -0.24524497985839844, 2.4761505126953125, 2.0288009643554688, 2.0728073120117188, 0.8665008544921875, 2.4644317626953125, 0.802276611328125, -4.339363098144531, 0.8241500854492188, 0.7439918518066406, 5.3430328369140625, 1.8523445129394531, -0.5946044921875, 0.918365478515625, -0.5128173828125, 1.5685386657714844, 0.7579002380371094, 0.9883499145507812, 6.93511962890625, 0.2952117919921875, -2.632526397705078, 3.5796432495117188, 4.810138702392578, 0.8524017333984375, 1.4637908935546875, -0.3035755157470703, -0.02155303955078125, -0.50494384765625, 5.083793640136719, -1.7784423828125, -0.4993896484375, 3.491943359375, 3.7301387786865234, 4.123008728027344, -1.1366043090820312, -1.4327926635742188, 1.7757797241210938, 6.942771911621094, 1.1270561218261719, 1.8482742309570312, 2.4337692260742188, -4.095634460449219, -2.5962677001953125, 0.8056793212890625, 3.8813095092773438, 2.5352554321289062, 2.376964569091797, 5.044429779052734, 3.278472900390625, 3.9173927307128906, -0.3456878662109375, 1.2294883728027344, -2.6968612670898438, -3.0552825927734375, 4.570289611816406], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000492.npy"} +{"epoch": 0.7437641723356009, "step": 493, "batch_size": 64, "mean": 1.887427568435669, "std": 2.3054757118225098, "min": -5.073053359985352, "p10": -0.7527488708496091, "median": 1.7976465225219727, "p90": 5.162876892089845, "max": 7.032928466796875, "pos_frac": 0.859375, "sample": [1.5117912292480469, 0.4636039733886719, -1.6341094970703125, 2.5131053924560547, 2.9636154174804688, -0.01737213134765625, -3.5131301879882812, 1.6008262634277344, 0.6939735412597656, 2.3513622283935547, 1.3618316650390625, 4.824615478515625, 5.3078460693359375, 4.140106201171875, 0.19055557250976562, -0.87615966796875, 7.032928466796875, 0.5138320922851562, 5.591892242431641, 3.0188674926757812, 0.5375633239746094, 2.1966476440429688, -0.46479034423828125, 1.7304916381835938, 1.2971305847167969, -1.0498275756835938, 2.8363876342773438, 1.0595283508300781, 6.2721405029296875, 1.8648014068603516, 1.4213714599609375, 2.8106002807617188, -1.1499176025390625, 3.092601776123047, 0.20096588134765625, 1.4113388061523438, 6.850799560546875, 0.750396728515625, 3.3644180297851562, 0.18761253356933594, 3.7399368286132812, 2.8677520751953125, 0.4471893310546875, 0.032318115234375, 1.4482994079589844, 6.8991851806640625, 2.325347900390625, 3.0436172485351562, 2.5113067626953125, 0.116668701171875, 0.303466796875, 3.2804603576660156, -0.9746322631835938, 0.022006988525390625, 5.604961395263672, 2.139312744140625, 3.14752197265625, 2.5798416137695312, 3.2613525390625, 2.6176223754882812, 3.1188507080078125, 1.0074615478515625, 3.0683212280273438, -5.073053359985352], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000493.npy"} +{"epoch": 0.745275888133031, "step": 494, "batch_size": 64, "mean": 1.3203232288360596, "std": 2.476392984390259, "min": -4.331809997558594, "p10": -1.338595962524414, "median": 1.3305816650390625, "p90": 4.01288070678711, "max": 9.339731216430664, "pos_frac": 0.75, "sample": [4.024482727050781, 0.5369491577148438, 0.37972259521484375, 2.4414520263671875, 1.9024276733398438, 0.33133697509765625, 0.3546733856201172, 4.653738021850586, 1.9982185363769531, 0.06339263916015625, -0.05869865417480469, 0.077667236328125, 1.4280624389648438, 2.2979736328125, 0.9313888549804688, 1.4242401123046875, 1.887908935546875, 0.11432647705078125, 2.7223548889160156, -1.18206787109375, 1.3559417724609375, 4.2239532470703125, 2.413074493408203, 1.3398056030273438, 2.1807327270507812, 9.339731216430664, 2.2230796813964844, 0.45812225341796875, 1.51434326171875, 3.267057418823242, 2.459432601928711, -4.0507659912109375, -4.331809997558594, 4.514963150024414, -0.6131172180175781, -1.2670135498046875, -3.0568466186523438, 3.042034149169922, 0.9382171630859375, 1.1887435913085938, 0.9074783325195312, -0.06806755065917969, -3.7228622436523438, -0.8669166564941406, 3.0689964294433594, 0.3355064392089844, 6.4957122802734375, 1.3213577270507812, 1.317840576171875, 1.91693115234375, 3.9588623046875, 2.5557861328125, 1.4395294189453125, 3.985809326171875, -0.713104248046875, -1.7777099609375, -1.48541259765625, -1.3692741394042969, 3.1720428466796875, -1.1484375, 3.735992431640625, 0.6244277954101562, -0.3578453063964844, 7.704811096191406], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000494.npy"} +{"epoch": 0.7467876039304611, "step": 495, "batch_size": 64, "mean": 1.2908086776733398, "std": 2.0593531131744385, "min": -3.9018688201904297, "p10": -0.8495754241943358, "median": 1.0099716186523438, "p90": 4.051216506958009, "max": 7.208856582641602, "pos_frac": 0.734375, "sample": [-0.45781707763671875, -0.51739501953125, 0.5791778564453125, -3.9018688201904297, 4.377529144287109, -1.1117210388183594, -0.6038188934326172, 1.7584762573242188, 0.7628364562988281, 1.734283447265625, -0.8681297302246094, -2.1501235961914062, 0.370025634765625, 0.8204116821289062, 1.0133438110351562, 6.1452789306640625, -0.44693756103515625, 3.666961669921875, 1.4959335327148438, 0.47396278381347656, 1.6157341003417969, 1.4122753143310547, 2.8452606201171875, 0.06935310363769531, 2.6021270751953125, 3.2880992889404297, -0.6393966674804688, 1.8041458129882812, 0.07442855834960938, 0.5787849426269531, 5.560791015625, 1.899923324584961, 1.0065994262695312, -0.11639595031738281, 2.923257827758789, 0.07550048828125, 0.8531074523925781, 1.2186050415039062, 4.762603759765625, 3.4246673583984375, -0.10457992553710938, 0.7691764831542969, 4.186122894287109, 2.301910400390625, 4.817256927490234, 1.3950347900390625, 0.19600677490234375, -0.34313201904296875, 0.04010009765625, 7.208856582641602, -1.008646011352539, 3.6047611236572266, 1.3931159973144531, -0.8062820434570312, -1.4180068969726562, 3.198974609375, 2.909149169921875, 0.4454193115234375, 1.1179580688476562, -1.38983154296875, 1.2218856811523438, 1.1887741088867188, 3.7364349365234375, -0.4485893249511719], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000495.npy"} +{"epoch": 0.7482993197278912, "step": 496, "batch_size": 64, "mean": 1.8385127782821655, "std": 2.9264116287231445, "min": -3.284088134765625, "p10": -1.1638593673706055, "median": 1.4198226928710938, "p90": 5.796162414550781, "max": 10.763069152832031, "pos_frac": 0.75, "sample": [1.79937744140625, 1.962249755859375, 9.607177734375, 3.4067611694335938, 0.6859054565429688, 2.0287628173828125, 1.7833099365234375, 2.8070068359375, 6.232877731323242, 6.072484970092773, -0.774139404296875, -0.26300048828125, 3.8111400604248047, 0.39778900146484375, 0.6609935760498047, 0.9206314086914062, 0.15819168090820312, 1.7611045837402344, 1.7127914428710938, 10.763069152832031, -2.6905288696289062, 9.21942138671875, -0.3199310302734375, 3.8653030395507812, 0.8053131103515625, -3.284088134765625, 4.189544677734375, 2.9958343505859375, 0.46472930908203125, 5.82037353515625, -2.1316757202148438, 0.9183540344238281, 5.554628372192383, 3.363372802734375, -0.5075302124023438, 0.25417137145996094, -1.983367919921875, -0.6008281707763672, 1.3796234130859375, 1.2764053344726562, 5.7396697998046875, 1.46002197265625, 1.8481063842773438, 4.230676651000977, 2.5129661560058594, 1.5023536682128906, -0.1546173095703125, 0.7442226409912109, -1.1170291900634766, 1.0038299560546875, 9.176277160644531, -1.183929443359375, -1.7639617919921875, 0.014059066772460938, 2.675567626953125, 2.2797927856445312, 0.528289794921875, 2.8615798950195312, -0.8117313385009766, 0.9561233520507812, -0.1234283447265625, 1.7114124298095703, -2.07379150390625, 1.524749755859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000496.npy"} +{"epoch": 0.7498110355253212, "step": 497, "batch_size": 64, "mean": 2.29738187789917, "std": 2.6217634677886963, "min": -3.045196533203125, "p10": -0.8076082229614256, "median": 2.290943145751953, "p90": 5.86069602966309, "max": 9.256696701049805, "pos_frac": 0.796875, "sample": [9.256696701049805, 1.6169548034667969, 7.5110015869140625, 2.5456619262695312, 2.457489013671875, 0.18914794921875, 3.8159637451171875, 2.408031463623047, 1.4982528686523438, 3.3860321044921875, 2.8846378326416016, 0.24716949462890625, 3.5986690521240234, 2.2336807250976562, 0.8226089477539062, -0.02619171142578125, 2.1701202392578125, -2.5244216918945312, 4.963521957397461, 2.7157669067382812, -0.027570724487304688, 6.250476837158203, -1.3146133422851562, 0.24906539916992188, 1.1344070434570312, -0.7016143798828125, -1.501800537109375, 3.723674774169922, 3.3083114624023438, 4.7914886474609375, 0.473876953125, 2.34820556640625, 1.019500732421875, 2.9007644653320312, -0.72723388671875, 2.536741256713867, 4.835014343261719, 3.9079151153564453, 6.245199203491211, -0.8420543670654297, 4.756168365478516, 1.2288646697998047, 0.4923553466796875, 3.402679443359375, 4.50457763671875, -3.045196533203125, 7.138580322265625, 8.377937316894531, -0.7256698608398438, 0.07506561279296875, 1.9484062194824219, 4.15472412109375, 4.339941024780273, 1.6155967712402344, 0.9707794189453125, 6.272510528564453, -1.1453704833984375, 0.8899440765380859, -1.0729179382324219, 3.0039939880371094, 3.9351043701171875, 1.1116104125976562, -0.46491241455078125, 4.887115478515625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000497.npy"} +{"epoch": 0.7513227513227513, "step": 498, "batch_size": 64, "mean": 1.7135268449783325, "std": 2.245055913925171, "min": -1.7523193359375, "p10": -0.7097885131835937, "median": 1.5073060989379883, "p90": 5.199232482910157, "max": 7.223361968994141, "pos_frac": 0.75, "sample": [-1.6935806274414062, 1.9573860168457031, 2.811004638671875, 1.6840019226074219, -1.7523193359375, -0.9716815948486328, 1.8301811218261719, 5.045989990234375, -0.2699298858642578, -0.6492156982421875, 2.082855224609375, 1.3510093688964844, 1.5397720336914062, 1.8962631225585938, 6.543617248535156, 0.5165252685546875, 0.5775318145751953, 0.31072044372558594, 0.5193634033203125, 2.4894943237304688, 6.357307434082031, 0.01300048828125, 2.539670944213867, -0.1525249481201172, 1.5160064697265625, -0.735748291015625, -0.5073699951171875, 4.209064483642578, 4.494464874267578, 0.26610565185546875, 2.123291015625, 5.2649078369140625, 0.8187770843505859, 1.6049728393554688, 1.4393234252929688, -0.8289794921875, -0.31307220458984375, 1.498605728149414, 0.1299457550048828, 0.27384185791015625, 4.026721954345703, 5.430131912231445, -1.67059326171875, 2.6546974182128906, 1.6183643341064453, -0.08306503295898438, 0.2521171569824219, 1.5409965515136719, 1.88189697265625, 1.3036613464355469, 3.9057464599609375, 7.223361968994141, 4.218719482421875, -0.5681686401367188, 4.089385986328125, -0.1202239990234375, 6.3835296630859375, 1.7147216796875, 0.08353805541992188, 1.4302825927734375, -0.742218017578125, -0.2899742126464844, 2.671855926513672, 6.8796539306640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000498.npy"} +{"epoch": 0.7528344671201814, "step": 499, "batch_size": 64, "mean": 1.547835111618042, "std": 2.6707215309143066, "min": -3.2726192474365234, "p10": -2.188531494140624, "median": 1.4247360229492188, "p90": 5.005355834960938, "max": 7.510345458984375, "pos_frac": 0.71875, "sample": [3.6436691284179688, 7.510345458984375, -2.7812728881835938, 3.7760009765625, 3.2915878295898438, -0.055065155029296875, 0.41562843322753906, -2.712188720703125, 1.3109817504882812, 0.5666580200195312, 2.941375732421875, 2.1064376831054688, 6.602165222167969, -1.1067466735839844, 2.8401947021484375, -1.14776611328125, 6.852935791015625, -3.2726192474365234, 5.5269622802734375, 3.9324588775634766, 3.8248291015625, -0.59423828125, -1.2905654907226562, -1.4077606201171875, -3.1773834228515625, 3.8380279541015625, 2.59539794921875, 3.3340911865234375, -1.3660831451416016, -1.03692626953125, 0.2658424377441406, -0.3271675109863281, 2.413818359375, 1.96734619140625, 1.9236183166503906, 0.5593414306640625, 0.5114402770996094, 1.5384902954101562, 0.16037559509277344, 0.11197280883789062, 4.605735778808594, -0.3406810760498047, 4.621246337890625, 1.1979331970214844, -2.530548095703125, 1.5843658447265625, 0.8352584838867188, 3.0871734619140625, 4.8924560546875, 6.3855133056640625, 2.2331161499023438, 0.6843032836914062, 2.3317699432373047, -2.9999771118164062, -2.5231475830078125, 5.053741455078125, 2.3009586334228516, 2.5899429321289062, 0.6444091796875, 3.948333740234375, 0.4590911865234375, 6.114356994628906, 0.4366912841796875, -0.6368026733398438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000499.npy"} +{"epoch": 0.7543461829176115, "step": 500, "batch_size": 64, "mean": 1.4240679740905762, "std": 2.532271385192871, "min": -5.22515869140625, "p10": -1.9973182678222654, "median": 1.1747550964355469, "p90": 4.769414520263672, "max": 7.066356658935547, "pos_frac": 0.71875, "sample": [-2.291046142578125, 1.9889297485351562, 0.7653732299804688, 0.8560466766357422, 1.4074554443359375, 1.80560302734375, 1.1700210571289062, 5.2779998779296875, -1.7391281127929688, 4.023120880126953, 2.5510711669921875, 6.203315734863281, 4.042633056640625, 0.04927825927734375, 4.6432647705078125, 4.823478698730469, 7.066356658935547, -0.851898193359375, 1.7624359130859375, -0.05206298828125, 3.9247283935546875, 1.1794891357421875, 5.28173828125, 3.313720703125, 0.06745529174804688, 5.564510345458984, 1.425262451171875, -0.16559410095214844, -0.4642219543457031, -2.6375656127929688, -1.1361770629882812, -0.0488739013671875, -0.0669097900390625, 1.967193603515625, -2.5047836303710938, -0.8908500671386719, 1.2887191772460938, -2.9726104736328125, 2.9079742431640625, 0.03223419189453125, 2.4539260864257812, -0.24068832397460938, 2.801494598388672, 0.18336105346679688, -1.43218994140625, 4.479055404663086, 0.3687629699707031, -5.22515869140625, 4.290256500244141, -2.4283218383789062, 3.5217056274414062, 2.292205810546875, 0.44145774841308594, 0.9821510314941406, 2.9871292114257812, 1.1036434173583984, 0.7423019409179688, 0.7838134765625, 2.201801300048828, 0.6013927459716797, -2.10797119140625, 2.335184097290039, 4.358652114868164, 6.078697204589844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000500.npy"} +{"epoch": 0.7558578987150416, "step": 501, "batch_size": 64, "mean": 1.5765225887298584, "std": 1.9363675117492676, "min": -1.8476333618164062, "p10": -0.6780584335327148, "median": 1.3027591705322266, "p90": 3.8901077270507827, "max": 7.4403228759765625, "pos_frac": 0.78125, "sample": [1.2899208068847656, 4.029991149902344, 1.619384765625, 2.911479949951172, 3.202977180480957, 2.774566650390625, -0.0238800048828125, 1.8628005981445312, 3.5637130737304688, 6.062126159667969, 6.2122344970703125, 1.7029781341552734, 1.963623046875, 0.96600341796875, -0.7331943511962891, -0.6983642578125, 2.9224777221679688, 4.128902435302734, -1.8476333618164062, -0.5886611938476562, 1.1478729248046875, -0.29801368713378906, -0.12094306945800781, 3.0014190673828125, 7.4403228759765625, 4.954750061035156, 0.4923267364501953, 6.230712890625, 0.5193252563476562, -0.2949943542480469, -0.756011962890625, 1.8211212158203125, -0.907928466796875, 1.4005851745605469, 3.4506988525390625, 1.0705413818359375, 1.3980159759521484, -0.6306781768798828, 1.006103515625, 0.32129478454589844, 0.3417949676513672, 3.4612655639648438, 0.11354446411132812, 1.4889297485351562, 1.3336257934570312, -1.318603515625, 0.32735443115234375, 2.49853515625, -0.8225288391113281, 0.5326995849609375, 1.3155975341796875, 2.7785263061523438, 1.1258201599121094, -0.5546989440917969, 0.791595458984375, 0.037136077880859375, 2.022674560546875, 0.6959342956542969, 1.0321235656738281, 3.0609512329101562, 2.5459938049316406, 2.5778541564941406, 1.1052207946777344, 1.838134765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000501.npy"} +{"epoch": 0.7573696145124716, "step": 502, "batch_size": 64, "mean": 1.3549081087112427, "std": 2.0848381519317627, "min": -4.09100341796875, "p10": -0.7173423767089844, "median": 1.0410175323486328, "p90": 3.462279510498047, "max": 8.569351196289062, "pos_frac": 0.796875, "sample": [-1.0811386108398438, 0.11091232299804688, 0.4055976867675781, -0.67950439453125, 3.1085433959960938, 1.8028507232666016, 1.105987548828125, 1.6824188232421875, -2.6803665161132812, 0.820556640625, 2.409759521484375, 3.46746826171875, 0.7516040802001953, -1.4776611328125, 1.652679443359375, 5.452301025390625, 2.095458984375, -0.0571441650390625, 2.188995361328125, 0.02178955078125, 2.7050247192382812, 0.09705734252929688, 2.6280136108398438, 1.8251991271972656, 0.581787109375, 0.56475830078125, 1.8681411743164062, -1.3849678039550781, 0.2738800048828125, 5.989997863769531, 0.084930419921875, 0.8066616058349609, 2.81182861328125, 0.3072967529296875, 0.9760475158691406, 0.5520553588867188, 4.421382904052734, 8.569351196289062, -3.083770751953125, 0.6955146789550781, -0.32247161865234375, 0.9605789184570312, 1.9482879638671875, -0.7335586547851562, 0.39987754821777344, -0.25804901123046875, 1.5282821655273438, 3.1247940063476562, 2.225208282470703, -0.33154296875, 2.457399368286133, 2.3021392822265625, 1.7746810913085938, 0.57659912109375, 2.9151611328125, -4.09100341796875, 2.6995849609375, 3.758441925048828, 0.04891204833984375, 3.4501724243164062, -0.14158248901367188, 3.3603439331054688, 4.860443115234375, 1.8101215362548828], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000502.npy"} +{"epoch": 0.7588813303099018, "step": 503, "batch_size": 64, "mean": 1.4924452304840088, "std": 2.165546178817749, "min": -3.803974151611328, "p10": -1.1269674301147459, "median": 1.1623039245605469, "p90": 4.535290718078614, "max": 6.595005035400391, "pos_frac": 0.828125, "sample": [1.5221023559570312, -1.5776290893554688, 0.5345458984375, 0.40409088134765625, 2.6853981018066406, 0.8476028442382812, 4.3836517333984375, -2.852508544921875, 1.1522369384765625, 3.353799819946289, 2.81683349609375, 0.4327239990234375, 1.4920387268066406, 4.873600006103516, 0.7930679321289062, 3.666240692138672, -1.2165546417236328, 0.99163818359375, 5.3440704345703125, -0.9179306030273438, -0.35984039306640625, 0.4831275939941406, 6.595005035400391, 0.4614143371582031, 1.4524497985839844, 1.2102584838867188, 1.9790458679199219, 6.083339691162109, 1.1723709106445312, 1.8217048645019531, 0.0311126708984375, -0.03423309326171875, 5.407199859619141, 2.3100547790527344, 4.600278854370117, 0.033763885498046875, 2.2754173278808594, 0.5388565063476562, -2.970733642578125, 0.13418197631835938, 1.0858631134033203, 0.37375640869140625, 0.6184673309326172, 2.3520374298095703, 0.7844524383544922, 1.2977752685546875, 0.691436767578125, 3.6078033447265625, 5.310152053833008, 2.3183135986328125, 4.019254684448242, 2.6966590881347656, -3.803974151611328, -0.032001495361328125, 4.2112884521484375, 2.1145668029785156, 0.5622882843017578, -1.8302116394042969, 0.7732925415039062, 2.7845687866210938, 0.6744804382324219, 1.5596694946289062, 3.308624267578125, -1.9158611297607422], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000503.npy"} +{"epoch": 0.7603930461073318, "step": 504, "batch_size": 64, "mean": 1.929521083831787, "std": 2.1729018688201904, "min": -1.52593994140625, "p10": -0.48902206420898436, "median": 1.4781990051269531, "p90": 5.365703201293946, "max": 7.681461334228516, "pos_frac": 0.78125, "sample": [1.3453369140625, 5.579050064086914, -0.03789520263671875, 4.411645889282227, 1.79351806640625, -0.2685699462890625, 3.3501129150390625, 2.0835723876953125, 7.681461334228516, -0.20250892639160156, -0.1904296875, 2.3744354248046875, 1.1255989074707031, 5.231182098388672, 0.6398735046386719, 4.500408172607422, 2.9905242919921875, 0.016590118408203125, 2.1585922241210938, 6.555549621582031, 2.2677688598632812, 1.2215003967285156, 1.1695613861083984, 4.048133850097656, 5.4233551025390625, -0.2090911865234375, 0.4709949493408203, 5.829967498779297, -0.610565185546875, 6.0614776611328125, 2.051971435546875, 1.802703857421875, -0.5045585632324219, 3.4531383514404297, 1.6110610961914062, -0.00626373291015625, 1.0083160400390625, -0.8564071655273438, 2.045013427734375, 1.010040283203125, 3.5013504028320312, 0.62249755859375, 0.24505996704101562, 4.45294189453125, -0.8625640869140625, 1.309295654296875, 2.1103744506835938, 1.2843303680419922, 1.626922607421875, -0.55059814453125, 5.2275848388671875, 0.8077526092529297, -0.6355323791503906, -1.52593994140625, 1.0903892517089844, 2.0408973693847656, -0.4527702331542969, 2.4903717041015625, 1.8359222412109375, 0.38222312927246094, 0.03505897521972656, 6.987663269042969, 2.1362075805664062, 0.9037456512451172], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000504.npy"} +{"epoch": 0.7619047619047619, "step": 505, "batch_size": 64, "mean": 1.3932104110717773, "std": 2.4193193912506104, "min": -3.3450775146484375, "p10": -1.094855499267578, "median": 1.257080078125, "p90": 4.582906532287598, "max": 9.333663940429688, "pos_frac": 0.671875, "sample": [1.9368209838867188, -0.07100868225097656, -0.1507568359375, 1.028472900390625, -0.6127185821533203, 4.3887939453125, 0.3397979736328125, 9.333663940429688, 3.4381885528564453, 2.3754501342773438, 5.531688690185547, -1.0994033813476562, 1.2641067504882812, -0.045146942138671875, 1.09771728515625, -1.6610870361328125, 0.859527587890625, 0.3868217468261719, 2.3374767303466797, 1.6903953552246094, -0.9672317504882812, 1.7806434631347656, 2.777099609375, 2.3540382385253906, 3.5494422912597656, 7.127067565917969, 2.7277297973632812, 1.4196014404296875, 1.370208740234375, 0.08674049377441406, 1.3157958984375, -0.1048736572265625, 3.6355209350585938, -2.7140846252441406, 5.428974151611328, 3.5942306518554688, -0.33007049560546875, 3.8554515838623047, -0.5148601531982422, -0.7292327880859375, 3.30035400390625, 1.2202529907226562, -1.0842437744140625, -3.3450775146484375, 2.09033203125, 0.7435417175292969, 1.4888763427734375, -1.0601825714111328, 4.6696014404296875, -3.329730987548828, -1.05859375, 2.7418060302734375, 4.666097640991211, 1.710723876953125, 3.8788509368896484, 0.8143310546875, 1.79351806640625, -0.9188232421875, 1.2500534057617188, 4.721864700317383, 0.7490577697753906, -0.18597030639648438, -1.6376266479492188, -2.08453369140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000505.npy"} +{"epoch": 0.763416477702192, "step": 506, "batch_size": 64, "mean": 1.54330575466156, "std": 2.520275592803955, "min": -4.035125732421875, "p10": -1.7530929565429687, "median": 1.3253555297851562, "p90": 4.193571281433106, "max": 9.711921691894531, "pos_frac": 0.8125, "sample": [2.278665542602539, 2.050708770751953, -0.6714115142822266, 1.5988235473632812, 0.764801025390625, 0.107147216796875, 2.865947723388672, -1.887725830078125, 1.6152400970458984, 0.865234375, 0.5603084564208984, 3.4427947998046875, 3.2503433227539062, 0.67694091796875, 0.8506355285644531, 1.4232559204101562, -1.6294479370117188, 8.040817260742188, 9.711921691894531, 0.9163665771484375, 3.0873947143554688, -3.1835250854492188, 1.1235809326171875, -3.5085906982421875, -2.3339157104492188, -4.035125732421875, 1.5892257690429688, 6.32958984375, 5.1303558349609375, 3.238046646118164, 3.688701629638672, -2.3567428588867188, 4.301246643066406, -0.6676864624023438, 4.081682205200195, 0.5154228210449219, 2.571216583251953, 0.43353271484375, 7.4829864501953125, 0.29730224609375, 1.030914306640625, 0.7067642211914062, 0.6816673278808594, 2.893922805786133, 0.9749221801757812, 1.2022209167480469, -0.11582565307617188, 1.2274551391601562, 2.2834529876708984, 1.8575057983398438, 3.0397777557373047, 2.270355224609375, 0.45218658447265625, 2.8514022827148438, 2.2250099182128906, 4.241523742675781, 0.9999618530273438, 2.0765838623046875, -1.8060836791992188, -1.4248313903808594, 1.22503662109375, 1.4819488525390625, 2.0060043334960938, 1.7736282348632812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000506.npy"} +{"epoch": 0.764928193499622, "step": 507, "batch_size": 64, "mean": 1.6002895832061768, "std": 2.5381689071655273, "min": -3.2299423217773438, "p10": -1.4371185302734375, "median": 1.3794746398925781, "p90": 4.651263999938965, "max": 8.83416748046875, "pos_frac": 0.734375, "sample": [2.8117313385009766, 2.0140380859375, -0.8289947509765625, 3.4837646484375, 8.83416748046875, 1.3848419189453125, 3.3562469482421875, -1.471038818359375, -3.2299423217773438, -0.3065032958984375, 0.12332344055175781, 0.7142333984375, -0.27332305908203125, 5.435798645019531, -0.28851318359375, 2.8576812744140625, 0.3076019287109375, -0.27904510498046875, 2.5447750091552734, 1.0377559661865234, -0.1587677001953125, 1.9745025634765625, 1.7254066467285156, -1.0455684661865234, 6.379508972167969, 0.7054443359375, 0.8459129333496094, 0.5579833984375, 1.4565105438232422, 3.3410167694091797, 1.3871536254882812, 0.689453125, 8.515289306640625, -1.729339599609375, 1.1082305908203125, 2.099092483520508, -1.35797119140625, 4.654541015625, -2.4459304809570312, -1.1488780975341797, 0.268646240234375, 3.0478973388671875, 0.07247161865234375, 4.643617630004883, 1.3741073608398438, -0.275390625, 3.784698486328125, 1.5401611328125, 5.358917236328125, 6.8677520751953125, -1.8714447021484375, 4.148529052734375, 0.226043701171875, 1.0267410278320312, 2.8673553466796875, 0.6427688598632812, 3.2657737731933594, -1.6380844116210938, 2.8249740600585938, -3.0053176879882812, 2.5166473388671875, 2.8353805541992188, 1.6106834411621094, 4.503410339355469], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000507.npy"} +{"epoch": 0.7664399092970522, "step": 508, "batch_size": 64, "mean": 1.7146738767623901, "std": 2.477325916290283, "min": -2.6403560638427734, "p10": -1.3062397003173827, "median": 1.3434524536132812, "p90": 5.35275573730469, "max": 8.248344421386719, "pos_frac": 0.765625, "sample": [2.464916229248047, 0.7353057861328125, 4.541351318359375, 3.0921478271484375, -0.5836868286132812, -1.4970779418945312, 8.248344421386719, -0.3054981231689453, 2.7191638946533203, 4.0990447998046875, 7.773399353027344, 2.1132965087890625, 0.858612060546875, 1.3512191772460938, -1.780487060546875, 1.61297607421875, -2.6403560638427734, 1.3356857299804688, 1.030527114868164, -2.518251419067383, 1.556121826171875, -0.21828460693359375, 2.2566471099853516, 0.2920341491699219, 0.9992141723632812, -1.2824554443359375, 2.111570358276367, 0.9000606536865234, 1.30804443359375, 1.6999855041503906, 0.279388427734375, 2.264984130859375, 1.0320892333984375, 1.9306411743164062, 6.366115570068359, 3.9722137451171875, 0.07075119018554688, 2.1910247802734375, 7.2145538330078125, -0.39234161376953125, -0.4840202331542969, -1.1462631225585938, 3.2913665771484375, 3.482685089111328, -0.3306846618652344, 0.8636245727539062, 2.4982223510742188, 5.70050048828125, 6.614463806152344, 0.9651985168457031, 2.040813446044922, 0.24552536010742188, 0.4365692138671875, -1.318267822265625, 0.3940238952636719, -2.4336605072021484, -1.3164329528808594, 6.471767425537109, 2.5380020141601562, 4.216094970703125, 3.3371944427490234, 2.6748809814453125, 0.8891143798828125, 2.905414581298828], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000508.npy"} +{"epoch": 0.7679516250944822, "step": 509, "batch_size": 64, "mean": 1.589739203453064, "std": 2.350116491317749, "min": -3.798553466796875, "p10": -1.1518857955932615, "median": 1.578765869140625, "p90": 3.976131439208985, "max": 9.78656005859375, "pos_frac": 0.765625, "sample": [3.055919647216797, -0.6609668731689453, -0.15876197814941406, 1.8959503173828125, 1.3120574951171875, 0.07357025146484375, 1.0610218048095703, -0.9270954132080078, -2.1911773681640625, 0.11068344116210938, 2.6928863525390625, 3.2160568237304688, 2.42974853515625, 1.0610809326171875, 3.6505050659179688, -3.798553466796875, 9.78656005859375, 3.0102005004882812, -1.258758544921875, 0.8633880615234375, 2.7906570434570312, -0.7165451049804688, 2.41094970703125, -1.9116325378417969, 3.2874069213867188, 0.7085342407226562, -1.0587234497070312, 0.7215690612792969, 6.237400054931641, 1.383087158203125, 3.382049560546875, 5.782001495361328, 2.1636199951171875, 2.2868690490722656, 0.48368072509765625, 0.0506744384765625, 3.203479766845703, 0.945343017578125, 3.3542633056640625, 3.5503463745117188, 2.7145767211914062, 0.202880859375, 4.320457458496094, -1.5199012756347656, 4.465404510498047, 1.9394683837890625, 0.993133544921875, 4.00115966796875, 5.277809143066406, 1.774444580078125, 3.9177322387695312, -0.5741119384765625, -0.199066162109375, -3.523834228515625, 0.5918655395507812, 0.8579368591308594, 3.0441055297851562, -1.191812515258789, -1.0024337768554688, 0.5928268432617188, 2.5401363372802734, 1.9311370849609375, 3.764678955078125, 2.545368194580078], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000509.npy"} +{"epoch": 0.7694633408919124, "step": 510, "batch_size": 64, "mean": 1.4934484958648682, "std": 2.2434117794036865, "min": -4.02783203125, "p10": -1.22110595703125, "median": 1.4170942306518555, "p90": 3.8230072021484385, "max": 7.972587585449219, "pos_frac": 0.765625, "sample": [0.5257110595703125, 5.075252532958984, 3.457855224609375, 1.4136276245117188, -4.02783203125, 0.1447906494140625, 3.1962852478027344, 6.0417022705078125, 1.13140869140625, 0.990631103515625, 0.5744094848632812, -0.14495086669921875, 0.4461250305175781, 1.0770893096923828, -1.6887836456298828, 0.39816856384277344, -0.12027359008789062, 2.600860595703125, 0.487579345703125, 1.58154296875, 1.975006103515625, 3.1144847869873047, 3.001953125, -0.3980712890625, 1.3959808349609375, 3.114429473876953, 3.946136474609375, 0.137786865234375, -0.4975318908691406, 3.2605323791503906, 2.8796234130859375, 1.8876113891601562, 3.53570556640625, 3.2055931091308594, 1.4205608367919922, 1.76434326171875, 2.14801025390625, 6.731590270996094, -1.0959014892578125, -0.0384063720703125, 0.5601406097412109, -2.5528793334960938, 1.8515968322753906, 2.792247772216797, 1.0662994384765625, 0.28400421142578125, 1.46112060546875, 7.972587585449219, -0.07666015625, -1.2747650146484375, 2.940786361694336, 2.021799087524414, 3.9480133056640625, 1.2581329345703125, -2.4593734741210938, 2.150411605834961, 6.275299072265625, 1.82147216796875, -0.4033679962158203, 2.3837852478027344, 2.733154296875, 0.5865955352783203, -1.7851543426513672, -2.62518310546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000510.npy"} +{"epoch": 0.7709750566893424, "step": 511, "batch_size": 64, "mean": 2.233816623687744, "std": 2.286176919937134, "min": -2.2317352294921875, "p10": -0.5374402999877927, "median": 1.870626449584961, "p90": 5.419979095458984, "max": 6.653236389160156, "pos_frac": 0.84375, "sample": [6.0562744140625, 5.000282287597656, 2.5244979858398438, 5.013095855712891, 1.8731575012207031, 0.41851806640625, 0.5692520141601562, 0.020036697387695312, 6.454460144042969, 4.329715728759766, 2.0345840454101562, -0.8549270629882812, 1.8680953979492188, 2.8689651489257812, 3.8369503021240234, 0.5565948486328125, 2.5164432525634766, 1.1347808837890625, 5.409431457519531, 3.1207046508789062, 5.294639587402344, 0.867340087890625, 1.7730178833007812, -2.2317352294921875, 0.0311431884765625, -1.544158935546875, 6.063266754150391, 4.27838134765625, 1.00042724609375, 0.103759765625, -0.6465625762939453, 1.118804931640625, 3.1707916259765625, 0.464111328125, 1.9779605865478516, 3.38592529296875, 4.063640594482422, 1.7355537414550781, -0.9607391357421875, 1.096923828125, 1.3732223510742188, 4.873630523681641, 4.248146057128906, 5.596717834472656, 5.42449951171875, 3.0107803344726562, -1.7253265380859375, 0.6112060546875, -0.10610008239746094, 5.976062774658203, 1.4568710327148438, 1.1910343170166016, 2.366588592529297, 3.3870620727539062, 0.5099201202392578, 0.7155609130859375, -0.9400978088378906, 6.653236389160156, 2.3904266357421875, 5.0281829833984375, 4.780488967895508, -0.15044212341308594, -0.2828216552734375, 0.782012939453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000511.npy"} +{"epoch": 0.7724867724867724, "step": 512, "batch_size": 64, "mean": 2.185153007507324, "std": 7.259606838226318, "min": -3.575042724609375, "p10": -2.2472461700439452, "median": 1.125223159790039, "p90": 4.825196838378907, "max": 56.34344482421875, "pos_frac": 0.734375, "sample": [3.6700897216796875, 2.081085205078125, 4.978536605834961, -2.415487289428711, -1.6215972900390625, 5.6860809326171875, -0.28634071350097656, 0.42807960510253906, 0.6333389282226562, 0.15601348876953125, 2.9152488708496094, 3.44659423828125, -3.575042724609375, 0.028972625732421875, 2.1811065673828125, 0.2696990966796875, 2.2339324951171875, -0.4508552551269531, 4.537019729614258, 0.5844955444335938, 3.0215225219726562, 1.1656227111816406, 0.7523422241210938, 1.2817535400390625, 1.0848236083984375, 3.06439208984375, -3.3700332641601562, 0.27426910400390625, 2.0917129516601562, 4.7235870361328125, -0.4788322448730469, 3.2997665405273438, 4.9858856201171875, 0.6968822479248047, 2.7078018188476562, 0.183135986328125, -0.05937385559082031, 2.4217453002929688, -0.11197662353515625, 4.868743896484375, 0.7453384399414062, -1.9956703186035156, 0.8877696990966797, -2.8441085815429688, 1.6106491088867188, 4.4344024658203125, -0.37586212158203125, 2.091531753540039, 2.4329681396484375, -0.7691307067871094, 4.007804870605469, -1.17901611328125, 0.779296875, -2.47515869140625, 0.7777252197265625, 1.6633453369140625, 56.34344482421875, 5.920734405517578, 8.652984619140625, 3.6910324096679688, 1.9320564270019531, -3.4949111938476562, 1.2828845977783203, -2.3550643920898438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000512.npy"} +{"epoch": 0.7739984882842026, "step": 513, "batch_size": 64, "mean": 1.8668807744979858, "std": 2.132620334625244, "min": -1.7704963684082031, "p10": -0.8014450073242188, "median": 1.6073455810546875, "p90": 4.923294067382816, "max": 9.244232177734375, "pos_frac": 0.796875, "sample": [6.3903656005859375, 3.1133804321289062, 6.074615478515625, 1.494140625, 2.1875534057617188, 1.1034622192382812, 1.8335952758789062, 1.7274093627929688, 1.0769977569580078, -0.6382331848144531, 1.15704345703125, -1.1154670715332031, 2.7003860473632812, 0.6047744750976562, 0.7659206390380859, 2.021516799926758, -0.135101318359375, 1.75872802734375, 9.244232177734375, 5.292274475097656, 2.1758766174316406, 5.811611175537109, 1.4188079833984375, 0.7085075378417969, 5.499946594238281, 0.463409423828125, 2.9083709716796875, 2.585355758666992, 1.0033760070800781, 2.0200862884521484, 0.6503562927246094, -0.47368621826171875, 3.4672622680664062, -1.7704963684082031, 1.8551864624023438, 1.4595699310302734, -0.8944511413574219, 1.37353515625, 5.5972900390625, 0.6708450317382812, -0.7564773559570312, -1.2184009552001953, 3.966461181640625, -0.8332672119140625, 3.8754730224609375, 2.9718284606933594, 4.062339782714844, 3.6843795776367188, 3.26654052734375, 1.720550537109375, 1.3720703125, 2.2474822998046875, 3.699493408203125, 1.4112701416015625, 1.742034912109375, -1.0196247100830078, 0.605621337890625, 0.4362373352050781, -0.3152008056640625, 0.6267967224121094, 3.5670166015625, -0.078521728515625, -0.8207168579101562, 2.0786304473876953], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000513.npy"} +{"epoch": 0.7755102040816326, "step": 514, "batch_size": 64, "mean": 1.6746286153793335, "std": 2.6159515380859375, "min": -5.195438385009766, "p10": -1.1361724853515625, "median": 1.585982322692871, "p90": 4.749184417724609, "max": 8.247509002685547, "pos_frac": 0.765625, "sample": [-0.48915672302246094, 1.5513801574707031, 2.621074676513672, 3.263225555419922, 2.0841712951660156, 0.8957805633544922, 4.539878845214844, -1.1539154052734375, 1.0778961181640625, 0.5093498229980469, 1.3817405700683594, 3.366283416748047, 1.9339122772216797, 3.39056396484375, -0.6981029510498047, 0.14878082275390625, -4.3561859130859375, 1.6337566375732422, -0.32950592041015625, 1.7328720092773438, -5.195438385009766, 0.7970466613769531, 1.620584487915039, 2.836994171142578, 8.247509002685547, -2.600372314453125, -0.0332489013671875, 1.8681144714355469, 1.2973785400390625, 4.69464111328125, -1.0006866455078125, -2.063386917114258, 3.4967422485351562, 1.4096221923828125, 4.028861999511719, 2.3143310546875, 1.7340087890625, 6.197772979736328, 0.7616348266601562, 1.2875213623046875, 0.1174468994140625, -1.1917953491210938, -1.0371170043945312, 0.7544288635253906, 2.1214447021484375, 3.4350509643554688, 1.35992431640625, 1.712310791015625, 1.8135509490966797, 3.6715927124023438, -0.9499664306640625, 0.06975173950195312, 0.6030368804931641, 7.473503112792969, -2.151519775390625, 4.980033874511719, 4.772560119628906, 2.9169769287109375, 4.128883361816406, 1.0856695175170898, 7.724639892578125, 5.915033340454102, -1.0947723388671875, 4.142127990722656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000514.npy"} +{"epoch": 0.7770219198790628, "step": 515, "batch_size": 64, "mean": 1.9603983163833618, "std": 2.471931219100952, "min": -3.080322265625, "p10": -0.9004608154296874, "median": 1.7213287353515625, "p90": 4.851725387573244, "max": 10.393798828125, "pos_frac": 0.78125, "sample": [1.0580196380615234, 2.0716075897216797, 1.054168701171875, 2.0884132385253906, 7.480491638183594, 5.206342697143555, 2.430713653564453, 4.080881118774414, 0.7939376831054688, 2.3447799682617188, 4.986732482910156, 2.953876495361328, 3.952535629272461, -1.063760757446289, 1.9396381378173828, 3.3001708984375, -0.957763671875, 3.91204833984375, -0.9462509155273438, 1.9411163330078125, 2.3101577758789062, -0.02337646484375, 0.5404510498046875, 1.2940826416015625, -3.080322265625, 1.5417594909667969, 7.282392501831055, 2.4361419677734375, 2.2180328369140625, 0.08374786376953125, 1.2621688842773438, 2.099994659423828, 0.131317138671875, -0.36013221740722656, 2.9532814025878906, 4.475004196166992, -2.247955322265625, 2.772369384765625, 0.2032299041748047, 1.203582763671875, -0.4616508483886719, 1.3967323303222656, 4.014892578125, -1.7201080322265625, 1.6924591064453125, 10.393798828125, 1.6445484161376953, 1.015523910522461, 4.402320861816406, 3.618480682373047, 1.7501983642578125, 1.0028457641601562, -0.7936172485351562, 6.3279266357421875, 5.638580322265625, -1.7567939758300781, 2.7377052307128906, 0.3662395477294922, 4.536708831787109, -0.26691436767578125, 1.0471878051757812, 4.2805328369140625, -0.6436290740966797, -0.4821014404296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000515.npy"} +{"epoch": 0.7785336356764928, "step": 516, "batch_size": 64, "mean": 1.557773470878601, "std": 2.060089588165283, "min": -4.814830780029297, "p10": -0.3009853363037109, "median": 1.3176727294921875, "p90": 4.277737426757813, "max": 7.838958740234375, "pos_frac": 0.828125, "sample": [4.987556457519531, 7.838958740234375, 2.123096466064453, 0.068084716796875, 0.6721458435058594, 6.458347320556641, 0.00710296630859375, 1.7328910827636719, 1.301910400390625, 1.6842269897460938, 0.9370231628417969, 3.0742225646972656, 3.96331787109375, 5.1743621826171875, 1.3012142181396484, 0.08053779602050781, 2.0040283203125, 2.3133602142333984, 3.721954345703125, 4.2116851806640625, 0.935577392578125, -4.814830780029297, 1.33343505859375, 2.1751670837402344, 4.354804992675781, 2.220184326171875, 2.2429733276367188, -0.32830810546875, 3.3610992431640625, 2.1765174865722656, -1.3893966674804688, 0.6195068359375, -0.06770515441894531, -0.149078369140625, 2.5140151977539062, 0.8620681762695312, 1.522806167602539, 0.3956871032714844, 0.7860374450683594, 0.3979988098144531, 1.1445083618164062, -3.0355300903320312, 1.3480911254882812, 2.267719268798828, 2.5748062133789062, 0.489593505859375, 2.9444847106933594, 2.276988983154297, 0.8164749145507812, 2.2442779541015625, 0.7004547119140625, 0.644561767578125, -0.11351776123046875, 0.7373218536376953, -1.4891510009765625, 4.3060455322265625, -0.23723220825195312, 1.4239826202392578, -0.458892822265625, -0.9292507171630859, 2.7454681396484375, 0.6577949523925781, 0.873992919921875, 4.959922790527344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000516.npy"} +{"epoch": 0.780045351473923, "step": 517, "batch_size": 64, "mean": 1.7575602531433105, "std": 2.4521543979644775, "min": -1.989492416381836, "p10": -0.9186534881591796, "median": 1.5813922882080078, "p90": 5.116287231445313, "max": 9.714946746826172, "pos_frac": 0.71875, "sample": [2.1382369995117188, -0.5484161376953125, -0.3908958435058594, 2.0098094940185547, -1.1471328735351562, 0.9035396575927734, 0.9589080810546875, 1.012411117553711, 6.748939514160156, 3.8048973083496094, 1.5689964294433594, -1.3249740600585938, -0.41634368896484375, 1.3442420959472656, 0.5148773193359375, -1.4301834106445312, 0.8235569000244141, 1.5937881469726562, 3.275209426879883, -0.7886161804199219, 4.11761474609375, -0.9569168090820312, 5.18499755859375, 7.492992401123047, -0.579254150390625, -0.31507301330566406, 1.6706275939941406, 1.6289291381835938, 2.431884765625, 3.4629688262939453, 1.9731674194335938, 3.6432266235351562, 4.924327850341797, -0.03862762451171875, 0.522430419921875, -1.989492416381836, 1.747833251953125, 5.6002349853515625, -0.8060379028320312, -0.8134841918945312, 2.1390762329101562, 3.349437713623047, 6.758018493652344, 0.06692314147949219, 0.7112503051757812, 4.955963134765625, 1.861480712890625, 2.110868453979492, -0.8293724060058594, 3.1427841186523438, 1.3342761993408203, 2.5745620727539062, 1.1454315185546875, 9.714946746826172, 6.5019683837890625, 2.9640560150146484, -0.6839981079101562, -1.6556224822998047, 0.8232078552246094, 1.7146949768066406, 1.9235572814941406, -1.4711036682128906, 2.21685791015625, 1.5613937377929688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000517.npy"} +{"epoch": 0.781557067271353, "step": 518, "batch_size": 64, "mean": 1.1316269636154175, "std": 2.271061420440674, "min": -3.3804779052734375, "p10": -1.2083511352539062, "median": 0.764190673828125, "p90": 4.583321380615235, "max": 8.221633911132812, "pos_frac": 0.6875, "sample": [1.1363296508789062, 0.5569839477539062, 0.8490009307861328, 0.80084228515625, -0.2227325439453125, -0.9760360717773438, 0.32511138916015625, 1.9889411926269531, -2.838336944580078, 0.5458869934082031, 0.4628448486328125, -0.6259689331054688, 0.7275390625, 2.772430419921875, 2.3895645141601562, -3.3804779052734375, 1.2682418823242188, 5.261211395263672, -2.0438899993896484, 0.5903739929199219, 4.43328857421875, -2.2372512817382812, -0.24434661865234375, 1.4701499938964844, 1.3763656616210938, 1.4469223022460938, -1.1984176635742188, 1.6359939575195312, -2.710968017578125, 4.302223205566406, -0.44381141662597656, -1.0517921447753906, 0.13722610473632812, 2.6314029693603516, 4.648468017578125, -1.2126083374023438, 2.571828842163086, -2.0745468139648438, 1.9107666015625, 6.548004150390625, 2.2855682373046875, -0.078399658203125, 0.6710014343261719, 8.221633911132812, 5.8794403076171875, 3.448699951171875, 0.5110931396484375, 4.647621154785156, 0.57122802734375, -0.7882843017578125, -0.40918731689453125, -0.18426513671875, 0.697784423828125, -0.23908424377441406, 0.9244728088378906, 1.6405487060546875, 1.6822891235351562, 2.198078155517578, 0.12354278564453125, 1.3761920928955078, 1.0967254638671875, -0.1823253631591797, 5.0598907470703125, 1.74310302734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000518.npy"} +{"epoch": 0.783068783068783, "step": 519, "batch_size": 64, "mean": 0.9974346160888672, "std": 2.8938591480255127, "min": -7.098419189453125, "p10": -2.0960483551025386, "median": 0.7201213836669922, "p90": 4.479871368408204, "max": 7.77093505859375, "pos_frac": 0.578125, "sample": [-3.386014938354492, 3.0057907104492188, -0.200103759765625, -3.917572021484375, -0.2541046142578125, 1.7483177185058594, -0.7467498779296875, 5.5243072509765625, 1.5493698120117188, 2.05950927734375, 1.0384368896484375, 1.6884479522705078, 4.272926330566406, -0.7279891967773438, 0.8515243530273438, -0.6199569702148438, 2.433277130126953, 0.5909614562988281, -1.4947090148925781, -3.8226318359375, 4.347602844238281, 7.685512542724609, -0.6136322021484375, 0.6469573974609375, 3.1648712158203125, 5.439329147338867, -1.7863082885742188, 2.08642578125, 2.7919464111328125, -0.8314208984375, -1.1438465118408203, 3.8259048461914062, -0.4468536376953125, -0.7294464111328125, 1.8533039093017578, -1.55548095703125, 1.500762939453125, 0.6659812927246094, 3.4805984497070312, -1.6578750610351562, -0.8052902221679688, 1.67626953125, 3.6296539306640625, 0.774261474609375, 0.3810577392578125, -7.098419189453125, 0.2983245849609375, -1.343170166015625, 3.406829833984375, -2.2287940979003906, 4.5155792236328125, -1.4072151184082031, -2.261363983154297, 7.77093505859375, -0.06810760498046875, 2.7169952392578125, 4.396553039550781, 4.821418762207031, 3.834514617919922, -1.4403858184814453, 0.7752418518066406, -1.1889495849609375, 7.1361846923828125, -2.773679733276367], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000519.npy"} +{"epoch": 0.7845804988662132, "step": 520, "batch_size": 64, "mean": 1.562050700187683, "std": 2.305999279022217, "min": -2.8444061279296875, "p10": -1.0960411071777343, "median": 1.3341236114501953, "p90": 4.689951515197755, "max": 11.226516723632812, "pos_frac": 0.734375, "sample": [0.6031246185302734, -0.5516433715820312, -0.5122451782226562, 1.9257011413574219, 2.7360496520996094, 11.226516723632812, 2.1620006561279297, -0.12586212158203125, 0.8379859924316406, 2.235260009765625, 2.9355010986328125, 0.8907070159912109, 1.3037300109863281, 2.2283973693847656, 3.691436767578125, 1.1952438354492188, -0.37905120849609375, -1.2273330688476562, 0.3882255554199219, 5.120845794677734, 5.3998565673828125, 1.6094703674316406, 1.6223526000976562, 3.525726318359375, 0.655792236328125, 2.5467796325683594, 6.1007080078125, -1.8129653930664062, -0.5224151611328125, 4.98529052734375, 1.727569580078125, 1.3645172119140625, -0.6704483032226562, 3.1298294067382812, 1.2957534790039062, -0.08125114440917969, 4.860639572143555, 1.0634384155273438, -1.186309814453125, -1.6900558471679688, -1.3741989135742188, 0.6226577758789062, 0.04827880859375, -0.436737060546875, -0.90277099609375, 0.44264984130859375, 2.0908432006835938, 1.4177131652832031, 0.65875244140625, 0.1523590087890625, 0.7167510986328125, 4.112518310546875, 4.291679382324219, 2.496082305908203, 4.9988555908203125, 3.5474300384521484, 2.112945556640625, 2.5367202758789062, -0.3627204895019531, -1.1788711547851562, 1.6695518493652344, 2.3916873931884766, 2.15460205078125, -2.8444061279296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000520.npy"} +{"epoch": 0.7860922146636432, "step": 521, "batch_size": 64, "mean": 1.5222779512405396, "std": 2.045820474624634, "min": -3.4555892944335938, "p10": -0.517352294921875, "median": 1.1803321838378906, "p90": 4.524715423583986, "max": 6.396942138671875, "pos_frac": 0.796875, "sample": [1.9363250732421875, 0.78680419921875, 3.538909912109375, -0.4160308837890625, 3.9669723510742188, 0.7691726684570312, 5.106880187988281, -0.2480010986328125, -0.03534698486328125, 0.9756317138671875, 2.6791152954101562, 0.7344703674316406, 2.800018310546875, 1.1482009887695312, 2.9645233154296875, 2.2817153930664062, 2.6419601440429688, 1.8489837646484375, 1.8954010009765625, 5.850883483886719, 0.80926513671875, 3.5553665161132812, 0.421478271484375, 0.2861785888671875, 0.22043228149414062, 2.2323532104492188, -1.1711654663085938, 2.7058639526367188, 1.0603103637695312, 1.7435379028320312, 5.635894775390625, 6.396942138671875, -2.4637718200683594, 0.49022483825683594, -0.5607757568359375, -0.00677490234375, 0.11952972412109375, 2.3665313720703125, 0.956298828125, 5.612693786621094, 1.74908447265625, -0.12209510803222656, -1.2067337036132812, 1.9587783813476562, -0.6572303771972656, 0.7020072937011719, 2.6463775634765625, 4.045051574707031, 1.6066093444824219, 5.1650390625, 1.0185470581054688, 1.5877532958984375, -3.4555892944335938, 4.73028564453125, -2.9768524169921875, 0.2589111328125, -0.0076923370361328125, 0.13446044921875, 3.4089202880859375, 0.4412040710449219, 1.7570266723632812, 0.15879058837890625, 1.21246337890625, 1.6336669921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000521.npy"} +{"epoch": 0.7876039304610734, "step": 522, "batch_size": 64, "mean": 1.4820764064788818, "std": 2.370246648788452, "min": -6.8529205322265625, "p10": -1.3735000610351562, "median": 1.472311019897461, "p90": 4.42943344116211, "max": 6.2426910400390625, "pos_frac": 0.796875, "sample": [0.2274169921875, 2.1183624267578125, 0.7876949310302734, 0.9955520629882812, 2.8427810668945312, 4.450035095214844, 2.4826126098632812, 3.6243972778320312, 3.454193115234375, 2.3427486419677734, -1.4002723693847656, -1.1981983184814453, 1.594635009765625, 5.8178253173828125, 0.713592529296875, 3.00262451171875, -2.4955902099609375, 2.2873611450195312, 2.3330116271972656, 1.5113162994384766, 1.0679893493652344, 0.9580345153808594, 1.4356765747070312, -6.8529205322265625, 3.66693115234375, 0.3790168762207031, 1.5089454650878906, -0.6110076904296875, -1.535888671875, 0.51519775390625, -0.43704986572265625, 5.353208541870117, 1.1425933837890625, 3.805522918701172, 4.555631637573242, 0.11144256591796875, -1.3110313415527344, 2.0789794921875, 0.4467048645019531, 0.06432342529296875, 2.048664093017578, 1.9140090942382812, 1.0305442810058594, 1.1518936157226562, 1.925750732421875, -0.0450592041015625, 4.3813629150390625, -2.2432384490966797, 5.2623443603515625, 0.13213539123535156, 4.565971374511719, -1.691375732421875, 1.7827377319335938, 6.2426910400390625, 1.1001014709472656, 1.2603607177734375, -3.510162353515625, 4.186500549316406, 3.1651268005371094, 4.338783264160156, 0.09859466552734375, -1.0239830017089844, 4.1321868896484375, 2.8125534057617188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000522.npy"} +{"epoch": 0.7891156462585034, "step": 523, "batch_size": 64, "mean": 1.930076241493225, "std": 2.508195161819458, "min": -4.145301818847656, "p10": -1.8220321655273435, "median": 2.25335693359375, "p90": 4.942812728881836, "max": 7.891483306884766, "pos_frac": 0.78125, "sample": [-0.7126808166503906, 4.386962890625, 0.06423187255859375, 1.3842697143554688, 4.958515167236328, 4.019975662231445, 1.3781585693359375, 5.526557922363281, 3.125, -0.1584014892578125, 0.6119327545166016, 3.36114501953125, 4.7588958740234375, 0.24483871459960938, 3.8828125, 4.51385498046875, 2.7360763549804688, 0.9980983734130859, 3.3114166259765625, -2.1658363342285156, 1.5017776489257812, 3.0238723754882812, -0.6901168823242188, 1.2779541015625, -1.208526611328125, 1.852743148803711, -1.9278030395507812, -0.7679252624511719, 3.4915237426757812, 3.6528244018554688, 3.0616703033447266, 3.0400466918945312, 3.80816650390625, 2.8272476196289062, 1.4959182739257812, 5.828987121582031, 5.853336334228516, 2.264007568359375, 1.6654739379882812, -3.74420166015625, 0.8111419677734375, -1.5752334594726562, 7.891483306884766, 2.4039840698242188, 2.8813400268554688, 1.7602767944335938, 0.5242443084716797, -0.296295166015625, 5.3500213623046875, -2.6645889282226562, 3.9924697875976562, 1.0966606140136719, 2.31793212890625, -2.1868820190429688, 4.9061737060546875, 3.0208511352539062, 1.3187713623046875, 3.8004150390625, 5.060066223144531, -4.145301818847656, -2.3664474487304688, 2.6909122467041016, 2.242706298828125, 2.157379150390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000523.npy"} +{"epoch": 0.7906273620559335, "step": 524, "batch_size": 64, "mean": 1.328594446182251, "std": 2.040229082107544, "min": -4.897674560546875, "p10": -1.1407812118530274, "median": 1.4998512268066406, "p90": 3.643547248840332, "max": 6.6163177490234375, "pos_frac": 0.734375, "sample": [2.6617889404296875, 1.4889602661132812, 5.815540313720703, -0.27984046936035156, 1.6815643310546875, -0.30631065368652344, -0.9029922485351562, 0.5985870361328125, 3.9932403564453125, 1.53912353515625, 3.1068572998046875, -1.1494770050048828, -2.4170913696289062, 2.50262451171875, -0.5513458251953125, 2.713958740234375, -1.962646484375, -1.8672637939453125, 0.9276084899902344, 2.8682861328125, 2.197845458984375, -4.897674560546875, 1.1278305053710938, 0.29636383056640625, 0.5965423583984375, 1.039957046508789, 6.6163177490234375, 1.1546173095703125, 2.0822296142578125, -0.94586181640625, -0.3578643798828125, 2.8698501586914062, -1.8202896118164062, 1.1572799682617188, 2.79571533203125, 2.6730175018310547, 1.1056079864501953, 2.40838623046875, 2.0569000244140625, -0.6065216064453125, 2.232666015625, 1.5851974487304688, 0.1053619384765625, 4.55426025390625, 4.07249641418457, -1.1070480346679688, 3.857318878173828, -0.9930877685546875, 3.4481239318847656, 2.66766357421875, 1.6781272888183594, 1.4145431518554688, 1.8321151733398438, 3.5558242797851562, 2.518878936767578, 2.000560760498047, 3.681142807006836, 1.2758827209472656, 1.267486572265625, -1.1204910278320312, 3.2162628173828125, -1.1763763427734375, 1.5107421875, 0.9409694671630859], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000524.npy"} +{"epoch": 0.7921390778533636, "step": 525, "batch_size": 64, "mean": 1.2232205867767334, "std": 1.7749276161193848, "min": -2.60888671875, "p10": -0.8992820739746094, "median": 1.0456066131591797, "p90": 3.166453552246094, "max": 5.9376220703125, "pos_frac": 0.796875, "sample": [2.24249267578125, -2.60888671875, 0.1230010986328125, -0.7308082580566406, -2.1042556762695312, 1.70953369140625, 1.2653636932373047, 0.0291290283203125, 1.6728973388671875, -0.8615188598632812, 0.7003021240234375, 0.0077667236328125, 1.0641136169433594, 0.6648597717285156, 0.5837898254394531, 4.509521484375, -0.6009426116943359, -1.35882568359375, 0.8933563232421875, 2.2839431762695312, 2.472187042236328, 0.1296539306640625, 4.193473815917969, 4.690000534057617, 0.07421112060546875, 1.4202957153320312, 0.8441619873046875, 0.7144355773925781, 1.5212249755859375, 2.351451873779297, -0.91546630859375, 2.7222137451171875, 0.7300090789794922, 2.0107688903808594, 2.948974609375, -2.4775009155273438, 0.8400897979736328, 1.5510902404785156, 5.9376220703125, -0.3668537139892578, 1.027099609375, 1.4500999450683594, 1.9657783508300781, 0.557037353515625, 1.3023643493652344, 5.638519287109375, 1.0020465850830078, 0.46242523193359375, 2.2655563354492188, 3.1994400024414062, -0.253021240234375, 3.0894851684570312, 2.3537979125976562, 0.948699951171875, 1.6950149536132812, 2.09051513671875, 0.6228485107421875, 1.6387786865234375, 2.1332130432128906, 2.6893386840820312, -0.9272079467773438, 4.323204040527344, -0.0012645721435546875, -1.864532470703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000525.npy"} +{"epoch": 0.7936507936507936, "step": 526, "batch_size": 64, "mean": 1.1028800010681152, "std": 1.855867862701416, "min": -3.381805419921875, "p10": -1.1149539947509766, "median": 0.9797115325927734, "p90": 3.5325410842895524, "max": 6.170511245727539, "pos_frac": 0.734375, "sample": [4.30426025390625, 2.156522750854492, -1.438699722290039, 0.29001617431640625, -1.5036029815673828, 2.862943649291992, 1.0487327575683594, 2.549560546875, -1.6739959716796875, -0.5781097412109375, 0.787811279296875, 1.8964653015136719, -1.6176872253417969, 1.1927490234375, 1.024261474609375, -0.2071990966796875, 1.8046016693115234, 6.170511245727539, -0.7581558227539062, 3.1873836517333984, 1.8578643798828125, 1.9206085205078125, 0.97393798828125, 4.4049224853515625, 2.7006301879882812, 0.3976707458496094, 1.4189605712890625, 0.8204727172851562, 2.3444747924804688, 0.8513641357421875, 0.386444091796875, 0.16809844970703125, 2.9862136840820312, -0.4502677917480469, 2.7207374572753906, 0.04892730712890625, 1.664520263671875, -0.7990264892578125, 2.5635223388671875, -0.1544647216796875, 4.3370513916015625, 0.301605224609375, 1.329803466796875, 0.6940116882324219, 1.6964149475097656, -2.796121597290039, -0.5106582641601562, -0.8102035522460938, 2.294219970703125, 0.7758941650390625, 0.37419891357421875, -0.08320999145507812, 4.208953857421875, 0.9854850769042969, 4.635499954223633, -1.0510940551757812, -1.1423225402832031, 2.875154495239258, 1.9329605102539062, 0.9988479614257812, 0.5477294921875, 0.36745452880859375, 3.6804656982421875, -3.381805419921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000526.npy"} +{"epoch": 0.7951625094482238, "step": 527, "batch_size": 64, "mean": 1.0811076164245605, "std": 2.429431915283203, "min": -5.1854400634765625, "p10": -1.291379165649414, "median": 0.9346160888671875, "p90": 4.2543785095214846, "max": 7.212688446044922, "pos_frac": 0.6875, "sample": [-1.3142318725585938, 1.5790672302246094, 1.434814453125, 2.4498138427734375, 0.9239273071289062, 0.9325714111328125, 5.869842529296875, 0.9405975341796875, 3.0231285095214844, 3.077880859375, 5.14801025390625, -3.562591552734375, 1.4854049682617188, -1.740386962890625, 2.7604522705078125, -0.5347709655761719, 4.2879486083984375, 0.3759746551513672, 0.29937744140625, -1.2380561828613281, 0.9060478210449219, -3.3479690551757812, -0.736083984375, -0.5010757446289062, 2.272747039794922, 0.9366607666015625, 4.176048278808594, -1.02764892578125, 1.3226890563964844, -0.7478675842285156, 2.9192733764648438, 0.85528564453125, -0.3529815673828125, 1.4117774963378906, 1.8485393524169922, 7.212688446044922, 4.46661376953125, 0.0526885986328125, 0.19877052307128906, 2.1895217895507812, 2.7709484100341797, 0.17428970336914062, 5.7097015380859375, 0.19569015502929688, 2.0589599609375, -0.5045738220214844, -1.1191749572753906, 4.919134140014648, 1.7939300537109375, -0.6806983947753906, -4.091217041015625, 0.8308658599853516, 2.3033447265625, 2.0053329467773438, -0.17229461669921875, -1.0864849090576172, -0.376953125, 3.8201904296875, 3.5369491577148438, 2.9649810791015625, -3.479572296142578, -5.1854400634765625, 1.8264846801757812, 0.7219924926757812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000527.npy"} +{"epoch": 0.7966742252456538, "step": 528, "batch_size": 64, "mean": 1.2484264373779297, "std": 2.106081485748291, "min": -4.0163726806640625, "p10": -1.2543479919433589, "median": 1.0989255905151367, "p90": 3.4850673675537114, "max": 5.974632263183594, "pos_frac": 0.6875, "sample": [5.023780822753906, -3.9300079345703125, 3.6661300659179688, -0.3848991394042969, 3.152698516845703, 0.8657989501953125, 2.0529518127441406, 0.7155609130859375, 0.3634052276611328, 1.7164840698242188, -0.24179840087890625, -1.41961669921875, 2.1697311401367188, -1.8513259887695312, -0.8123111724853516, -0.29282379150390625, 4.058122634887695, 1.106100082397461, 1.0763092041015625, -0.0003204345703125, -0.17824554443359375, 3.317230224609375, 0.3944206237792969, -4.0163726806640625, 0.125762939453125, 1.9919815063476562, -0.40930938720703125, -0.43477630615234375, 1.9009857177734375, 3.1568946838378906, 2.1872940063476562, 0.6429901123046875, 3.178417205810547, 5.974632263183594, 1.0917510986328125, -0.2571277618408203, 5.9319000244140625, 2.8699569702148438, 3.2435379028320312, 0.7542934417724609, 0.8539886474609375, 1.6439552307128906, -0.11458015441894531, 1.2224082946777344, -1.4525909423828125, 2.253265380859375, 2.5664710998535156, 0.9137382507324219, 4.414958953857422, 0.9982166290283203, 2.7193603515625, -3.6578750610351562, -1.731719970703125, 3.3099365234375, 3.292266845703125, 2.1105308532714844, -0.2790412902832031, 1.91082763671875, 3.4000320434570312, -0.8687210083007812, 2.1367111206054688, 2.435943603515625, 3.5215110778808594, -0.2004852294921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000528.npy"} +{"epoch": 0.7981859410430839, "step": 529, "batch_size": 64, "mean": 2.306537389755249, "std": 2.44429612159729, "min": -3.6122093200683594, "p10": -0.5824031829833984, "median": 2.012378692626953, "p90": 5.513483428955079, "max": 7.619295120239258, "pos_frac": 0.8125, "sample": [3.505359649658203, -0.2531929016113281, 3.0041656494140625, 1.293701171875, 5.719585418701172, 0.3093147277832031, 7.120361328125, 5.184196472167969, -0.6016921997070312, 0.3184356689453125, -0.2915363311767578, -1.4444217681884766, 3.4865570068359375, 1.6905441284179688, 1.904205322265625, 0.2368144989013672, -0.6082916259765625, 4.745269775390625, 0.081268310546875, 2.5045089721679688, 4.1230621337890625, 1.183685302734375, -1.080535888671875, 0.3121795654296875, 2.9533538818359375, 5.651542663574219, -2.0944595336914062, -0.7678909301757812, 1.121164321899414, 1.0638961791992188, 4.106269836425781, 1.838470458984375, 5.19134521484375, 7.619295120239258, 1.6163444519042969, 6.844825744628906, 4.409746170043945, 4.43379020690918, -0.512603759765625, 5.15557861328125, 3.1705169677734375, 2.9926185607910156, 6.04266357421875, 2.214263916015625, 0.8964195251464844, 4.740489959716797, 2.9350204467773438, -0.07098388671875, 2.037750244140625, 3.7293014526367188, 2.653472900390625, 3.35394287109375, 0.43157386779785156, 4.040916442871094, -0.5373954772949219, 6.825172424316406, 3.3403091430664062, 1.9870071411132812, 1.3130340576171875, 1.5470123291015625, -3.6122093200683594, 1.458953857421875, 0.14510345458984375, 4.9092254638671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000529.npy"} +{"epoch": 0.799697656840514, "step": 530, "batch_size": 64, "mean": 1.56209135055542, "std": 2.1736409664154053, "min": -2.261371612548828, "p10": -0.9518997192382812, "median": 1.5332164764404297, "p90": 4.087086486816408, "max": 7.7979736328125, "pos_frac": 0.765625, "sample": [6.712371826171875, 3.0361080169677734, 2.0508594512939453, -2.261371612548828, 2.4983291625976562, 0.07669830322265625, 7.7979736328125, -0.8449935913085938, 3.4575958251953125, 2.5822715759277344, 0.9441032409667969, 1.8485488891601562, -1.3755531311035156, 1.2072219848632812, 3.1983890533447266, 3.6323375701904297, 1.0344619750976562, 1.8296737670898438, 3.725006103515625, -1.6222457885742188, -0.23071670532226562, -1.9512863159179688, 0.7672119140625, 2.207183837890625, 1.843048095703125, -0.4407672882080078, 2.5450820922851562, 2.2684173583984375, 1.3621673583984375, 5.481292724609375, 2.186145782470703, 0.35373687744140625, 2.6966705322265625, 6.690948486328125, -0.4906158447265625, 0.1809368133544922, 2.4435348510742188, 2.762439727783203, 0.8390693664550781, 0.3750190734863281, 0.2233428955078125, 4.2422637939453125, -0.1909332275390625, 1.8118438720703125, -0.9816360473632812, 0.576446533203125, 3.5002994537353516, -0.13001251220703125, -1.755929946899414, 6.13140869140625, -1.8048210144042969, -0.8825149536132812, 2.0950145721435547, 0.3395576477050781, 3.0328369140625, -0.114471435546875, 2.718852996826172, 0.5867767333984375, 0.6327743530273438, 0.00495147705078125, 4.511772155761719, 0.4300384521484375, 1.7042655944824219, 1.874420166015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000530.npy"} +{"epoch": 0.8012093726379441, "step": 531, "batch_size": 64, "mean": 1.1534587144851685, "std": 2.0204017162323, "min": -3.5970001220703125, "p10": -0.9629974365234373, "median": 0.7631444931030273, "p90": 3.823022460937501, "max": 6.972862243652344, "pos_frac": 0.71875, "sample": [-1.6478767395019531, 4.2293701171875, 3.0839080810546875, 0.239166259765625, 1.3790206909179688, -0.4120941162109375, 0.21136474609375, 1.4993896484375, 1.4585418701171875, 0.23900222778320312, 0.4833946228027344, 0.6836948394775391, 2.971221923828125, -1.4748687744140625, 3.5786590576171875, 2.6105079650878906, -0.5729522705078125, -1.044708251953125, 1.0576705932617188, 6.788677215576172, 1.1878204345703125, 4.5164642333984375, -0.05466461181640625, -1.1629409790039062, -0.1385955810546875, 1.389577865600586, 1.064849853515625, 0.8006477355957031, 1.2208099365234375, 0.07296371459960938, 1.1745758056640625, 0.27356910705566406, -0.12192916870117188, 0.2871589660644531, -3.5970001220703125, -0.29259681701660156, -0.1961212158203125, 1.8149261474609375, 2.7692947387695312, -2.567676544189453, 1.7263259887695312, 0.5338363647460938, 2.775165557861328, 3.51007080078125, -0.233184814453125, -2.4506912231445312, 4.641441345214844, 2.860645294189453, 1.15966796875, 0.1236114501953125, 0.7252960205078125, 0.35784339904785156, 2.3412857055664062, 3.57037353515625, -0.7723388671875, 0.9448890686035156, 3.9277496337890625, -0.026996612548828125, 2.2852115631103516, 6.972862243652344, 0.7256412506103516, 0.5758743286132812, 4.241546630859375, -0.4969940185546875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000531.npy"} +{"epoch": 0.8027210884353742, "step": 532, "batch_size": 64, "mean": 1.7501823902130127, "std": 2.268594741821289, "min": -2.2594223022460938, "p10": -1.0418003082275389, "median": 1.5494356155395508, "p90": 4.9906967163085945, "max": 7.722028732299805, "pos_frac": 0.765625, "sample": [2.515350341796875, 3.7071666717529297, 5.267295837402344, 5.639015197753906, -1.1277008056640625, -0.7070541381835938, 1.6507644653320312, 6.389194488525391, 0.4379310607910156, 1.64129638671875, -0.8413658142089844, 2.6028976440429688, 5.108772277832031, 0.6212997436523438, 4.5520477294921875, 1.3015327453613281, 2.0219192504882812, 1.87469482421875, 0.5630111694335938, -2.2594223022460938, 0.2901878356933594, 1.549753189086914, 3.0367813110351562, 1.1260910034179688, 1.1674346923828125, 2.64984130859375, 1.3884944915771484, 4.284858703613281, 2.8524818420410156, 3.6230850219726562, 2.7258758544921875, 0.4335136413574219, 0.2848052978515625, 7.722028732299805, 1.3264198303222656, 2.9658050537109375, 2.8829879760742188, -1.4110107421875, 0.5918121337890625, 0.4384784698486328, -0.4162101745605469, -0.8214645385742188, 0.5346775054931641, -0.3215599060058594, 5.680717468261719, 1.07940673828125, 3.9477157592773438, -1.563385009765625, 1.6948318481445312, 2.8195343017578125, 0.7737617492675781, 1.5491180419921875, -0.6955280303955078, 4.715187072753906, -1.246063232421875, 3.800067901611328, -0.7021236419677734, 2.2370758056640625, 6.076593399047852, -2.2002487182617188, -0.41777610778808594, -1.4911231994628906, 4.2926177978515625, 1.7974739074707031], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000532.npy"} +{"epoch": 0.8042328042328042, "step": 533, "batch_size": 64, "mean": 1.6374918222427368, "std": 2.375528335571289, "min": -4.780792236328125, "p10": -0.9969989776611328, "median": 1.4179763793945312, "p90": 5.3676494598388675, "max": 7.023899078369141, "pos_frac": 0.796875, "sample": [3.0847396850585938, 3.7933521270751953, -2.004608154296875, 3.2885189056396484, 1.7093772888183594, 2.2952022552490234, 0.7161712646484375, -0.09946823120117188, 3.1710548400878906, 3.423917770385742, 1.641265869140625, 0.4588298797607422, 1.9833030700683594, -0.4809246063232422, 3.176727294921875, 5.60980224609375, 0.6535854339599609, 2.5214691162109375, 2.70452880859375, 0.0994415283203125, 1.074554443359375, 0.7181987762451172, 1.507476806640625, 1.6721305847167969, -0.8303451538085938, 1.1473865509033203, 3.1285266876220703, -2.03228759765625, 1.0113658905029297, -2.7459964752197266, 0.6374740600585938, 0.10802078247070312, 4.28135871887207, 6.4974822998046875, -1.3064498901367188, 2.9362258911132812, -0.79998779296875, 5.393035888671875, 0.8643474578857422, 0.11039352416992188, 0.835113525390625, 1.9571609497070312, 1.0025215148925781, 1.6705589294433594, 6.0644378662109375, 7.023899078369141, 0.6902313232421875, -1.0200462341308594, 1.3284759521484375, -0.36400604248046875, -0.9432220458984375, 5.308414459228516, 1.8217887878417969, 0.18415069580078125, 0.5055961608886719, -2.2212753295898438, 0.7616119384765625, 2.9895172119140625, 1.7656478881835938, 5.902217864990234, 5.9198150634765625, 4.126373291015625, -4.780792236328125, 3.1520843505859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000533.npy"} +{"epoch": 0.8057445200302343, "step": 534, "batch_size": 64, "mean": 1.4657821655273438, "std": 1.9731861352920532, "min": -2.3995819091796875, "p10": -1.215110397338867, "median": 1.451329231262207, "p90": 3.6735139846801763, "max": 6.764373779296875, "pos_frac": 0.765625, "sample": [0.6662979125976562, -2.2233200073242188, 1.8585662841796875, 1.4086971282958984, 0.8032398223876953, 1.8610763549804688, 1.1716156005859375, -1.5664100646972656, 1.1243324279785156, -2.3995819091796875, -0.03594207763671875, 2.1746749877929688, 1.7662010192871094, 3.5558090209960938, -0.5886764526367188, 6.15631103515625, 2.0131988525390625, 0.6581821441650391, 3.270824432373047, -1.5317840576171875, 2.8662033081054688, 3.2783432006835938, 1.1576957702636719, 1.2001266479492188, 0.05956268310546875, 3.255401611328125, -2.2539749145507812, 2.2610740661621094, 4.231178283691406, 2.4151973724365234, 2.7517547607421875, -1.2643165588378906, 5.195976257324219, 4.599811553955078, 3.741619110107422, 3.289306640625, 2.4993038177490234, 1.6066513061523438, 6.764373779296875, 1.0930671691894531, 3.0247039794921875, 2.5685501098632812, -0.8967132568359375, 0.10262298583984375, 2.490591049194336, 2.2684974670410156, -1.1002960205078125, 1.0730361938476562, 1.31219482421875, -0.7786483764648438, 0.89837646484375, 3.436737060546875, 1.4939613342285156, -0.6936302185058594, 0.987396240234375, 1.1051483154296875, 1.9351768493652344, -0.9415855407714844, 1.602426528930664, -0.3310699462890625, 2.3207473754882812, 0.9827117919921875, 3.723958969116211, -1.6665077209472656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000534.npy"} +{"epoch": 0.8072562358276644, "step": 535, "batch_size": 64, "mean": 1.5417053699493408, "std": 2.547635316848755, "min": -4.336641311645508, "p10": -1.2740364074707031, "median": 1.4788970947265625, "p90": 4.794390487670903, "max": 8.690200805664062, "pos_frac": 0.71875, "sample": [0.34722900390625, -1.4838294982910156, 5.503009796142578, 2.8613243103027344, 2.3411483764648438, 3.681934356689453, -2.656911849975586, -1.7701129913330078, -0.19794464111328125, 1.6518402099609375, 2.130645751953125, 1.2560749053955078, 1.516519546508789, 2.7827224731445312, -0.47503089904785156, 0.451141357421875, -0.76446533203125, 2.41015625, 0.40522003173828125, -0.7009506225585938, 1.8051033020019531, 0.857208251953125, 3.0893402099609375, 0.566497802734375, 1.9649581909179688, -1.0771102905273438, 5.24188232421875, -0.9342842102050781, -3.2536468505859375, 0.3525714874267578, 7.0742645263671875, 7.7430877685546875, 3.4452762603759766, -1.2940292358398438, 1.0252494812011719, 1.6033954620361328, 0.36476898193359375, 7.46429443359375, -1.7353057861328125, 1.441274642944336, 2.5964736938476562, 0.5973167419433594, 8.690200805664062, 1.3240280151367188, 0.9434127807617188, -4.336641311645508, -0.0526123046875, 2.7647552490234375, 0.7962970733642578, 3.2303466796875, 1.8801651000976562, -1.0853652954101562, -0.3826103210449219, 3.7902259826660156, 2.7709789276123047, 1.9983787536621094, 1.7568855285644531, 2.806610107421875, 3.401287078857422, 2.4983673095703125, -0.062084197998046875, -1.227386474609375, 5.2247467041015625, 3.710845947265625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000535.npy"} +{"epoch": 0.8087679516250945, "step": 536, "batch_size": 64, "mean": 1.632230520248413, "std": 1.987197995185852, "min": -2.155853271484375, "p10": -0.6465129852294921, "median": 1.3935670852661133, "p90": 4.288426208496094, "max": 7.411748886108398, "pos_frac": 0.765625, "sample": [2.412036895751953, 0.19222259521484375, -1.1874008178710938, 7.411748886108398, 4.266326904296875, 0.46428680419921875, 0.9324417114257812, -2.155853271484375, 0.9199600219726562, -0.02313232421875, -0.89166259765625, 0.9739570617675781, 3.050811767578125, 2.4678726196289062, 0.6610755920410156, 1.612457275390625, 2.82391357421875, 4.2978973388671875, 6.9602203369140625, 2.4980106353759766, 1.2017784118652344, -0.5286064147949219, 3.707693099975586, 2.4970016479492188, -0.0542144775390625, 5.700187683105469, 1.9830169677734375, 0.7630672454833984, 1.6268081665039062, 1.7919044494628906, 2.8548545837402344, -1.1630687713623047, 1.3149795532226562, 1.7209510803222656, -0.6970443725585938, -0.3650360107421875, 3.6445560455322266, -0.06878662109375, 2.3664703369140625, 0.3954620361328125, 4.3235321044921875, 3.538604736328125, 1.8609580993652344, -1.1591949462890625, -0.29401206970214844, 0.39894866943359375, 0.06253814697265625, 3.3029651641845703, 1.4721546173095703, 3.4530029296875, -0.22802352905273438, 3.6131362915039062, 0.7790374755859375, 4.682838439941406, 2.6250457763671875, 1.5032386779785156, 0.8760337829589844, 4.4510650634765625, -0.10439109802246094, -1.7097320556640625, 1.0624065399169922, 1.2731952667236328, 0.27036285400390625, 2.0298690795898438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000536.npy"} +{"epoch": 0.8102796674225246, "step": 537, "batch_size": 64, "mean": 1.6563512086868286, "std": 2.3740904331207275, "min": -3.519969940185547, "p10": -1.4807817459106443, "median": 1.66680908203125, "p90": 4.488338088989259, "max": 7.096710205078125, "pos_frac": 0.75, "sample": [1.0702171325683594, 2.8294906616210938, 1.90240478515625, 3.273468017578125, 3.822509765625, 3.0558948516845703, 4.101982116699219, -0.26097679138183594, 2.5985031127929688, 1.8723907470703125, 0.19330787658691406, 2.05615234375, 4.704387664794922, 3.5071868896484375, 5.210498809814453, 2.3489990234375, 4.197017669677734, 2.8593406677246094, 0.3273735046386719, -1.5753059387207031, -1.035430908203125, 1.581329345703125, 1.4933929443359375, 0.9415397644042969, -0.9934234619140625, 3.9178009033203125, 1.579376220703125, 0.470458984375, -3.0772857666015625, 0.15928077697753906, 1.1575241088867188, -0.1050262451171875, 0.8810386657714844, -1.113006591796875, -1.7041511535644531, 0.16686630249023438, 5.7310791015625, 1.752288818359375, 3.5158538818359375, -0.14213180541992188, 6.077751159667969, 3.6428375244140625, 3.4979248046875, 1.458831787109375, -1.2602252960205078, 0.00246429443359375, 3.6264495849609375, -2.2113723754882812, 0.9346237182617188, 4.6883392333984375, 1.39300537109375, 2.947988510131836, -3.1154251098632812, 7.096710205078125, 4.192100524902344, 4.613189697265625, -0.11686325073242188, 3.77691650390625, -1.9404048919677734, 1.8802490234375, 3.964569091796875, 2.1701126098632812, -3.519969940185547, -1.0655441284179688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000537.npy"} +{"epoch": 0.8117913832199547, "step": 538, "batch_size": 64, "mean": 1.9266443252563477, "std": 2.1947736740112305, "min": -3.2612266540527344, "p10": -0.7258441925048827, "median": 1.757101058959961, "p90": 4.198146820068359, "max": 8.60830307006836, "pos_frac": 0.8125, "sample": [0.35411834716796875, 2.4688587188720703, -0.7597770690917969, 0.7234840393066406, 2.2717552185058594, -3.2612266540527344, 2.215648651123047, -0.3809814453125, 3.1867752075195312, 2.843606948852539, 1.2375755310058594, 4.110748291015625, -0.017181396484375, 3.7677154541015625, 3.4260292053222656, 2.862445831298828, 3.9366073608398438, 5.173614501953125, 3.0938491821289062, 2.0263309478759766, 1.73773193359375, -0.5750312805175781, 4.235603332519531, 1.6061248779296875, 1.1042652130126953, -1.1403236389160156, 1.3896713256835938, 1.4382247924804688, 6.677001953125, 3.5798263549804688, -0.64666748046875, 1.4484329223632812, -1.6448974609375, -0.139862060546875, -0.9568710327148438, 1.3008804321289062, 1.689666748046875, 1.7764701843261719, 0.63006591796875, 0.17925262451171875, 3.5849990844726562, 0.5565948486328125, 5.6171875, 3.7270126342773438, 0.30274391174316406, 2.585294723510742, -1.1876678466796875, 5.172540664672852, 1.8588409423828125, 8.002944946289062, 2.312429428100586, 2.7391128540039062, 2.4028472900390625, -1.23516845703125, 1.004730224609375, 0.5463523864746094, 8.60830307006836, 0.5219516754150391, 2.350177764892578, 1.223297119140625, 1.3479385375976562, 3.1498546600341797, 2.5723190307617188, 2.571033477783203], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000538.npy"} +{"epoch": 0.8133030990173847, "step": 539, "batch_size": 64, "mean": 1.6932867765426636, "std": 2.0713915824890137, "min": -2.215351104736328, "p10": -0.46439228057861326, "median": 1.1685762405395508, "p90": 4.857380676269532, "max": 6.657135009765625, "pos_frac": 0.8125, "sample": [1.7372970581054688, 3.812000274658203, 0.11581611633300781, 5.833946228027344, 2.5982627868652344, 1.1818771362304688, 2.5495872497558594, 2.10064697265625, 5.053955078125, 0.37056732177734375, 6.539703369140625, 1.86505126953125, 0.06781005859375, 1.4948654174804688, -0.4267425537109375, -0.04463005065917969, 4.2016143798828125, -1.0490837097167969, 4.621315002441406, 0.9828948974609375, 2.0007553100585938, -0.4805278778076172, 2.5616607666015625, -1.58575439453125, 0.8315582275390625, -0.3308258056640625, 0.5612068176269531, 0.5458602905273438, 6.657135009765625, 1.04815673828125, 0.02831268310546875, 1.2310676574707031, 0.4802970886230469, 0.20003509521484375, 3.91326904296875, 1.2430801391601562, 0.7026138305664062, 1.6088104248046875, 3.7557144165039062, 0.5824966430664062, -0.625030517578125, 1.1286773681640625, 0.753387451171875, 0.750091552734375, 3.1190338134765625, 1.49908447265625, -0.7898330688476562, 6.122962951660156, 2.4683685302734375, -0.30458831787109375, 1.5353660583496094, 2.6881370544433594, 1.0915946960449219, -0.33490753173828125, 0.3631134033203125, 4.88543701171875, -0.6544342041015625, 0.6562938690185547, -2.215351104736328, 3.6125030517578125, 1.1552753448486328, 4.7919158935546875, 6.0387115478515625, 1.472869873046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000539.npy"} +{"epoch": 0.8148148148148148, "step": 540, "batch_size": 64, "mean": 2.1190683841705322, "std": 2.467097043991089, "min": -3.2476062774658203, "p10": -0.7103618621826171, "median": 1.8773488998413086, "p90": 5.6226440429687505, "max": 7.514862060546875, "pos_frac": 0.765625, "sample": [-1.395843505859375, 0.8525314331054688, 7.514862060546875, 2.6113357543945312, 0.7827854156494141, -3.2476062774658203, 1.9956207275390625, 0.0857391357421875, 0.2524261474609375, 3.9852333068847656, -0.3268318176269531, 3.204925537109375, 4.373418807983398, 1.3082122802734375, -1.222991943359375, 1.856882095336914, 6.0216522216796875, 2.5276107788085938, -0.6105918884277344, 3.1103591918945312, 4.3176727294921875, -0.7531204223632812, 4.7464599609375, 1.225900650024414, 5.215484619140625, 5.041648864746094, -0.8732528686523438, 2.2477874755859375, -0.473876953125, 2.4505081176757812, -1.666961669921875, 4.4088134765625, 5.705535888671875, 1.7704238891601562, 5.429229736328125, 0.7552947998046875, 0.6396331787109375, 3.254129409790039, -1.037933349609375, 0.361358642578125, -0.08014678955078125, 3.456817626953125, 1.9011192321777344, 6.716194152832031, 5.308536529541016, 1.8978157043457031, 6.065055847167969, 2.890605926513672, -0.0109405517578125, -0.4712867736816406, 1.0967483520507812, 6.641387939453125, -0.39521026611328125, -0.4122962951660156, 1.0456466674804688, 3.36907958984375, 0.5987167358398438, 6.22802734375, 0.2991790771484375, 4.1059722900390625, 2.5670623779296875, 1.1412487030029297, 0.4346275329589844, 4.781949996948242], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000540.npy"} +{"epoch": 0.8163265306122449, "step": 541, "batch_size": 64, "mean": 1.9675726890563965, "std": 2.4450111389160156, "min": -3.512054443359375, "p10": -0.9908992767333983, "median": 2.1154708862304688, "p90": 4.943337249755861, "max": 9.342594146728516, "pos_frac": 0.78125, "sample": [9.342594146728516, 3.3567733764648438, 4.448875427246094, 5.70311164855957, -0.402496337890625, 3.62322998046875, -0.0511322021484375, 2.7535552978515625, 3.8752384185791016, -0.7476959228515625, 0.44647216796875, 3.902252197265625, 2.8557205200195312, 2.066984176635742, 1.9854049682617188, -2.723552703857422, 1.6280860900878906, 5.145973205566406, 1.2039642333984375, 3.6296253204345703, -1.057525634765625, 2.5280685424804688, 5.994132995605469, 3.5250091552734375, 5.953971862792969, 2.4939002990722656, 2.5878753662109375, -0.11063003540039062, -2.775178909301758, -0.6110744476318359, 2.0677032470703125, 1.2808303833007812, 4.068946838378906, 4.47052001953125, 0.2982063293457031, 3.2999343872070312, 1.6863632202148438, -1.7878189086914062, 2.493389129638672, 0.6005363464355469, 0.2401599884033203, -0.8354377746582031, 5.42523193359375, 3.522693634033203, 1.0110931396484375, 2.163238525390625, 4.142391204833984, 2.396820068359375, -3.512054443359375, 0.32898712158203125, 0.39147377014160156, 3.9219207763671875, 2.492046356201172, 1.398345947265625, -0.19704055786132812, -1.7820587158203125, 1.6978759765625, 2.5858020782470703, 0.9290676116943359, 2.8312454223632812, 0.22023773193359375, -2.0492382049560547, 5.964227676391602, 3.5874710083007812], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000541.npy"} +{"epoch": 0.817838246409675, "step": 542, "batch_size": 64, "mean": 1.8493754863739014, "std": 2.4488472938537598, "min": -3.142333984375, "p10": -1.4673080444335933, "median": 1.7460784912109375, "p90": 5.4956096649169925, "max": 7.766387939453125, "pos_frac": 0.75, "sample": [3.5842323303222656, 4.064670562744141, -0.09328460693359375, 3.68817138671875, 0.9791355133056641, 2.59112548828125, 1.85906982421875, 3.3221473693847656, 1.4451675415039062, 1.0109100341796875, 2.935100555419922, -0.7141532897949219, -1.1497573852539062, 2.0783843994140625, 0.7787952423095703, 2.606538772583008, -1.8349533081054688, 2.6471710205078125, -1.6784591674804688, 4.021934509277344, 5.724617004394531, 4.179237365722656, 0.8742141723632812, -0.8344497680664062, -2.093351364135742, -2.5579833984375, 0.7406234741210938, 0.226776123046875, 0.12885284423828125, -0.2954139709472656, 6.147605895996094, 5.534603118896484, 0.9391574859619141, 4.03399658203125, -0.5185985565185547, -0.8282890319824219, -3.142333984375, 0.884979248046875, -1.6034011840820312, 4.079803466796875, 2.3212966918945312, 0.9017333984375, 6.202980041503906, -0.06503868103027344, 1.764801025390625, 1.704833984375, 2.884246826171875, 2.285053253173828, 0.06792449951171875, 4.458091735839844, 6.045570373535156, 6.274372100830078, -1.8039493560791016, 2.5824851989746094, 1.4827938079833984, 2.7191734313964844, -0.08311843872070312, 7.766387939453125, 3.382984161376953, 1.72735595703125, 1.7661857604980469, 3.9806594848632812, 0.82598876953125, 5.404624938964844], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000542.npy"} +{"epoch": 0.8193499622071051, "step": 543, "batch_size": 64, "mean": 1.5270410776138306, "std": 2.059453010559082, "min": -5.1725006103515625, "p10": -0.4969430923461914, "median": 1.2470722198486328, "p90": 4.454564094543458, "max": 6.439054489135742, "pos_frac": 0.78125, "sample": [0.2317047119140625, 0.8743076324462891, -0.5189476013183594, -0.286956787109375, 1.9230194091796875, 0.6232261657714844, 1.95587158203125, 4.749763488769531, -2.7098960876464844, 4.242095947265625, -0.50006103515625, 2.420848846435547, 1.1712989807128906, 3.0965805053710938, 4.1921539306640625, 1.9540863037109375, 6.439054489135742, -1.9465312957763672, 1.061727523803711, 2.967266082763672, 0.7978591918945312, 1.4000091552734375, 0.5606613159179688, 0.7952346801757812, 0.9444618225097656, 4.545621871948242, -5.1725006103515625, 0.38774681091308594, 0.7517967224121094, -0.4121589660644531, 1.4869537353515625, 4.12298583984375, 5.1094970703125, 2.22247314453125, 3.479562759399414, 0.9030609130859375, 4.9385986328125, -0.21168136596679688, -1.2914199829101562, -0.4446086883544922, 2.896190643310547, 1.710662841796875, 1.672332763671875, 4.916961669921875, 1.6932640075683594, 1.3092269897460938, 4.86090087890625, 4.114906311035156, -0.6266059875488281, 0.20433425903320312, 1.1849174499511719, 0.7116661071777344, -0.18653106689453125, 1.8142166137695312, 3.11419677734375, 1.09014892578125, 2.8026046752929688, 1.4755783081054688, 0.9947891235351562, -0.4896678924560547, 0.710479736328125, 3.5214004516601562, -0.4793968200683594, 1.8592853546142578], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000543.npy"} +{"epoch": 0.8208616780045351, "step": 544, "batch_size": 64, "mean": 2.0900115966796875, "std": 2.841240882873535, "min": -3.1570167541503906, "p10": -1.6334117889404296, "median": 1.5615406036376953, "p90": 6.070260620117188, "max": 7.724884033203125, "pos_frac": 0.8125, "sample": [5.73004150390625, -1.6101722717285156, 1.7412490844726562, 1.4701042175292969, 2.5324783325195312, 2.43511962890625, -2.0333213806152344, 0.4585990905761719, 6.087921142578125, 0.033721923828125, 0.687530517578125, 0.8344745635986328, 0.94281005859375, 5.080390930175781, 1.2044525146484375, 0.2515125274658203, -3.1570167541503906, 1.6995697021484375, 5.3528289794921875, 1.693695068359375, 1.1922607421875, 0.109100341796875, 2.4715118408203125, 3.6197471618652344, 3.973480224609375, 0.49676513671875, 1.6529769897460938, -0.9007568359375, 0.1482830047607422, -1.64337158203125, 7.6856842041015625, -1.8300247192382812, 2.514251708984375, -0.6895980834960938, 6.029052734375, 1.8645172119140625, 5.326812744140625, -3.1304664611816406, 1.3436565399169922, 2.2434768676757812, -0.56829833984375, 1.0132522583007812, 5.9947967529296875, 0.6485748291015625, 1.0397567749023438, 1.06072998046875, 4.70863151550293, 1.080892562866211, 5.6844940185546875, -2.4114837646484375, 0.10808372497558594, 6.640106201171875, 5.5990142822265625, 6.805519104003906, -2.1105785369873047, 3.0733718872070312, 7.724884033203125, 0.5039520263671875, 7.368003845214844, 6.853065490722656, -0.4435863494873047, 3.151153564453125, 4.330711364746094, 1.992340087890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000544.npy"} +{"epoch": 0.8223733938019653, "step": 545, "batch_size": 64, "mean": 1.6072427034378052, "std": 2.4273147583007812, "min": -2.866851806640625, "p10": -1.087687873840332, "median": 1.297347068786621, "p90": 5.067752075195314, "max": 9.397811889648438, "pos_frac": 0.71875, "sample": [3.7751216888427734, 1.5109786987304688, -0.6024322509765625, 5.344829559326172, 1.1173286437988281, 1.3153762817382812, -1.2267074584960938, -0.2980308532714844, -0.24657249450683594, 2.300220489501953, 2.212158203125, -1.1030120849609375, 6.306755065917969, 1.279317855834961, -0.11750221252441406, 2.971752166748047, 0.3980865478515625, 0.30887603759765625, -1.051931381225586, 1.3748626708984375, 1.7276382446289062, 5.2470703125, -0.7511444091796875, 3.376251220703125, 5.7071685791015625, 4.4723663330078125, 1.811676025390625, -0.17052268981933594, 2.8420867919921875, 0.008207321166992188, -0.017389297485351562, -2.6007652282714844, -2.866851806640625, 2.1739959716796875, 0.60577392578125, 0.4295768737792969, 5.179107666015625, 5.983707427978516, 4.68341064453125, -0.0140380859375, -0.24078369140625, 1.5230865478515625, 3.042522430419922, 1.7890090942382812, 0.6878738403320312, 2.2672653198242188, -1.7753429412841797, 1.1307449340820312, 0.840789794921875, -2.659841537475586, 1.1558837890625, 3.7240447998046875, 4.80792236328125, 9.397811889648438, 2.074615478515625, -2.354400634765625, 0.8292427062988281, 1.86444091796875, -0.7342376708984375, 0.25240325927734375, 3.8099746704101562, 4.411170959472656, 2.7285919189453125, 0.8939476013183594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000545.npy"} +{"epoch": 0.8238851095993953, "step": 546, "batch_size": 64, "mean": 1.52994704246521, "std": 2.222773790359497, "min": -3.7054595947265625, "p10": -1.4901535034179687, "median": 1.4971466064453125, "p90": 4.451861000061036, "max": 7.170970916748047, "pos_frac": 0.78125, "sample": [4.9562225341796875, -1.414520263671875, 3.3430633544921875, 1.4392547607421875, 1.254537582397461, 3.0353660583496094, 1.3018798828125, -1.5225677490234375, -0.453155517578125, -0.15875816345214844, -3.7054595947265625, 2.4403457641601562, 1.9359626770019531, 0.7191352844238281, -2.2584152221679688, 3.1259765625, 0.07746315002441406, 0.6268482208251953, -2.226888656616211, 0.8907661437988281, -0.738555908203125, 2.8346405029296875, 2.0149002075195312, 0.90679931640625, 2.58978271484375, 0.9908981323242188, 2.984783172607422, -0.7457427978515625, 7.170970916748047, 5.4165496826171875, 0.6948947906494141, 2.5410995483398438, 4.4943695068359375, 1.5257568359375, 2.464986801147461, -3.503662109375, 1.468536376953125, 2.85400390625, 0.27242088317871094, 2.8008499145507812, 2.0437049865722656, 1.698495864868164, -1.8043937683105469, 1.3815078735351562, 2.0848827362060547, 4.591804504394531, -0.8580646514892578, 2.0473804473876953, 2.807199478149414, 3.4128036499023438, 4.052604675292969, 0.45575714111328125, 4.819305419921875, 0.99493408203125, -1.0345039367675781, 1.329580307006836, 4.35267448425293, 3.6669158935546875, 5.106607437133789, 1.1691665649414062, 2.4958114624023438, -2.8790130615234375, 2.857271194458008, 0.6788425445556641], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000546.npy"} +{"epoch": 0.8253968253968254, "step": 547, "batch_size": 64, "mean": 2.0226151943206787, "std": 2.3040249347686768, "min": -4.553581237792969, "p10": -0.3149238586425781, "median": 1.9011650085449219, "p90": 4.500607299804687, "max": 8.985511779785156, "pos_frac": 0.859375, "sample": [0.7432384490966797, 0.02072906494140625, 1.3945789337158203, 1.7014007568359375, 5.104583740234375, 0.569366455078125, -1.0049571990966797, 8.985511779785156, 3.817424774169922, 2.3474082946777344, 3.102386474609375, 2.9757919311523438, 1.671142578125, 1.6669750213623047, 0.8149051666259766, 0.37613677978515625, 5.0425872802734375, 8.291389465332031, 0.5348777770996094, 2.0757598876953125, 3.1322250366210938, 2.0074462890625, 5.896724700927734, 0.48394775390625, 4.45025634765625, 1.5272254943847656, 0.9375, 3.842620849609375, 4.522186279296875, -0.11717987060546875, 3.8591842651367188, -0.3268280029296875, -4.553581237792969, 1.7948837280273438, -1.5285186767578125, 2.648134231567383, 3.843334197998047, 0.5072193145751953, 2.3919677734375, 2.661355972290039, 2.093639373779297, 4.205970764160156, 2.0494003295898438, 2.934377670288086, -2.248668670654297, 3.9862537384033203, 1.2248992919921875, -2.7689285278320312, -0.28714752197265625, 1.56768798828125, 2.5146713256835938, 1.6559562683105469, 5.3996429443359375, -0.8316459655761719, 0.9059963226318359, 0.9714508056640625, 2.064411163330078, 4.309230804443359, 1.0546283721923828, 2.1666717529296875, 3.430727005004883, 1.6019363403320312, 3.1475067138671875, 0.08736610412597656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000547.npy"} +{"epoch": 0.8269085411942555, "step": 548, "batch_size": 64, "mean": 1.4542981386184692, "std": 2.001265048980713, "min": -1.7806129455566406, "p10": -0.7135210037231445, "median": 1.1551170349121094, "p90": 4.00530014038086, "max": 7.72462272644043, "pos_frac": 0.703125, "sample": [0.54595947265625, 2.6574630737304688, 2.56402587890625, -1.6801605224609375, -0.6180744171142578, 2.3365020751953125, -1.2160873413085938, 6.468738555908203, 3.0148963928222656, 1.6457977294921875, -0.647674560546875, 4.58856201171875, 0.44719696044921875, -0.36646270751953125, -0.7410888671875, 1.6638565063476562, -0.461181640625, 0.9307727813720703, 1.3389511108398438, 2.2781524658203125, 3.2372589111328125, 1.114084243774414, -1.7806129455566406, 1.2579269409179688, 1.1512298583984375, 0.9750251770019531, 5.2238616943359375, 3.3641395568847656, 3.5265274047851562, 4.1937713623046875, 1.0124473571777344, 1.1590042114257812, 0.9125404357910156, 1.251953125, -0.4215240478515625, 0.7428131103515625, 3.897735595703125, 1.7131385803222656, -0.588714599609375, 1.9991226196289062, 4.051399230957031, 2.5471439361572266, 2.6086273193359375, -1.20452880859375, 5.8323516845703125, -0.2665519714355469, -0.4006500244140625, 2.1916046142578125, -1.0978069305419922, 0.4647369384765625, 2.01129150390625, -0.7214431762695312, 1.599191665649414, 2.6660118103027344, 1.0100021362304688, -0.1851806640625, 3.607637405395508, -0.45098876953125, 1.0697555541992188, -0.5931243896484375, 7.72462272644043, 1.6437644958496094, -0.6950359344482422, 0.9703750610351562], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000548.npy"} +{"epoch": 0.8284202569916855, "step": 549, "batch_size": 64, "mean": 1.4225798845291138, "std": 2.2987287044525146, "min": -3.7404632568359375, "p10": -1.4312398910522461, "median": 1.1091957092285156, "p90": 4.482220458984376, "max": 6.592737197875977, "pos_frac": 0.734375, "sample": [-0.7796630859375, 4.540008544921875, 0.3799896240234375, 0.5022964477539062, -0.4132080078125, 0.1837615966796875, 2.829437255859375, 0.4162559509277344, 1.1986923217773438, 5.567352294921875, 4.750213623046875, 0.9032669067382812, 0.6014499664306641, 6.592737197875977, 0.41925048828125, 1.0196990966796875, -1.4553413391113281, 2.3923568725585938, 2.8973617553710938, 3.3771400451660156, 2.7443161010742188, 3.737293243408203, 2.429107666015625, -0.5903549194335938, 2.9547119140625, 0.867706298828125, 1.0153007507324219, 2.8586196899414062, 1.402862548828125, -3.3836288452148438, 3.5440502166748047, 6.339508056640625, 4.30035400390625, -0.04427337646484375, 1.4409198760986328, 2.6723403930664062, -0.8954620361328125, -2.2737579345703125, -1.0190353393554688, 1.9267425537109375, -1.8311767578125, 2.2185287475585938, 0.2714557647705078, 0.4358863830566406, 1.9472789764404297, 2.712129592895508, -1.3996143341064453, 1.5771713256835938, 1.371917724609375, -1.444793701171875, -0.5338172912597656, 3.9141902923583984, 4.347381591796875, 1.463897705078125, 0.8240203857421875, -1.00531005859375, 2.189577102661133, 0.6279869079589844, 0.9398689270019531, 5.286079406738281, 6.448221206665039, -3.7404632568359375, -1.46453857421875, -0.0611419677734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000549.npy"} +{"epoch": 0.8299319727891157, "step": 550, "batch_size": 64, "mean": 1.42422616481781, "std": 2.1236660480499268, "min": -3.7517623901367188, "p10": -0.6409397125244141, "median": 0.9664344787597656, "p90": 4.384810256958009, "max": 6.826847076416016, "pos_frac": 0.8125, "sample": [0.4001502990722656, 6.826847076416016, 5.341938018798828, 2.2913589477539062, -2.101320266723633, 1.5005912780761719, -0.5586624145507812, 0.85809326171875, 0.5576400756835938, 0.3941154479980469, 2.9725208282470703, 1.833251953125, 2.6925125122070312, 1.4312381744384766, -0.21442604064941406, 5.4405517578125, 6.1766815185546875, 1.4242401123046875, 3.2005767822265625, 1.6066226959228516, 2.6456298828125, 1.1742515563964844, 1.5608749389648438, 3.1297836303710938, 3.369293212890625, -0.529541015625, 3.1764984130859375, 0.2441253662109375, 0.014461517333984375, -0.9156951904296875, -2.1783924102783203, 0.5146026611328125, 1.9188995361328125, 0.7060546875, 0.737548828125, 0.8668899536132812, 1.3612136840820312, 0.9435653686523438, -0.06614303588867188, 0.5904731750488281, 0.08607101440429688, 0.9893035888671875, 2.9332656860351562, 0.5338611602783203, 2.0834884643554688, 1.62109375, 4.8622283935546875, 1.9870433807373047, 0.27854156494140625, 3.2671165466308594, -0.6005783081054688, 3.0808143615722656, 3.9395904541015625, 0.9298992156982422, 4.575618743896484, 0.5103282928466797, -3.7517623901367188, 0.1179656982421875, 6.2548980712890625, -3.093690872192383, 0.7987747192382812, 0.49514007568359375, -1.4292182922363281, -0.6582374572753906], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000550.npy"} +{"epoch": 0.8314436885865457, "step": 551, "batch_size": 64, "mean": 1.5473123788833618, "std": 2.178978443145752, "min": -2.443634033203125, "p10": -0.6686452865600586, "median": 1.2613258361816406, "p90": 4.564056396484376, "max": 7.2568817138671875, "pos_frac": 0.765625, "sample": [2.0566177368164062, 0.9292144775390625, 3.0305252075195312, 1.64215087890625, 2.080629348754883, 2.597087860107422, 6.747314453125, -0.037746429443359375, 0.2951641082763672, 4.6765289306640625, 0.19986724853515625, 1.1611099243164062, 6.62721061706543, 0.4928722381591797, 4.309837341308594, 1.5039539337158203, 0.568359375, 1.433258056640625, -2.443634033203125, -0.6730747222900391, 1.0153999328613281, 1.3162612915039062, 2.66461181640625, 1.06658935546875, 1.3423309326171875, 4.694435119628906, 1.0363540649414062, 1.206390380859375, 4.371070861816406, 1.4989509582519531, 1.3808364868164062, 0.2065887451171875, 0.37315940856933594, 1.96844482421875, -0.6583099365234375, 4.4134521484375, -1.375885009765625, 6.85821533203125, 0.12388992309570312, 7.2568817138671875, -0.2535209655761719, 0.3276214599609375, 0.21550559997558594, -0.013895034790039062, 4.15704345703125, 1.9265899658203125, 2.23516845703125, 2.409992218017578, -0.23073959350585938, -1.8523101806640625, -0.12616348266601562, 2.6925048828125, 0.01434326171875, -2.31195068359375, -0.9141159057617188, 1.3401947021484375, -0.6370887756347656, 3.6275634765625, -0.4524383544921875, 1.40142822265625, -0.852996826171875, 0.3616485595703125, 3.3780899047851562, 4.62860107421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000551.npy"} +{"epoch": 0.8329554043839759, "step": 552, "batch_size": 64, "mean": 1.7723689079284668, "std": 2.15144944190979, "min": -3.416606903076172, "p10": -0.5034833908081052, "median": 1.8679161071777344, "p90": 4.152058410644531, "max": 7.613658905029297, "pos_frac": 0.859375, "sample": [-0.5954303741455078, 2.321502685546875, 2.4105300903320312, -0.15694427490234375, 3.7968616485595703, 0.7607364654541016, 0.37117767333984375, 0.6037139892578125, 0.16024017333984375, 4.1729278564453125, 3.689422607421875, 6.7069244384765625, 2.4671497344970703, 3.3836669921875, 1.6344871520996094, 0.21722793579101562, 0.00612640380859375, 3.8326950073242188, 1.8311996459960938, 0.1177825927734375, 3.233001708984375, 2.6071548461914062, 1.000762939453125, 1.7277507781982422, 3.2637786865234375, 0.48616790771484375, 3.0959396362304688, 3.908933639526367, 1.62939453125, -2.59375, 1.9749298095703125, 4.18549919128418, 2.70672607421875, -3.357706069946289, 2.500469207763672, -0.2889404296875, 4.103363037109375, 2.4825592041015625, 0.16489410400390625, 2.5061798095703125, 4.189117431640625, 1.2311172485351562, -1.7112274169921875, 6.159332275390625, 0.944000244140625, 1.904632568359375, 3.2494735717773438, 1.5908889770507812, -2.713226318359375, -0.8595046997070312, 7.613658905029297, 1.6841201782226562, 2.114816665649414, -3.416606903076172, 3.0050010681152344, 0.9147109985351562, 0.5282669067382812, 1.987874984741211, 2.594482421875, 2.4905357360839844, 4.7525787353515625, 0.6697463989257812, 0.9776458740234375, 0.4610710144042969], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000552.npy"} +{"epoch": 0.8344671201814059, "step": 553, "batch_size": 64, "mean": 1.720375657081604, "std": 3.0175955295562744, "min": -5.701774597167969, "p10": -1.5471832275390625, "median": 1.746124267578125, "p90": 6.111185836791995, "max": 9.100616455078125, "pos_frac": 0.703125, "sample": [5.534507751464844, -1.5522117614746094, 0.26966094970703125, 2.747587203979492, 1.5122833251953125, -0.09139060974121094, 4.780324935913086, 2.33428955078125, -1.0362167358398438, 0.003711700439453125, 0.32733917236328125, 3.226959228515625, 2.8099632263183594, 2.6942138671875, 0.24117279052734375, 3.8907089233398438, -0.2977294921875, 4.187778472900391, 0.4297294616699219, -5.701774597167969, -1.5354499816894531, 6.4296722412109375, 2.0218353271484375, 2.4843406677246094, 6.4022979736328125, 3.7521743774414062, -0.100677490234375, 0.4361152648925781, -3.398681640625, -2.439189910888672, 8.261070251464844, 4.471309661865234, 4.193939208984375, 8.88275146484375, 1.7544174194335938, 2.779369354248047, 6.358333587646484, 2.0312042236328125, 3.00750732421875, 9.100616455078125, 2.1053848266601562, 1.7378311157226562, -0.34060096740722656, 0.4546222686767578, -1.7253952026367188, 0.144805908203125, -1.2306995391845703, 8.56155014038086, -3.2805862426757812, -1.5883674621582031, 0.46617889404296875, 1.5286407470703125, 2.6425933837890625, 1.9398441314697266, -0.46802520751953125, 2.877593994140625, 0.4793281555175781, -1.0833587646484375, 2.773040771484375, 2.3790817260742188, -0.8839797973632812, -0.7570152282714844, 3.4142913818359375, -1.24658203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000553.npy"} +{"epoch": 0.8359788359788359, "step": 554, "batch_size": 64, "mean": 1.4661433696746826, "std": 2.3758623600006104, "min": -5.7312469482421875, "p10": -0.8893892288208007, "median": 1.1798639297485352, "p90": 4.849100112915041, "max": 8.434545516967773, "pos_frac": 0.734375, "sample": [0.03018951416015625, 4.470760345458984, 7.346288681030273, 5.0112457275390625, 0.5504684448242188, 1.1809158325195312, 1.6973152160644531, 0.7329139709472656, 2.0028076171875, 1.9268875122070312, -0.9020843505859375, -0.13358306884765625, -0.6092147827148438, 3.4354171752929688, -2.4939613342285156, 1.7203292846679688, 2.144723892211914, -2.1107330322265625, 1.41033935546875, 0.7896823883056641, 0.5554733276367188, 1.7731475830078125, -0.20699501037597656, -0.9720458984375, 1.743743896484375, 3.8024063110351562, 5.341560363769531, 0.5824851989746094, -0.7525138854980469, 1.4755020141601562, 8.434545516967773, 0.20293426513671875, 1.7522335052490234, -0.16354751586914062, -0.12058639526367188, -0.9012813568115234, 2.522388458251953, -0.23073577880859375, 2.1291046142578125, -0.0710601806640625, -5.7312469482421875, 3.56463623046875, -0.8616409301757812, 1.6694526672363281, 2.1035614013671875, 0.4250221252441406, 1.5359935760498047, 1.178812026977539, 0.17162513732910156, 0.576751708984375, 2.5376205444335938, 0.9330978393554688, -0.9744071960449219, 4.208106994628906, 5.2553558349609375, 0.25921630859375, 1.1328582763671875, 2.4847640991210938, 1.113800048828125, 3.8540477752685547, 5.412200927734375, -0.41046142578125, 6.848045349121094, 1.4484977722167969], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000554.npy"} +{"epoch": 0.8374905517762661, "step": 555, "batch_size": 64, "mean": 1.225550651550293, "std": 2.3334224224090576, "min": -6.0291595458984375, "p10": -1.504043960571289, "median": 1.326131820678711, "p90": 4.17529640197754, "max": 6.447967529296875, "pos_frac": 0.734375, "sample": [0.42621421813964844, -4.653926849365234, 0.0018825531005859375, 0.3681488037109375, 2.5393218994140625, -0.5495223999023438, -1.7955474853515625, 0.23525238037109375, -0.8559989929199219, -1.7974929809570312, 2.593963623046875, -2.237030029296875, 2.878631591796875, 1.300140380859375, 4.071590423583984, 4.708587646484375, 0.20869064331054688, -0.2539024353027344, 0.6924095153808594, 2.5328140258789062, -0.01055145263671875, -0.02954864501953125, 6.447967529296875, -0.172332763671875, 2.6304874420166016, 0.7770156860351562, 3.2063331604003906, 4.556917190551758, 0.62640380859375, 1.8270187377929688, 3.077474594116211, 2.2014541625976562, 1.9997138977050781, 4.8271331787109375, -2.3778305053710938, 1.3521232604980469, -6.0291595458984375, 1.7994880676269531, 0.5484771728515625, 1.8411693572998047, 0.14007568359375, 1.8975296020507812, 0.255126953125, 1.7168655395507812, -1.2954330444335938, 0.4373207092285156, 6.0750885009765625, 2.915740966796875, 3.9422378540039062, -1.182525634765625, 3.728839874267578, -0.33416748046875, -1.5695648193359375, 0.18073272705078125, 1.8793830871582031, 2.5038833618164062, 4.2197418212890625, 2.4146156311035156, 1.4707508087158203, -1.3511619567871094, 4.851654052734375, 2.856555938720703, 2.8830947875976562, 0.28487586975097656], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000555.npy"} +{"epoch": 0.8390022675736961, "step": 556, "batch_size": 64, "mean": 1.528992772102356, "std": 2.3979332447052, "min": -2.6097564697265625, "p10": -1.5226890563964843, "median": 1.0375328063964844, "p90": 4.301710510253907, "max": 8.4234619140625, "pos_frac": 0.734375, "sample": [2.04766845703125, 1.0346603393554688, 1.42742919921875, 0.023670196533203125, -0.9570693969726562, 4.230632781982422, 6.654483795166016, 0.8257732391357422, -0.17968368530273438, -2.6097564697265625, 0.33840370178222656, 0.8947715759277344, 4.245079040527344, 4.232330322265625, 0.0273590087890625, -0.40711212158203125, 2.8533172607421875, -0.4282684326171875, 2.0610389709472656, 2.7476959228515625, -0.429534912109375, 3.4730758666992188, 4.951231002807617, 1.0404052734375, -0.10871505737304688, 0.9939727783203125, -1.9789810180664062, 3.422189712524414, 3.445301055908203, -0.7915191650390625, 0.1300811767578125, -2.3832664489746094, 6.876060485839844, 0.7093887329101562, -0.22841644287109375, 1.1188125610351562, 1.394683837890625, 4.325981140136719, 0.6832351684570312, 0.2899284362792969, 3.589803695678711, 8.4234619140625, 0.24940109252929688, 3.040210723876953, 3.0361557006835938, 2.310150146484375, 0.6482200622558594, 2.8073577880859375, -2.3198280334472656, -1.5412216186523438, 5.10968017578125, -2.191741943359375, -0.6102638244628906, 4.036018371582031, 0.3704032897949219, -1.4794464111328125, 1.3451690673828125, -2.191814422607422, 2.8052520751953125, 4.066822052001953, 4.4221343994140625, 3.253438949584961, 2.111173629760742, 0.56866455078125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000556.npy"} +{"epoch": 0.8405139833711263, "step": 557, "batch_size": 64, "mean": 2.1672027111053467, "std": 2.466348886489868, "min": -1.8781261444091797, "p10": -0.37861461639404276, "median": 1.8340063095092773, "p90": 5.6962421417236335, "max": 10.1983642578125, "pos_frac": 0.859375, "sample": [3.9636764526367188, 1.8205547332763672, 2.3734664916992188, 0.975067138671875, 0.7540702819824219, -1.8139495849609375, 8.068679809570312, 0.8697433471679688, 1.9175682067871094, 0.9165496826171875, 1.4602203369140625, -1.596893310546875, 0.06069183349609375, 2.0829830169677734, 0.974365234375, 0.8501949310302734, 2.2925567626953125, 2.9620513916015625, 8.764726638793945, 2.2971267700195312, 0.20943069458007812, 1.2099609375, 5.089630126953125, -0.170074462890625, -0.4679889678955078, -0.6662368774414062, 6.684440612792969, 6.698844909667969, 0.6696395874023438, -0.09238433837890625, 0.826995849609375, 1.1508731842041016, 2.7292251586914062, -1.8781261444091797, 2.01739501953125, 5.798770904541016, 1.5789642333984375, 5.457008361816406, 4.770820617675781, 10.1983642578125, 1.2989349365234375, 0.3151988983154297, -1.443756103515625, 0.229339599609375, 0.22306442260742188, 3.0460052490234375, 2.4755401611328125, 4.3594970703125, 3.0302658081054688, 1.8474578857421875, 0.24207687377929688, 1.3444137573242188, 1.5664043426513672, 2.8069095611572266, 3.1106796264648438, 2.7156219482421875, 2.8043060302734375, 2.4850025177001953, 2.244861602783203, 3.2151947021484375, 0.85797119140625, -0.884033203125, 5.8639984130859375, 3.1370162963867188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000557.npy"} +{"epoch": 0.8420256991685563, "step": 558, "batch_size": 64, "mean": 1.7948615550994873, "std": 2.344653606414795, "min": -3.5325050354003906, "p10": -0.9291618347167967, "median": 1.2021331787109375, "p90": 4.963186645507813, "max": 8.318077087402344, "pos_frac": 0.8125, "sample": [0.91845703125, 4.085365295410156, 4.91766357421875, -0.5801773071289062, 5.34393310546875, 0.5912208557128906, 4.169458389282227, 0.22978782653808594, 0.5893268585205078, 1.138763427734375, 0.3860130310058594, 2.9230880737304688, 1.9514312744140625, 1.6552658081054688, 0.46181488037109375, 1.5047550201416016, 2.5234508514404297, 1.2655029296875, 2.1041831970214844, 2.3045501708984375, 4.982696533203125, 3.415924072265625, 0.7179737091064453, 3.6985912322998047, 1.9194831848144531, -1.5763092041015625, 7.08660888671875, 2.4216842651367188, 2.9652252197265625, 1.3335952758789062, 4.586528778076172, 0.2278594970703125, 0.86932373046875, 8.318077087402344, -1.0723152160644531, -0.6269207000732422, 0.8790264129638672, 0.7718887329101562, 1.0230712890625, 6.910430908203125, 3.702373504638672, -1.3482666015625, 4.443214416503906, -1.4631805419921875, 3.620969772338867, 5.873348236083984, -1.0097808837890625, -0.7410507202148438, 5.1708831787109375, 0.5381011962890625, 1.0859222412109375, -3.5325050354003906, 1.3544044494628906, 0.05469512939453125, 0.3094024658203125, -0.2839508056640625, 3.0282440185546875, -0.36338043212890625, -1.3700637817382812, 0.3181915283203125, 3.2924423217773438, 0.40648460388183594, 3.7759933471679688, 0.6723480224609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000558.npy"} +{"epoch": 0.8435374149659864, "step": 559, "batch_size": 64, "mean": 1.8165605068206787, "std": 2.714946746826172, "min": -3.813030242919922, "p10": -1.1727981567382812, "median": 1.5577964782714844, "p90": 5.41129722595215, "max": 8.801544189453125, "pos_frac": 0.71875, "sample": [-1.8135986328125, -0.9057540893554688, 1.1114921569824219, -0.4366302490234375, 2.6637954711914062, 8.801544189453125, 5.204315185546875, 3.8115386962890625, 1.3998260498046875, 0.6300849914550781, -1.7253265380859375, 4.941741943359375, -0.47900390625, 2.6283721923828125, -0.1606426239013672, 4.6924285888671875, 5.166435241699219, 0.44835662841796875, 3.5963058471679688, 2.028491973876953, 3.5600929260253906, 0.5076751708984375, -1.1114349365234375, 2.6632080078125, 0.015106201171875, 0.028509140014648438, -1.1990966796875, 1.8305091857910156, -0.8142929077148438, 7.422153472900391, 0.6785736083984375, 1.332061767578125, 3.6706161499023438, 4.082611083984375, 5.073509216308594, -0.7581844329833984, 3.3416194915771484, -0.5336074829101562, 1.823272705078125, -1.96307373046875, 0.22455596923828125, -1.796417236328125, -0.9370193481445312, 1.7157669067382812, 2.7562026977539062, 5.94721794128418, 4.117389678955078, -0.27716064453125, 3.664752960205078, 4.046699523925781, 2.3972434997558594, 6.796424865722656, -3.813030242919922, -0.9928436279296875, 2.702045440673828, 5.500003814697266, 1.0189018249511719, 5.550010681152344, 0.6848907470703125, 0.09442901611328125, 6.141887664794922, 2.6104507446289062, 0.08550262451171875, -3.231637954711914], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000559.npy"} +{"epoch": 0.8450491307634165, "step": 560, "batch_size": 64, "mean": 1.5243771076202393, "std": 2.3137195110321045, "min": -3.1609954833984375, "p10": -1.018848419189453, "median": 1.037637710571289, "p90": 4.857581329345703, "max": 6.599611282348633, "pos_frac": 0.734375, "sample": [1.0808143615722656, 1.693359375, 5.407279968261719, 2.4487991333007812, -3.1609954833984375, -1.0740432739257812, 4.356647491455078, 1.3523101806640625, 4.997222900390625, 0.2656135559082031, 4.5811614990234375, -0.27802276611328125, 0.5638656616210938, 2.6712799072265625, -0.49147796630859375, -1.7741622924804688, -1.903717041015625, -0.5617446899414062, 2.1240386962890625, 3.1475601196289062, -1.5097198486328125, 2.3285064697265625, 0.526519775390625, 0.24500274658203125, 2.642789840698242, 2.772153854370117, 0.9944610595703125, 4.074117660522461, 6.3907318115234375, 1.2071952819824219, 1.5965728759765625, -0.622650146484375, -2.8560409545898438, 3.9984207153320312, 4.271148681640625, -0.4237213134765625, 0.55255126953125, -0.8900604248046875, 6.599611282348633, -2.0449905395507812, 5.401165008544922, 4.7662200927734375, 2.575775146484375, 3.0713043212890625, 0.5810623168945312, 0.5092010498046875, 0.05918121337890625, 4.896736145019531, 2.037067413330078, -0.4992237091064453, -0.1288909912109375, 2.65924072265625, 0.35332489013671875, -0.6369590759277344, 0.16070556640625, 3.1268844604492188, -0.8011627197265625, 0.9938583374023438, 4.62901496887207, 0.8268165588378906, 5.186164855957031, 0.7287254333496094, 0.2578773498535156, 1.5076560974121094], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000560.npy"} +{"epoch": 0.8465608465608465, "step": 561, "batch_size": 64, "mean": 1.671128273010254, "std": 2.3678317070007324, "min": -3.29730224609375, "p10": -0.9380985260009765, "median": 1.3143768310546875, "p90": 5.391267395019532, "max": 8.363059997558594, "pos_frac": 0.734375, "sample": [-0.14698410034179688, 0.1893463134765625, 1.834747314453125, 0.7413387298583984, 0.7718696594238281, 3.0236663818359375, 0.8279590606689453, -0.9495506286621094, 2.408609390258789, 2.118011474609375, 0.9401092529296875, 1.9013442993164062, 1.5312614440917969, 3.41046142578125, 0.39055633544921875, -1.7511558532714844, 0.3189697265625, 3.988279342651367, 8.363059997558594, -0.38767242431640625, -0.4779243469238281, 3.201263427734375, 2.088592529296875, -0.6842308044433594, 3.0310745239257812, -1.3014507293701172, 1.5830459594726562, -0.9051971435546875, -3.29730224609375, 5.786529541015625, -1.6626739501953125, 1.7792243957519531, 2.8435516357421875, 3.724536895751953, 1.20831298828125, 1.0593414306640625, 1.420440673828125, 2.552501678466797, 0.4895057678222656, 5.48675537109375, 5.4440155029296875, 5.5747222900390625, 4.827140808105469, 7.001930236816406, 2.275035858154297, -0.6846542358398438, -1.2958145141601562, 2.954448699951172, 5.2681884765625, 0.7436237335205078, -0.911376953125, 0.7672119140625, -0.087310791015625, -0.3984031677246094, 2.1229400634765625, 2.766956329345703, 0.2484149932861328, 1.7680854797363281, 2.9244308471679688, -1.1877975463867188, -0.05535888671875, 1.1577091217041016, 7.383296966552734, 0.8946456909179688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000561.npy"} +{"epoch": 0.8480725623582767, "step": 562, "batch_size": 64, "mean": 1.9733293056488037, "std": 2.461191177368164, "min": -3.13604736328125, "p10": -0.9554000854492187, "median": 2.0180587768554688, "p90": 5.0122116088867195, "max": 8.346275329589844, "pos_frac": 0.75, "sample": [0.28751373291015625, 4.601202011108398, -2.2104454040527344, 2.568450927734375, 1.9535064697265625, 3.2579269409179688, 0.12961578369140625, 4.68701171875, 4.940711975097656, 0.284942626953125, 2.5688552856445312, 3.5120086669921875, -1.6448001861572266, -0.06986618041992188, 5.46710205078125, -0.19734954833984375, 0.1025848388671875, 1.494598388671875, 3.6549530029296875, 0.068695068359375, 4.3062896728515625, 4.259275436401367, -0.16246795654296875, -0.0372467041015625, -1.4426193237304688, 5.920112609863281, -0.681549072265625, -1.6552505493164062, 4.6348724365234375, 1.4113807678222656, 3.977794647216797, -0.959716796875, 8.346275329589844, 3.2444992065429688, 1.4470367431640625, 5.141815185546875, -0.6468086242675781, 0.7744827270507812, -0.7539196014404297, 5.042854309082031, 0.11710357666015625, 0.8418197631835938, 2.562227249145508, 2.9983596801757812, 2.986480712890625, 5.696430206298828, 2.082611083984375, 2.220306396484375, 4.515415191650391, 1.75042724609375, 3.4775848388671875, 3.603515625, -0.0073833465576171875, 2.2175941467285156, 2.0897674560546875, 3.5591907501220703, 3.0888748168945312, 7.77923583984375, 0.423736572265625, -3.13604736328125, -1.462066650390625, -0.9453277587890625, 0.8484573364257812, 1.3604316711425781], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000562.npy"} +{"epoch": 0.8495842781557067, "step": 563, "batch_size": 64, "mean": 1.677725076675415, "std": 2.830482244491577, "min": -4.649143218994141, "p10": -1.6991668701171874, "median": 1.3696517944335938, "p90": 4.6441041946411135, "max": 10.16533088684082, "pos_frac": 0.765625, "sample": [0.05506134033203125, 2.65264892578125, 3.2161102294921875, 0.29356956481933594, 3.619842529296875, 4.337913513183594, -3.378072738647461, 4.84112548828125, 1.7532501220703125, -0.8338699340820312, 0.10533523559570312, 0.7615127563476562, -0.12958526611328125, -1.3429031372070312, 2.8423080444335938, 0.7696743011474609, -2.1737709045410156, 4.216423034667969, -2.3799285888671875, 4.6628570556640625, 2.728118896484375, 8.883743286132812, -0.9364757537841797, 2.5793380737304688, 3.8957595825195312, 1.1488761901855469, 3.4116668701171875, 1.4757766723632812, 1.6021041870117188, -3.1713790893554688, 0.8127975463867188, -1.32073974609375, -1.555633544921875, 0.5261764526367188, 0.2690143585205078, 7.290380477905273, 1.043853759765625, -4.649143218994141, 2.6754932403564453, 1.2182350158691406, 5.521614074707031, 4.2382965087890625, 3.253934860229492, -1.0693817138671875, -1.76068115234375, -0.486968994140625, 5.0639495849609375, 1.0243148803710938, 3.664825439453125, 1.8897666931152344, -4.2427978515625, 0.7098731994628906, 2.8425254821777344, 4.600347518920898, 3.3812217712402344, 1.2635269165039062, 10.16533088684082, 3.80718994140625, 0.636077880859375, 3.804698944091797, 2.7149734497070312, 1.2042121887207031, 2.742023468017578, 0.5880641937255859], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000563.npy"} +{"epoch": 0.8510959939531368, "step": 564, "batch_size": 64, "mean": 1.7244811058044434, "std": 2.310460090637207, "min": -3.40802001953125, "p10": -1.2386886596679687, "median": 1.4357261657714844, "p90": 4.449933242797852, "max": 7.504697799682617, "pos_frac": 0.765625, "sample": [3.745248794555664, -1.2468643188476562, 3.677074432373047, 2.79632568359375, 4.133636474609375, 5.0089263916015625, 2.98291015625, -2.8697280883789062, -0.22129058837890625, -1.2196121215820312, 4.823524475097656, 4.413873672485352, 0.8734169006347656, -0.3459606170654297, 0.553192138671875, -0.008060455322265625, -0.8194618225097656, 1.3712177276611328, 1.4299087524414062, 0.637847900390625, -0.15689849853515625, 0.2569694519042969, 4.092538833618164, 1.98828125, 3.5842132568359375, 0.6639556884765625, 3.622894287109375, -1.2629165649414062, 1.7917976379394531, 2.426361083984375, 3.8810958862304688, 5.4979095458984375, 5.9291229248046875, 0.6070594787597656, 0.8932952880859375, -2.1834945678710938, 1.3193836212158203, 1.7614173889160156, 0.639892578125, 1.4415435791015625, 4.465387344360352, 2.0593414306640625, 3.2250328063964844, 1.3501873016357422, -1.0767822265625, -0.193939208984375, 2.252246856689453, -1.524993896484375, 0.6787033081054688, -3.1078567504882812, 4.6150970458984375, 2.0659961700439453, 4.128265380859375, 0.9750995635986328, 3.707366943359375, 1.2990036010742188, 3.4532203674316406, 7.504697799682617, -3.40802001953125, 4.020591735839844, 0.52447509765625, 2.434192657470703, 0.2552013397216797, 4.1537322998046875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000564.npy"} +{"epoch": 0.8526077097505669, "step": 565, "batch_size": 64, "mean": 1.627647876739502, "std": 2.333108425140381, "min": -4.201683044433594, "p10": -1.1982040405273438, "median": 1.5197906494140625, "p90": 4.164503860473633, "max": 8.250310897827148, "pos_frac": 0.765625, "sample": [0.8511447906494141, -0.550628662109375, -4.201683044433594, 4.167091369628906, 2.0748291015625, 0.56304931640625, 1.028717041015625, 3.1910552978515625, 0.10137176513671875, 3.06396484375, 3.4366989135742188, 3.518280029296875, 5.193443298339844, -3.744749069213867, 0.9789657592773438, 2.050628662109375, 1.5703620910644531, 5.199125289916992, 6.5598297119140625, 6.5083465576171875, 2.4526290893554688, -1.6218795776367188, -0.6123275756835938, -0.6750068664550781, 3.027193069458008, 1.359527587890625, 1.7996826171875, 1.0059890747070312, 3.0574111938476562, 2.9756393432617188, 2.8424739837646484, 0.4237022399902344, -1.66229248046875, 1.3211860656738281, 2.6227588653564453, -1.2221107482910156, 2.140573501586914, 2.2937049865722656, -1.3833446502685547, 2.153106689453125, 1.98638916015625, -1.1424217224121094, 0.288787841796875, 2.8735504150390625, 0.397369384765625, 3.361125946044922, 0.6517791748046875, 1.4692192077636719, 4.158466339111328, -0.23287200927734375, 4.588462829589844, -0.38399505615234375, -2.2472381591796875, -0.5718193054199219, 1.3408432006835938, 0.5031642913818359, 1.8041763305664062, 3.6017189025878906, -0.31603240966796875, 0.9649581909179688, 8.250310897827148, 3.923116683959961, 1.1285972595214844, 3.9133453369140625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000565.npy"} +{"epoch": 0.854119425547997, "step": 566, "batch_size": 64, "mean": 1.4104200601577759, "std": 2.8833370208740234, "min": -3.9293899536132812, "p10": -2.3601722717285156, "median": 1.4357795715332031, "p90": 4.589817047119141, "max": 11.159614562988281, "pos_frac": 0.671875, "sample": [2.0762710571289062, 1.9833297729492188, -0.7159461975097656, 1.2752304077148438, 0.363494873046875, 5.565040588378906, 5.140970230102539, 2.3160858154296875, 1.7592716217041016, 2.8510665893554688, 1.2267189025878906, 1.0824737548828125, 1.0870685577392578, 0.5123672485351562, 3.1811370849609375, 5.6334228515625, -3.765594482421875, 2.7194366455078125, 1.2572059631347656, 2.543428421020508, 2.396160125732422, -0.4261932373046875, -2.3622665405273438, 4.620750427246094, -0.12799072265625, -3.5533294677734375, 5.161037445068359, 2.71148681640625, 2.5389328002929688, -3.9293899536132812, 3.7690277099609375, 4.1716766357421875, 3.8331871032714844, 0.4733428955078125, -2.20953369140625, -3.708303451538086, -1.4136810302734375, 1.5107498168945312, 2.7741241455078125, -2.858295440673828, -0.6386032104492188, -2.35528564453125, 11.159614562988281, -2.6205825805664062, 3.002523422241211, 1.360809326171875, -1.3075580596923828, -1.5211944580078125, 4.185951232910156, 0.6665916442871094, -0.4880847930908203, -0.0019683837890625, 4.0982208251953125, 2.6917343139648438, -0.6084442138671875, 1.9807243347167969, -1.7368621826171875, 2.9036331176757812, 1.6282196044921875, -0.025426864624023438, 4.51763916015625, 0.9137039184570312, 2.498809814453125, 8.498748779296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000566.npy"} +{"epoch": 0.8556311413454271, "step": 567, "batch_size": 64, "mean": 1.9788271188735962, "std": 2.6873857975006104, "min": -2.8485488891601562, "p10": -0.7641471862792967, "median": 1.3783836364746094, "p90": 4.8850036621093755, "max": 13.063743591308594, "pos_frac": 0.78125, "sample": [1.1416091918945312, 0.7554969787597656, -0.958038330078125, 6.831144332885742, 1.18267822265625, 1.959075927734375, 4.53558349609375, 3.2835044860839844, 6.7746429443359375, 0.3823432922363281, 3.6230506896972656, 2.1256351470947266, 1.1283416748046875, -1.9422988891601562, 1.5740890502929688, 1.1428604125976562, -1.487813949584961, 2.2266616821289062, 0.7575759887695312, 2.6038665771484375, 3.437225341796875, 0.9496307373046875, -0.30968666076660156, 1.826446533203125, 0.13248443603515625, 0.03981590270996094, -0.2599067687988281, -0.9936332702636719, 1.6888198852539062, -2.8485488891601562, 9.275306701660156, 0.9079513549804688, 6.2852783203125, 13.063743591308594, 4.024070739746094, 1.0522651672363281, 3.507051467895508, 0.5193328857421875, 2.55072021484375, 3.9321212768554688, -1.54608154296875, 0.6258544921875, 2.13153076171875, 1.8736419677734375, 3.280719757080078, 4.450855255126953, -0.08174514770507812, 0.9430961608886719, 4.9250030517578125, 0.9051055908203125, 3.6122665405273438, 0.040225982666015625, 4.8260650634765625, -0.214813232421875, -0.37935638427734375, -0.6038284301757812, -0.5954132080078125, 2.2344913482666016, 4.9102630615234375, -0.832855224609375, 2.14599609375, 0.7924728393554688, 3.516813278198242, 3.2641258239746094], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000567.npy"} +{"epoch": 0.8571428571428571, "step": 568, "batch_size": 64, "mean": 2.017970561981201, "std": 2.264200210571289, "min": -1.8790245056152344, "p10": -0.49319648742675765, "median": 1.6878738403320312, "p90": 5.168323516845704, "max": 8.689208984375, "pos_frac": 0.828125, "sample": [-0.20053863525390625, 0.15494346618652344, 1.2917366027832031, 0.6699066162109375, 1.5907249450683594, 3.3678207397460938, 2.1707420349121094, -1.5059661865234375, 0.348663330078125, 1.7496795654296875, -1.1086082458496094, 4.852884292602539, 0.744903564453125, 3.7279052734375, -0.8941478729248047, -0.3267974853515625, -0.2171478271484375, 1.0258102416992188, 2.9801712036132812, 1.5823612213134766, 0.6931686401367188, 1.8008575439453125, 0.25712108612060547, 1.47216796875, 4.983612060546875, 6.957283020019531, 3.393423080444336, 4.9580841064453125, -0.5645103454589844, 2.30078125, 1.4406585693359375, 0.8616867065429688, 2.3787784576416016, 0.07159423828125, 2.2843666076660156, 2.439178466796875, -1.8790245056152344, 5.579170227050781, 0.03867530822753906, 2.634765625, 2.904449462890625, 5.218116760253906, -0.18656158447265625, 2.7600326538085938, 4.536354064941406, 2.219573974609375, 1.859619140625, -0.7749443054199219, 2.2425079345703125, 1.626068115234375, 1.5114364624023438, 1.7754669189453125, -0.9620819091796875, 2.0553455352783203, 6.659881591796875, 0.6847877502441406, 0.058807373046875, 5.0521392822265625, 6.630561828613281, 3.2082977294921875, 0.9154510498046875, 0.402679443359375, 5.956031799316406, 8.689208984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000568.npy"} +{"epoch": 0.8586545729402872, "step": 569, "batch_size": 64, "mean": 1.6459428071975708, "std": 2.1129953861236572, "min": -3.8410720825195312, "p10": -0.7542947769165037, "median": 1.6205596923828125, "p90": 4.11114845275879, "max": 7.0264892578125, "pos_frac": 0.859375, "sample": [3.2332305908203125, 0.2525177001953125, 0.31751441955566406, 4.230197906494141, 2.6977386474609375, 4.894008636474609, -2.9077682495117188, 3.285858154296875, 2.8896484375, 5.549537658691406, 3.338054656982422, 1.6872177124023438, 0.5297050476074219, 1.715555191040039, -3.8410720825195312, 3.5560035705566406, 2.3952369689941406, 3.201324462890625, 0.15746307373046875, 3.114978790283203, 3.674297332763672, 3.277313232421875, 1.1244621276855469, 5.303703308105469, 0.984222412109375, 1.5855560302734375, 2.914154052734375, 1.9122428894042969, 2.3874130249023438, -3.0534305572509766, -1.838064193725586, 0.9360408782958984, 0.4994239807128906, 0.21574783325195312, 1.1295318603515625, 1.6282501220703125, 7.0264892578125, 1.178335189819336, 3.5525894165039062, 0.12363433837890625, 1.8822460174560547, -0.5479068756103516, 1.4308624267578125, -0.9949378967285156, 0.71453857421875, -3.5677719116210938, 2.9441986083984375, 0.738433837890625, 4.146484375, 1.447854995727539, 1.7402381896972656, 1.6128692626953125, 4.671417236328125, -0.8427467346191406, -0.09852218627929688, 1.0277843475341797, 2.525848388671875, 0.318756103515625, 2.7839584350585938, 0.390228271484375, 0.9460067749023438, 1.4029006958007812, 4.028697967529297, 1.7800369262695312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000569.npy"} +{"epoch": 0.8601662887377173, "step": 570, "batch_size": 64, "mean": 2.294064521789551, "std": 2.2118375301361084, "min": -1.9952239990234375, "p10": -0.41773204803466774, "median": 2.056743621826172, "p90": 5.473524284362793, "max": 7.629451751708984, "pos_frac": 0.875, "sample": [2.3403396606445312, -1.94537353515625, 2.70654296875, 2.4955711364746094, 3.5638046264648438, 3.2084884643554688, 0.977203369140625, -0.521270751953125, 1.3426628112792969, 1.2128067016601562, 0.29459381103515625, 0.36226463317871094, 0.90728759765625, 2.587350845336914, 1.9044647216796875, 3.253459930419922, -0.5104846954345703, -0.2013092041015625, 1.0457305908203125, 3.3320236206054688, 1.1447906494140625, -0.5653953552246094, 0.32112884521484375, 5.334878921508789, 7.629451751708984, 0.6367950439453125, 5.297504425048828, 2.9609832763671875, 4.327690124511719, 1.240325927734375, 2.262237548828125, 5.690315246582031, 3.8819923400878906, 0.2061767578125, 2.343240737915039, 2.6502304077148438, 3.7218284606933594, 1.455953598022461, 5.5329437255859375, 1.5541915893554688, 4.19268798828125, 6.8095550537109375, 2.8939361572265625, -0.84674072265625, 2.2286834716796875, 1.6125869750976562, 5.2689208984375, 3.7301254272460938, 1.797271728515625, -1.9952239990234375, 3.0112266540527344, 0.06539154052734375, 0.8465538024902344, 1.4422550201416016, 7.196281433105469, 1.8939895629882812, 0.8435249328613281, 1.2299919128417969, 2.2090225219726562, 5.712677001953125, 6.902099609375, 0.5580711364746094, 4.398639678955078, -1.1648101806640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000570.npy"} +{"epoch": 0.8616780045351474, "step": 571, "batch_size": 64, "mean": 1.4249444007873535, "std": 2.3289401531219482, "min": -3.0796775817871094, "p10": -1.5602476119995117, "median": 1.4578971862792969, "p90": 3.6785812377929688, "max": 8.311214447021484, "pos_frac": 0.734375, "sample": [2.3696975708007812, 2.0485763549804688, 1.6946029663085938, 0.7207374572753906, 0.09689712524414062, 8.311214447021484, 3.695220947265625, 1.5010795593261719, 3.399524688720703, 0.65234375, 0.8253326416015625, 3.6312942504882812, -0.7328109741210938, -0.08922958374023438, -0.8157444000244141, 3.5257644653320312, 1.830352783203125, 1.8159942626953125, -0.1998138427734375, 3.32012939453125, 4.525169372558594, -0.22499847412109375, 3.5786895751953125, 1.66448974609375, 3.1889190673828125, 2.6122589111328125, 2.7638778686523438, 2.1903018951416016, -2.71099853515625, 2.1502532958984375, 0.38642120361328125, -2.096355438232422, 1.3326969146728516, -1.5742969512939453, 3.6397552490234375, 0.6551456451416016, -1.5274658203125, -1.055093765258789, -1.4737091064453125, 1.438873291015625, 0.6310043334960938, 1.213623046875, 1.3413543701171875, 0.10896682739257812, 2.6197681427001953, 6.1746978759765625, 3.134521484375, -2.220651626586914, 0.5023956298828125, 1.3808670043945312, -3.0310134887695312, -2.55035400390625, 2.4719619750976562, 4.635894775390625, 2.6973190307617188, -0.33133697509765625, 2.2954025268554688, 5.220249176025391, 1.4769210815429688, 3.0686798095703125, -0.8084030151367188, 0.6130580902099609, -3.0796775817871094, 6.566097259521484], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000571.npy"} +{"epoch": 0.8631897203325775, "step": 572, "batch_size": 64, "mean": 1.4121863842010498, "std": 2.1896965503692627, "min": -3.027374267578125, "p10": -1.6423992156982419, "median": 1.2088050842285156, "p90": 4.000318908691407, "max": 9.24581527709961, "pos_frac": 0.796875, "sample": [0.40378570556640625, -1.861541748046875, 0.7937202453613281, 2.2528114318847656, 4.046905517578125, 0.6555061340332031, 4.591381072998047, 1.8957443237304688, -1.0320510864257812, 9.24581527709961, 3.0032501220703125, 2.1117382049560547, 2.951873779296875, 3.4846038818359375, -0.4925346374511719, 2.1538314819335938, 0.0711517333984375, 0.6453895568847656, 0.6817550659179688, 4.525016784667969, 3.2140731811523438, 1.7418975830078125, 0.7457275390625, 5.116306304931641, 0.9961776733398438, 1.3993492126464844, 1.0270099639892578, 1.0512313842773438, -1.7997589111328125, -0.58392333984375, 1.6432418823242188, 3.8722057342529297, 3.8896408081054688, -3.027374267578125, 2.9049415588378906, 1.8686866760253906, 1.2298355102539062, -1.2752265930175781, 0.7421302795410156, 0.41245460510253906, 2.8731613159179688, 2.4989242553710938, 1.1909408569335938, 2.590909957885742, 2.573240280151367, 5.0713653564453125, 0.010211944580078125, -0.7911224365234375, 5.165580749511719, 3.8916168212890625, 1.1366195678710938, 1.2266693115234375, -0.574493408203125, 1.3796463012695312, -2.558319091796875, -2.4375152587890625, 0.793365478515625, 0.9782867431640625, -2.0591506958007812, -2.056396484375, 1.4347076416015625, 0.489654541015625, 1.4013404846191406, 0.8539028167724609], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000572.npy"} +{"epoch": 0.8647014361300076, "step": 573, "batch_size": 64, "mean": 1.906752586364746, "std": 2.950704574584961, "min": -5.173593521118164, "p10": -1.577862548828125, "median": 1.5654830932617188, "p90": 5.872262573242189, "max": 9.698165893554688, "pos_frac": 0.765625, "sample": [0.8361053466796875, 2.5211868286132812, -5.173593521118164, 2.8343238830566406, -0.9050979614257812, 1.8581161499023438, -2.0968685150146484, 4.566095352172852, 1.125732421875, -1.5899658203125, -3.5124340057373047, -1.54962158203125, -0.9667606353759766, 0.23621368408203125, 2.271270751953125, 0.49432373046875, -0.7320175170898438, 1.57623291015625, 4.523662567138672, -2.1185531616210938, 6.586647033691406, 1.8965988159179688, 3.8862457275390625, 1.062408447265625, 0.17630767822265625, 0.07057571411132812, 9.698165893554688, 1.473611831665039, 4.508232116699219, 1.4859542846679688, 3.9667816162109375, 1.751434326171875, 1.2566032409667969, 2.57733154296875, 8.715911865234375, 1.7157630920410156, -0.14669227600097656, 1.164541244506836, 0.890869140625, 9.669044494628906, 4.600395202636719, 2.8957386016845703, 2.868305206298828, 0.09603118896484375, 1.5547332763671875, 1.2779541015625, 5.512969970703125, 1.7570953369140625, 0.9975509643554688, 3.8229904174804688, 6.928642272949219, 4.9251861572265625, -0.133514404296875, 2.647918701171875, 0.23543548583984375, -0.6766281127929688, 6.0262451171875, -1.9155197143554688, 2.9599571228027344, 1.8126239776611328, 2.5505828857421875, -0.3473854064941406, -1.9566726684570312, 6.986846923828125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000573.npy"} +{"epoch": 0.8662131519274376, "step": 574, "batch_size": 64, "mean": 1.8618476390838623, "std": 2.0873873233795166, "min": -1.2963638305664062, "p10": -0.4834434509277344, "median": 1.6757354736328125, "p90": 4.0520671844482425, "max": 8.919609069824219, "pos_frac": 0.796875, "sample": [2.083179473876953, -0.48480987548828125, -0.2011260986328125, -0.22574234008789062, 0.1582794189453125, 2.3715782165527344, 0.4529838562011719, -0.6860122680664062, 0.7277393341064453, -0.8236160278320312, -0.5543479919433594, 1.6851348876953125, 1.6716766357421875, 0.34787559509277344, -1.0194931030273438, -0.20669174194335938, -0.2928752899169922, 0.6495513916015625, 1.36273193359375, 1.6797943115234375, 3.506866455078125, 0.8200035095214844, 1.575714111328125, 2.1856613159179688, -1.2963638305664062, 8.919609069824219, 3.984710693359375, 5.8756103515625, 0.03824615478515625, 2.7008132934570312, 4.055389404296875, 0.3068523406982422, 2.182353973388672, 2.26861572265625, 1.444305419921875, 3.2037429809570312, -0.8356857299804688, 0.20674514770507812, 3.8333587646484375, 1.0162277221679688, 1.2559051513671875, 3.6136245727539062, 6.060626983642578, 1.5753936767578125, 4.044315338134766, 4.0192718505859375, 1.9799957275390625, 1.7153167724609375, 1.68951416015625, 2.3396987915039062, 0.04926300048828125, 6.436149597167969, 1.9038162231445312, 2.928567886352539, 3.938488006591797, -0.23395538330078125, 0.8168277740478516, 2.921245574951172, 5.173576354980469, 5.885986328125, -0.480255126953125, 2.5280914306640625, 3.6524658203125, 0.655731201171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000574.npy"} +{"epoch": 0.8677248677248677, "step": 575, "batch_size": 64, "mean": 1.4590165615081787, "std": 2.5495758056640625, "min": -3.9101943969726562, "p10": -1.4133203506469727, "median": 1.0788154602050781, "p90": 5.104732513427734, "max": 8.929962158203125, "pos_frac": 0.703125, "sample": [-0.07601165771484375, -1.4271602630615234, -2.9428253173828125, 0.055690765380859375, 2.27374267578125, 1.1004180908203125, 0.1870880126953125, 2.6419754028320312, -0.2811126708984375, -1.7364730834960938, 3.1146087646484375, 0.21520233154296875, 6.4416046142578125, -1.3645553588867188, -2.2700119018554688, 2.8935089111328125, -2.4916305541992188, 3.2586212158203125, 2.7508544921875, 1.0285263061523438, 0.9967765808105469, 5.548992156982422, 0.5850143432617188, 2.4823532104492188, 0.7235794067382812, 0.31649017333984375, -0.7353363037109375, 2.5564517974853516, 5.864166259765625, 0.7005996704101562, -0.2072467803955078, 1.2466201782226562, 5.142692565917969, 5.0161590576171875, 3.8511276245117188, -2.359588623046875, 6.566734313964844, -1.3810272216796875, 5.524848937988281, 4.339893341064453, -0.4883270263671875, 3.4345474243164062, 2.7326278686523438, 0.006927490234375, 2.1973876953125, 0.834686279296875, 1.0572128295898438, -3.9101943969726562, -0.942626953125, 8.929962158203125, 3.6440887451171875, -0.244049072265625, 2.47503662109375, 1.8363780975341797, 0.28558349609375, 2.6096115112304688, 1.1415882110595703, -0.7935447692871094, 1.1723804473876953, 3.076496124267578, -0.7879142761230469, 3.683208465576172, 2.1464462280273438, -0.8718223571777344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000575.npy"} +{"epoch": 0.8692365835222978, "step": 576, "batch_size": 64, "mean": 1.6160247325897217, "std": 2.3849430084228516, "min": -2.8580322265625, "p10": -1.326022720336914, "median": 1.64605712890625, "p90": 4.69188232421875, "max": 6.506622314453125, "pos_frac": 0.6875, "sample": [4.234031677246094, 3.34063720703125, -0.7259292602539062, 2.25836181640625, -2.37677001953125, 3.6823272705078125, 4.853494644165039, 1.1558494567871094, 3.40216064453125, 1.678985595703125, -1.5485706329345703, -1.2827186584472656, 3.9232025146484375, -2.8580322265625, 2.1166000366210938, -1.8029861450195312, -0.6392593383789062, 2.886798858642578, 5.5404510498046875, 0.435516357421875, 2.277925491333008, 5.7999420166015625, 2.4243812561035156, -0.18091964721679688, 3.3785934448242188, 2.364156723022461, 1.6899642944335938, -1.3445816040039062, 4.593818664550781, -0.46463966369628906, 0.6999130249023438, -1.0385971069335938, -0.172393798828125, 4.9888916015625, 1.77880859375, 1.4260711669921875, 6.506622314453125, -1.0601882934570312, 0.7236175537109375, 1.3376426696777344, 2.5117931365966797, -1.1993064880371094, 4.2747344970703125, -1.2051887512207031, 3.6472930908203125, 1.2886962890625, 6.072879791259766, 4.426963806152344, -0.15147781372070312, 1.613128662109375, 3.1727294921875, -1.7726898193359375, 0.303131103515625, -2.1409454345703125, 4.280174255371094, 2.136444091796875, -1.070943832397461, -1.1462326049804688, 3.9437332153320312, 4.733909606933594, 1.1159000396728516, 0.7764720916748047, 0.17797088623046875, 3.6332321166992188], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000576.npy"} +{"epoch": 0.8707482993197279, "step": 577, "batch_size": 64, "mean": 1.2779215574264526, "std": 2.3241026401519775, "min": -3.7227706909179688, "p10": -1.796927261352539, "median": 1.0685043334960938, "p90": 4.026572227478028, "max": 8.733634948730469, "pos_frac": 0.75, "sample": [3.6247520446777344, 0.6964855194091797, -2.9126052856445312, 1.7688522338867188, 0.3149528503417969, 1.8357086181640625, 0.17435455322265625, -0.46794700622558594, 2.2933349609375, 1.0829010009765625, -1.8296737670898438, 1.9288902282714844, 3.212993621826172, -0.6511459350585938, -0.9320297241210938, 4.761196136474609, 3.38580322265625, 4.164018630981445, -2.8051376342773438, 2.690549850463867, 3.4236183166503906, 1.676513671875, 0.6330413818359375, 4.486244201660156, 0.5445022583007812, -2.466400146484375, -1.7613029479980469, 3.0385971069335938, 1.901601791381836, -0.2251129150390625, 0.3487396240234375, 1.5571327209472656, 0.2545318603515625, 0.6280231475830078, 0.6065673828125, 1.054107666015625, 0.6742286682128906, 2.0789794921875, -0.19854736328125, -0.8581523895263672, 1.4110145568847656, 1.5167465209960938, 4.752204895019531, 0.4360504150390625, 7.402372360229492, 4.810432434082031, -3.7227706909179688, 1.8502120971679688, 0.6472015380859375, 3.6914901733398438, -0.4982452392578125, 2.1521835327148438, 8.733634948730469, 3.4432125091552734, 0.3400306701660156, 0.4834136962890625, -1.81219482421875, -2.254058837890625, -0.7648677825927734, 3.7058639526367188, 0.4220733642578125, 2.0575733184814453, 1.7113723754882812, 1.5388641357421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000577.npy"} +{"epoch": 0.872260015117158, "step": 578, "batch_size": 64, "mean": 1.1640193462371826, "std": 2.1714015007019043, "min": -2.58148193359375, "p10": -1.2309169769287107, "median": 0.6334810256958008, "p90": 4.343233871459961, "max": 7.81561279296875, "pos_frac": 0.671875, "sample": [0.7682304382324219, -1.8071060180664062, -0.5575790405273438, -0.6335678100585938, 2.2813720703125, -0.07385444641113281, 1.0840606689453125, 2.1421127319335938, 0.7670211791992188, -0.4539604187011719, 0.04473876953125, 7.81561279296875, -0.9029998779296875, 1.8004417419433594, 3.134449005126953, -1.0576019287109375, 5.957817077636719, 1.7085857391357422, 1.7333030700683594, 4.366546630859375, 4.593128204345703, 1.412445068359375, 0.3118934631347656, 1.7389240264892578, -0.39890289306640625, 1.3239517211914062, 2.8765640258789062, 0.060028076171875, 0.4360618591308594, 0.4999408721923828, 5.0818634033203125, 3.176942825317383, 6.189029693603516, 2.942962646484375, 0.17906951904296875, 1.1736602783203125, 1.8144035339355469, 1.2537002563476562, 1.1019363403320312, 0.08656692504882812, 3.6749343872070312, -2.58148193359375, 0.8820343017578125, 0.1883869171142578, -1.0253753662109375, -1.5665817260742188, -1.8857345581054688, 3.7269535064697266, -0.10331153869628906, 4.288837432861328, 1.9998092651367188, -1.3734359741210938, 4.915313720703125, -0.051563262939453125, 0.2818756103515625, -1.6716327667236328, 0.3601837158203125, 0.0513763427734375, -0.7045135498046875, -0.03510856628417969, -0.9322280883789062, -0.05454063415527344, -1.3051948547363281, 3.446441650390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000578.npy"} +{"epoch": 0.873771730914588, "step": 579, "batch_size": 64, "mean": 2.1486077308654785, "std": 2.427582263946533, "min": -2.4744720458984375, "p10": -0.4976993560791015, "median": 1.9715900421142578, "p90": 5.798522186279297, "max": 8.750656127929688, "pos_frac": 0.796875, "sample": [2.3946151733398438, 2.379596710205078, 1.0345344543457031, 0.6413078308105469, 0.20193862915039062, -0.204803466796875, 2.5435028076171875, 1.3387069702148438, 1.850189208984375, 6.630706787109375, 5.824859619140625, 2.0129966735839844, 1.7301216125488281, 5.559906005859375, 2.2376861572265625, 1.7635345458984375, 5.737068176269531, -0.9495086669921875, -0.1822967529296875, 2.3403167724609375, 8.750656127929688, -0.43231201171875, 8.514873504638672, -0.09834480285644531, 2.1932373046875, 0.7101230621337891, 6.35845947265625, 3.9791641235351562, 4.952312469482422, 2.2544898986816406, -0.5257225036621094, 1.9045448303222656, 2.3905715942382812, 2.3786163330078125, 2.4818038940429688, 2.3892745971679688, 3.7372970581054688, 3.733327865600586, -0.618194580078125, 7.427692413330078, 0.33678436279296875, 2.0143909454345703, -0.01016998291015625, -2.4049148559570312, 2.112964630126953, 1.166421890258789, -0.5259170532226562, -2.4744720458984375, 6.5919952392578125, -0.18920135498046875, 1.9301834106445312, 0.38993072509765625, 0.5095901489257812, 3.360544204711914, 2.8873367309570312, 1.2128868103027344, 2.2694931030273438, 3.2776336669921875, 1.3825435638427734, 1.204132080078125, -0.8705902099609375, 1.3661041259765625, 4.563499450683594, 0.042881011962890625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000579.npy"} +{"epoch": 0.8752834467120182, "step": 580, "batch_size": 64, "mean": 1.3896890878677368, "std": 2.4275877475738525, "min": -3.7137413024902344, "p10": -1.8850318908691406, "median": 1.1678199768066406, "p90": 4.722323608398438, "max": 6.657081604003906, "pos_frac": 0.734375, "sample": [-1.8551406860351562, 0.9635467529296875, -2.029510498046875, 0.2645263671875, -2.1790695190429688, 2.9236087799072266, 6.657081604003906, 2.594226837158203, 1.304473876953125, 0.223602294921875, 2.3130035400390625, 1.0311660766601562, 0.686309814453125, 0.6103706359863281, -1.8978424072265625, 1.5559463500976562, 2.860729217529297, 2.6229248046875, -1.5540580749511719, 0.9668998718261719, 0.036655426025390625, 2.959451675415039, 5.831390380859375, -0.04099273681640625, 3.2997398376464844, 0.4265594482421875, 5.2900390625, -0.3674468994140625, -0.3809013366699219, 4.75274658203125, 1.8809890747070312, 2.3271408081054688, 1.7695541381835938, 2.2863330841064453, -1.7085037231445312, 6.6107025146484375, -2.467071533203125, -3.7137413024902344, 0.04314422607421875, 5.78564453125, -2.6620025634765625, 3.70501708984375, -1.5308380126953125, 1.0243072509765625, 6.0790557861328125, -0.03078460693359375, 0.7085857391357422, 0.2877769470214844, 2.483245849609375, 0.502227783203125, 3.902240753173828, 4.490942001342773, 0.0330657958984375, -2.5965194702148438, -0.4722785949707031, -0.4334373474121094, 2.0560760498046875, 2.9986438751220703, 3.2510337829589844, 4.651336669921875, 1.42584228515625, 2.4562835693359375, 1.4493789672851562, 2.4766769409179688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000580.npy"} +{"epoch": 0.8767951625094482, "step": 581, "batch_size": 64, "mean": 1.492587924003601, "std": 2.346820116043091, "min": -4.066225051879883, "p10": -0.9684226989746093, "median": 1.1452560424804688, "p90": 4.458458709716799, "max": 8.9755859375, "pos_frac": 0.765625, "sample": [3.2145118713378906, 2.9716415405273438, 0.5307083129882812, 2.0699024200439453, 1.1527099609375, 0.35471343994140625, 0.9613800048828125, -1.4301681518554688, 3.0301132202148438, 2.1321754455566406, -3.2484359741210938, 0.7414360046386719, 0.35604095458984375, 2.0320796966552734, 3.9447860717773438, -1.0196533203125, 2.0658187866210938, 0.3506584167480469, 2.1937637329101562, 0.372467041015625, 5.346893310546875, 0.6480045318603516, 6.149131774902344, -1.250762939453125, 3.179229736328125, 6.989784240722656, 2.851755142211914, 0.3802032470703125, 4.679691314697266, 2.3232803344726562, -0.61773681640625, 3.1379623413085938, 0.12377357482910156, 2.8256607055664062, 2.7007217407226562, 1.0385284423828125, -0.32501220703125, 0.21694183349609375, 8.9755859375, 4.6786041259765625, 0.9178848266601562, -2.18267822265625, 1.7589263916015625, -0.8809661865234375, 1.9787406921386719, -4.066225051879883, 6.972625732421875, 1.1378021240234375, -1.0031051635742188, -0.3395500183105469, 2.9252662658691406, 0.3526153564453125, 1.8100433349609375, 3.2049217224121094, 0.12943458557128906, 1.2578582763671875, -0.08513259887695312, -0.3277130126953125, -0.8874969482421875, 2.301685333251953, 0.5954742431640625, 2.454853057861328, 1.374795913696289, -0.7033271789550781], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000581.npy"} +{"epoch": 0.8783068783068783, "step": 582, "batch_size": 64, "mean": 1.7041112184524536, "std": 2.1305971145629883, "min": -2.35968017578125, "p10": -1.078033447265625, "median": 1.6038846969604492, "p90": 4.292634010314941, "max": 7.4660491943359375, "pos_frac": 0.8125, "sample": [2.3916397094726562, 6.7836151123046875, 1.1513900756835938, 1.315399169921875, 1.0647964477539062, -0.001483917236328125, 2.632415771484375, 1.6288089752197266, 2.764781951904297, 4.5127716064453125, 1.22393798828125, 1.615753173828125, 1.6188507080078125, 7.4660491943359375, 1.0339889526367188, 4.337160110473633, 1.625762939453125, 1.5620880126953125, 0.9245986938476562, 2.7357330322265625, 0.6898441314697266, 1.9414215087890625, 1.7012577056884766, 4.118751525878906, 3.1365432739257812, 3.560028076171875, 1.8791370391845703, 1.5920162200927734, -1.911844253540039, -2.1421680450439453, -0.3061351776123047, -2.35968017578125, 2.2978286743164062, -1.0960845947265625, 0.2936897277832031, 3.890665054321289, 6.6944427490234375, 3.696371078491211, 3.7394943237304688, 0.8327350616455078, 2.753082275390625, 3.302501678466797, 0.7562160491943359, 1.642965316772461, -0.8708953857421875, 0.1852264404296875, -1.7850418090820312, 1.4553680419921875, 2.0262908935546875, -1.0800933837890625, 0.8237724304199219, 1.2052726745605469, 0.16785430908203125, 0.13314247131347656, -0.5501194000244141, 1.5563125610351562, 4.188739776611328, -2.041015625, 3.6476669311523438, -1.0732269287109375, 1.9498443603515625, 0.33295440673828125, 4.357246398925781, 5.342679977416992], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000582.npy"} +{"epoch": 0.8798185941043084, "step": 583, "batch_size": 64, "mean": 1.4541677236557007, "std": 2.661071538925171, "min": -4.337625503540039, "p10": -1.2949024200439452, "median": 1.197983741760254, "p90": 3.9378967285156254, "max": 9.221847534179688, "pos_frac": 0.71875, "sample": [2.666545867919922, 0.8129081726074219, -1.1485977172851562, -1.3311424255371094, 9.221847534179688, 1.5762214660644531, -0.31610107421875, -0.9700279235839844, -2.078266143798828, 0.7022857666015625, 1.148345947265625, -2.7940330505371094, 0.03183555603027344, 2.5867385864257812, 1.8856945037841797, -1.2103424072265625, 2.175281524658203, -0.5035858154296875, 6.524986267089844, 6.681468963623047, 2.6787986755371094, -3.9702796936035156, 1.02838134765625, -0.3675346374511719, 1.4767913818359375, 1.2016334533691406, 2.0054779052734375, -3.1616897583007812, 0.8984870910644531, 3.4831924438476562, 1.8365211486816406, 0.9914188385009766, 3.969390869140625, -0.35564422607421875, 2.296356201171875, 3.864410400390625, -1.5592727661132812, -1.1996421813964844, 0.7312164306640625, 5.90283203125, 1.424264907836914, 4.978851318359375, -4.337625503540039, 3.7427825927734375, 0.8772678375244141, 3.572296142578125, 3.4418067932128906, -0.30974578857421875, 0.5548477172851562, -0.9485492706298828, 2.85467529296875, -0.9014968872070312, 3.2511558532714844, 1.1943340301513672, 2.4702529907226562, 0.55413818359375, 1.3180389404296875, 2.1744346618652344, 0.144287109375, 3.817129135131836, 9.17791748046875, 3.4435882568359375, 2.2927703857421875, 0.8663997650146484], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000583.npy"} +{"epoch": 0.8813303099017384, "step": 584, "batch_size": 64, "mean": 1.8320791721343994, "std": 2.6848220825195312, "min": -3.325298309326172, "p10": -1.2695461273193356, "median": 1.9011554718017578, "p90": 5.167734146118165, "max": 9.597625732421875, "pos_frac": 0.71875, "sample": [5.05682373046875, 1.4654998779296875, 4.411386489868164, -0.7677879333496094, 2.1297378540039062, -0.25733184814453125, 5.89373779296875, 2.46002197265625, 2.3211421966552734, 2.8155059814453125, 2.095928192138672, -1.005462646484375, -1.3870697021484375, -0.3067169189453125, 0.45612335205078125, 5.642433166503906, 3.953643798828125, 4.576192855834961, 4.532739639282227, -2.1304893493652344, 5.215267181396484, 2.321685791015625, -0.7608871459960938, 4.511016845703125, 3.78558349609375, 2.6175308227539062, 0.05290412902832031, -0.7090682983398438, 2.2039108276367188, 1.751190185546875, 3.8323707580566406, 1.5382232666015625, 2.8409862518310547, 0.8314056396484375, -0.46961212158203125, 4.224266052246094, -1.3827247619628906, -0.9588775634765625, 0.025285720825195312, 0.25664329528808594, 2.0511207580566406, 7.951507568359375, -2.8256149291992188, 2.4394569396972656, 5.453826904296875, 1.218963623046875, -0.037540435791015625, 2.783832550048828, 1.686676025390625, -1.8000259399414062, -3.325298309326172, 2.172801971435547, -0.42138671875, 2.9930191040039062, 1.2886333465576172, 9.597625732421875, 7.839103698730469, 0.230865478515625, 3.708311080932617, 1.1287841796875, 1.1344718933105469, 2.1605491638183594, -3.2681121826171875, -0.5916671752929688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000584.npy"} +{"epoch": 0.8828420256991686, "step": 585, "batch_size": 64, "mean": 1.5696361064910889, "std": 2.220522880554199, "min": -4.317546844482422, "p10": -0.5588562011718748, "median": 1.4713449478149414, "p90": 4.20122528076172, "max": 7.7496490478515625, "pos_frac": 0.78125, "sample": [1.3234367370605469, 3.63031005859375, 1.3577041625976562, 3.4895057678222656, 2.3983917236328125, -0.23557281494140625, 1.41412353515625, 6.602363586425781, 1.0896224975585938, 0.3130321502685547, -4.317546844482422, 0.2771110534667969, 4.998809814453125, -0.38741302490234375, 0.1524829864501953, 0.536163330078125, -2.152006149291992, 1.543487548828125, 2.3685989379882812, 1.9132080078125, 4.761013031005859, 0.5104103088378906, 0.5684814453125, -2.7053680419921875, 1.4619922637939453, 1.9628334045410156, -0.27310943603515625, -0.6323318481445312, 1.4806976318359375, 1.1520729064941406, 0.3395042419433594, 2.7672576904296875, 4.274501800537109, -0.2978553771972656, -1.4041213989257812, 2.1324615478515625, 0.7520904541015625, 4.030246734619141, 3.731201171875, 3.1330909729003906, 2.255573272705078, 2.181671142578125, -0.02803802490234375, -2.1575679779052734, -3.1141586303710938, 4.914737701416016, 4.001171112060547, 3.318126678466797, -0.36750030517578125, -0.06330108642578125, 1.7828960418701172, 3.2357177734375, 4.976783752441406, 1.4930953979492188, 1.2851448059082031, 0.7018203735351562, 0.337615966796875, 3.7177963256835938, 7.7496490478515625, 2.2337188720703125, 0.4780769348144531, 1.7185287475585938, 2.4027347564697266, 3.3415374755859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000585.npy"} +{"epoch": 0.8843537414965986, "step": 586, "batch_size": 64, "mean": 1.8877204656600952, "std": 2.3506503105163574, "min": -4.1160430908203125, "p10": -0.922934341430664, "median": 1.981501579284668, "p90": 5.038803482055665, "max": 8.712387084960938, "pos_frac": 0.796875, "sample": [6.059417724609375, 1.3594131469726562, 2.6785621643066406, 1.7950973510742188, 0.980255126953125, 6.0319976806640625, 2.09356689453125, 0.25716400146484375, 3.3849945068359375, 2.146535873413086, 2.5846424102783203, -1.4592514038085938, -0.9986438751220703, 4.5237579345703125, 5.361602783203125, -0.06091117858886719, 1.233572006225586, 2.6787757873535156, 0.7496700286865234, 2.7996082305908203, 0.09380149841308594, 3.5053443908691406, 2.011770248413086, 2.76806640625, 1.5647735595703125, -0.011568069458007812, -0.93115234375, 0.8838729858398438, 2.16827392578125, 3.194080352783203, 3.895416259765625, 2.9169578552246094, -0.0238494873046875, 4.763729095458984, -0.2677764892578125, 0.5670108795166016, -3.392131805419922, 8.712387084960938, -0.11106491088867188, 0.5393238067626953, 0.42185211181640625, 1.95123291015625, 0.6708984375, 3.1789932250976562, 2.4596481323242188, -0.9037590026855469, -1.2957763671875, 2.314973831176758, 0.017826080322265625, 1.7843551635742188, 5.1566925048828125, 2.1386566162109375, 2.2591476440429688, 0.37320709228515625, 4.386049270629883, 2.2568893432617188, -1.3755035400390625, 6.214059829711914, -4.1160430908203125, 7.1254119873046875, 3.2422866821289062, 1.6725311279296875, 1.8193836212158203, 2.014007568359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000586.npy"} +{"epoch": 0.8858654572940288, "step": 587, "batch_size": 64, "mean": 1.640378475189209, "std": 2.561973810195923, "min": -5.131072998046875, "p10": -1.128433227539062, "median": 1.4621267318725586, "p90": 5.250845336914063, "max": 7.816526412963867, "pos_frac": 0.765625, "sample": [0.3807411193847656, -4.4280853271484375, 1.3538665771484375, 2.096975326538086, 2.5118560791015625, -1.2979774475097656, 1.2277603149414062, -0.41961669921875, 1.396087646484375, 2.7064666748046875, 1.4811077117919922, 2.123401641845703, -1.7308540344238281, 0.460906982421875, 1.2811508178710938, 2.44207763671875, 4.549945831298828, -0.21812820434570312, 0.890106201171875, 7.657073974609375, 3.1783447265625, 1.8138580322265625, -0.7328300476074219, 5.295722961425781, 1.443145751953125, 2.5660400390625, 6.861835479736328, -0.13408660888671875, -5.131072998046875, -0.3875083923339844, 3.6728515625, 1.5497016906738281, 5.212425231933594, 7.816526412963867, 1.3521347045898438, 1.2996063232421875, 2.900716781616211, -4.2619476318359375, 0.73577880859375, 3.677886962890625, 5.489501953125, -1.4300117492675781, 1.57647705078125, 1.4173851013183594, -0.3165130615234375, 2.9119529724121094, 1.899139404296875, 3.2591094970703125, 3.0707244873046875, 0.17329788208007812, 5.267311096191406, -2.243030548095703, 1.8435211181640625, 0.12237930297851562, 1.969015121459961, 3.8134307861328125, 0.7722702026367188, 0.158966064453125, -0.13380813598632812, 2.4080352783203125, 0.7016525268554688, 5.9926300048828125, 3.2695884704589844, -0.202789306640625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000587.npy"} +{"epoch": 0.8873771730914588, "step": 588, "batch_size": 64, "mean": 1.697368860244751, "std": 2.3625733852386475, "min": -4.861083984375, "p10": -1.2808521270751954, "median": 1.6692962646484375, "p90": 4.516346740722658, "max": 8.388381958007812, "pos_frac": 0.78125, "sample": [1.162139892578125, -1.3023529052734375, 2.3770599365234375, 2.0950469970703125, -0.881317138671875, 2.0537033081054688, 6.034294128417969, 3.717681884765625, 3.3686981201171875, 4.0595245361328125, 8.388381958007812, 1.1394309997558594, 1.4869613647460938, 0.3999309539794922, 1.6992721557617188, 2.349506378173828, 6.624980926513672, 7.535400390625, 2.2431812286376953, 2.5477447509765625, 2.2707061767578125, 1.9438362121582031, -1.5967140197753906, 2.8681106567382812, 1.8181705474853516, 4.712127685546875, 1.3226089477539062, -0.3769683837890625, -1.0363712310791016, 1.26739501953125, 0.37131309509277344, 3.475252151489258, -1.2437858581542969, -1.2967376708984375, 3.3331832885742188, 5.039093017578125, -0.4435253143310547, 1.5480270385742188, 1.2060527801513672, -0.04296112060546875, -1.3922805786132812, -1.665069580078125, 2.017425537109375, 2.60693359375, 2.0435829162597656, 2.1236495971679688, 3.6621246337890625, 0.8972854614257812, 0.5858001708984375, 3.746633529663086, 1.3380584716796875, 3.001617431640625, 0.48374366760253906, 1.6393203735351562, 0.0181427001953125, 0.883697509765625, -0.26102447509765625, 1.618703842163086, 1.2070083618164062, -4.861083984375, 2.3078861236572266, 5.052276611328125, 2.7391014099121094, -3.4000091552734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000588.npy"} +{"epoch": 0.8888888888888888, "step": 589, "batch_size": 64, "mean": 2.2617812156677246, "std": 2.4941070079803467, "min": -4.407501220703125, "p10": -0.31896705627441385, "median": 1.7491378784179688, "p90": 5.33634147644043, "max": 9.177574157714844, "pos_frac": 0.859375, "sample": [0.9951496124267578, 4.171424865722656, -1.3478927612304688, 5.351387023925781, 1.9174346923828125, 1.303070068359375, 1.10760498046875, 0.6034393310546875, 1.1308135986328125, 0.6482315063476562, 1.1316070556640625, -0.10286331176757812, 6.0413055419921875, 3.6451454162597656, 4.2592926025390625, 2.4659805297851562, 1.5065345764160156, 3.213176727294922, 7.405986785888672, 4.783821105957031, 0.6969375610351562, 3.32269287109375, 3.6238021850585938, 2.172271728515625, 8.356719970703125, 0.37366485595703125, -0.7716598510742188, 2.439647674560547, 4.699125289916992, 1.627065658569336, 2.1821556091308594, 0.27608680725097656, 5.790872573852539, 3.4237213134765625, 0.029750823974609375, 1.5054855346679688, 2.112884521484375, 3.7569961547851562, 1.689361572265625, 4.773002624511719, 3.1979522705078125, 1.7083053588867188, 4.2379913330078125, 0.9232940673828125, 2.7191848754882812, 2.022125244140625, -0.41158294677734375, -1.2364444732666016, 0.11198997497558594, 1.081817626953125, -4.407501220703125, 2.4694976806640625, 8.824195861816406, -0.5498275756835938, -0.03202056884765625, 2.2842559814453125, 5.301235198974609, 1.7899703979492188, 1.0643844604492188, 0.31540679931640625, -0.535736083984375, 1.0484142303466797, 9.177574157714844, 1.3382759094238281], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000589.npy"} +{"epoch": 0.890400604686319, "step": 590, "batch_size": 64, "mean": 2.1952526569366455, "std": 2.422541856765747, "min": -3.5089035034179688, "p10": -0.9939357757568358, "median": 1.935561180114746, "p90": 5.234464645385742, "max": 7.46612548828125, "pos_frac": 0.796875, "sample": [4.799995422363281, 1.837921142578125, 1.9323577880859375, 0.6325302124023438, 4.3801422119140625, 7.46612548828125, 1.83123779296875, -3.5089035034179688, 0.6507339477539062, 1.6923370361328125, 1.688079833984375, -0.10585975646972656, -0.5537490844726562, -0.175323486328125, -1.9968185424804688, 1.0418052673339844, 2.3610801696777344, 5.6423187255859375, -2.32965087890625, -1.03936767578125, 0.08563995361328125, 0.50048828125, 0.5807266235351562, 4.965423583984375, 6.546546936035156, 0.9875564575195312, 1.1782150268554688, 3.3686752319335938, 4.230926513671875, 2.6844635009765625, -0.2165355682373047, 5.276092529296875, 1.9387645721435547, 5.076416015625, 1.9233322143554688, 2.8885498046875, 4.300617218017578, 2.6567230224609375, 0.5516128540039062, 2.0015602111816406, 4.782318115234375, 2.7454051971435547, 5.8811187744140625, 3.53564453125, 4.945991516113281, 1.7012786865234375, 3.4708251953125, 1.2126312255859375, -1.2322006225585938, 2.5997772216796875, -1.7240257263183594, 3.290903091430664, 4.275735855102539, 1.3155059814453125, 1.7996673583984375, 5.432683944702148, 2.498136520385742, 2.0146026611328125, 5.137332916259766, -0.8879280090332031, 4.326450347900391, -0.3303070068359375, 7.091283798217773, -1.1594429016113281], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000590.npy"} +{"epoch": 0.891912320483749, "step": 591, "batch_size": 64, "mean": 1.7440354824066162, "std": 2.493366003036499, "min": -3.7600746154785156, "p10": -1.3911697387695312, "median": 1.57373046875, "p90": 4.977632904052735, "max": 8.152114868164062, "pos_frac": 0.71875, "sample": [-0.4445972442626953, 6.608528137207031, 2.9458274841308594, 2.938009262084961, 1.9458084106445312, 1.4264907836914062, 3.769145965576172, 2.0671539306640625, 1.5689449310302734, 2.9712066650390625, 2.01312255859375, -1.625213623046875, 2.0112152099609375, -1.0188064575195312, 1.5785160064697266, -0.29660797119140625, 2.8060302734375, 0.675201416015625, 1.305450439453125, 0.5180721282958984, 2.243621826171875, 1.4586772918701172, 6.5076904296875, -1.4482803344726562, 4.268211364746094, -2.003864288330078, 3.560302734375, 0.9609184265136719, 1.2998313903808594, 1.5302734375, -2.175811767578125, -1.2579116821289062, 4.8156280517578125, 8.152114868164062, 1.1537818908691406, 3.9886550903320312, -3.7600746154785156, 1.8504486083984375, 4.408882141113281, 0.21873092651367188, -1.1084785461425781, -0.31166839599609375, 1.510162353515625, 3.3965301513671875, -0.5972805023193359, 5.2147064208984375, -1.0752525329589844, -2.0817794799804688, -1.5899124145507812, 4.821746826171875, -0.860626220703125, -0.300048828125, 7.1014251708984375, 2.2864437103271484, 0.5964622497558594, 3.677722930908203, 5.044441223144531, 1.693613052368164, 0.8387908935546875, 3.1776962280273438, 5.24090576171875, 2.1210479736328125, -0.4280433654785156, 3.714336395263672], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000591.npy"} +{"epoch": 0.8934240362811792, "step": 592, "batch_size": 64, "mean": 1.750661849975586, "std": 2.5419812202453613, "min": -2.7463245391845703, "p10": -1.1395019531249997, "median": 1.0723457336425781, "p90": 5.266035079956056, "max": 9.08282470703125, "pos_frac": 0.78125, "sample": [1.8649520874023438, 5.4211578369140625, 4.9041290283203125, 1.5966205596923828, 2.0595970153808594, 5.63719367980957, 4.33251953125, -0.6083641052246094, 1.4233589172363281, 2.0149688720703125, -0.12926483154296875, 2.540172576904297, -0.0734405517578125, -0.7107620239257812, 4.762298583984375, 4.884490966796875, 7.428205490112305, 2.3906784057617188, 0.0519561767578125, 0.8333911895751953, -2.0172119140625, 3.5950775146484375, 1.0103530883789062, 4.741912841796875, 0.11788558959960938, 1.7895965576171875, 0.7640762329101562, 6.2872467041015625, 3.0892868041992188, 2.7288894653320312, -1.608154296875, 2.393108367919922, 0.46978759765625, 0.438629150390625, 1.8380661010742188, 0.6504364013671875, -1.694091796875, 3.984283447265625, -2.7463245391845703, -0.24749755859375, 0.05492401123046875, 4.983478546142578, 0.0136260986328125, -1.2891921997070312, -0.8903961181640625, 4.005409240722656, -1.5673675537109375, 5.3871307373046875, 7.653800964355469, -1.2462615966796875, 9.08282470703125, 0.3203773498535156, 0.3392295837402344, 0.259857177734375, 1.1812591552734375, 0.27976226806640625, 0.79632568359375, 1.1570777893066406, -0.7355995178222656, 0.8247489929199219, 0.9497623443603516, 0.04167938232421875, 1.13433837890625, 3.0963478088378906], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000592.npy"} +{"epoch": 0.8949357520786092, "step": 593, "batch_size": 64, "mean": 1.640941858291626, "std": 2.4756546020507812, "min": -4.685676574707031, "p10": -1.2321443557739258, "median": 1.5195503234863281, "p90": 4.868394088745117, "max": 7.866767883300781, "pos_frac": 0.75, "sample": [2.546722412109375, 0.8058052062988281, 3.9634552001953125, -1.976959228515625, 5.66377067565918, -0.8231277465820312, 0.7998123168945312, -1.246337890625, 4.924228668212891, 1.0787582397460938, 4.9640350341796875, 0.3706169128417969, 3.8745269775390625, 3.656494140625, 1.2440338134765625, -0.7444839477539062, 1.3399200439453125, 3.0172119140625, 4.7381134033203125, 2.20501708984375, 0.9427947998046875, 2.7897377014160156, 4.272785186767578, 4.273139953613281, 3.0466575622558594, 2.439208984375, -0.4678153991699219, 3.130748748779297, 1.1588859558105469, -3.3277320861816406, 4.6129302978515625, -2.6125965118408203, 0.1750335693359375, 3.4464664459228516, 2.3735809326171875, 2.6164913177490234, -0.2288837432861328, -1.1660079956054688, 2.178802490234375, -0.14253997802734375, -0.7940406799316406, -1.199026107788086, 5.018157958984375, -3.3488540649414062, 6.819511413574219, 2.9116859436035156, 5.163299560546875, 1.1417865753173828, -1.35595703125, 7.866767883300781, 1.3983306884765625, 0.29858970642089844, 1.6407699584960938, 1.7916393280029297, 2.7136306762695312, -4.685676574707031, 0.4164447784423828, 2.925567626953125, 0.56805419921875, -0.4666175842285156, 1.187042236328125, 1.0049819946289062, 2.0236282348632812, 2.067258834838867], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000593.npy"} +{"epoch": 0.8964474678760394, "step": 594, "batch_size": 64, "mean": 1.3571163415908813, "std": 2.3823671340942383, "min": -4.0174102783203125, "p10": -1.3871463775634763, "median": 1.0640125274658203, "p90": 4.289429473876954, "max": 9.519386291503906, "pos_frac": 0.75, "sample": [0.9957160949707031, 2.1629791259765625, -1.4752464294433594, 4.0480804443359375, 3.2383384704589844, 2.566009521484375, -0.5189361572265625, 0.178070068359375, 2.187957763671875, -1.6045074462890625, 0.0340423583984375, 1.0237617492675781, -0.6002120971679688, 0.8522491455078125, 0.3684558868408203, 0.17136383056640625, -1.7311248779296875, 0.2791404724121094, 3.729339599609375, 0.11043548583984375, 2.476337432861328, 1.1729660034179688, 0.3038063049316406, 1.6110420227050781, 5.217378616333008, 1.1103591918945312, 0.6967239379882812, -2.4243240356445312, 2.691875457763672, -0.9102916717529297, 9.519386291503906, 1.1042633056640625, 4.3404083251953125, 4.736476898193359, 1.0010604858398438, -1.0834846496582031, -1.18157958984375, 1.9705390930175781, 4.019611358642578, -1.6290702819824219, 1.9343414306640625, -0.36655616760253906, 4.170478820800781, 6.585338592529297, -0.7578659057617188, 1.9409904479980469, 0.6465053558349609, 3.3495941162109375, -1.0994148254394531, 2.7669448852539062, 3.858551025390625, -4.0174102783203125, 5.8044891357421875, 4.778617858886719, 1.65728759765625, 2.122821807861328, 0.3179454803466797, 1.1368637084960938, 1.96966552734375, -0.9936370849609375, 1.40252685546875, -2.6001052856445312, 0.7916793823242188, 0.6963958740234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000594.npy"} +{"epoch": 0.8979591836734694, "step": 595, "batch_size": 64, "mean": 2.245739459991455, "std": 2.7159059047698975, "min": -5.988718032836914, "p10": -1.2538032531738281, "median": 2.3684310913085938, "p90": 5.293320846557618, "max": 10.350914001464844, "pos_frac": 0.8125, "sample": [-1.2614364624023438, 4.246612548828125, 4.294403076171875, 2.5796051025390625, 2.4511642456054688, 3.251638412475586, 3.7865676879882812, 1.8035869598388672, 0.11126327514648438, 5.395252227783203, 3.5628204345703125, 0.6060256958007812, 0.17620849609375, -2.0774478912353516, 4.756757736206055, 0.4644355773925781, 1.9239063262939453, -0.7390518188476562, 5.000556945800781, 0.2437915802001953, 6.084144592285156, 2.4704856872558594, 4.0217742919921875, 1.552398681640625, 5.05548095703125, 2.0282669067382812, 1.1063613891601562, 0.95458984375, 2.3408355712890625, 4.16937255859375, 1.07086181640625, -1.34619140625, 1.8147125244140625, -1.425933837890625, 4.359748840332031, 3.1890869140625, 6.1773529052734375, -2.289306640625, -5.988718032836914, 8.759895324707031, 10.350914001464844, 3.929647445678711, 3.860748291015625, 1.740936279296875, 1.1808090209960938, 4.241172790527344, 2.9443817138671875, 2.5124359130859375, 3.2544326782226562, 3.221454620361328, -0.5795936584472656, 3.1734237670898438, 0.19268035888671875, 1.2478103637695312, 1.34130859375, 2.2254638671875, 5.7090606689453125, 2.396026611328125, -1.235992431640625, -0.45394134521484375, -0.1467571258544922, -1.766998291015625, 3.732667922973633, 5.973356246948242], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000595.npy"} +{"epoch": 0.8994708994708994, "step": 596, "batch_size": 64, "mean": 1.1845312118530273, "std": 2.440032482147217, "min": -3.501129150390625, "p10": -1.5215845108032224, "median": 0.7052984237670898, "p90": 5.085631179809571, "max": 7.773429870605469, "pos_frac": 0.65625, "sample": [-1.8226852416992188, 2.3249740600585938, 6.415168762207031, 1.2416458129882812, 1.6545219421386719, -0.9126739501953125, -0.05853271484375, 0.3267669677734375, -0.2255096435546875, 5.785682678222656, -1.5782356262207031, 1.1453704833984375, -2.45989990234375, 0.23207855224609375, 4.880523681640625, 5.83741569519043, 2.5637588500976562, 0.4691944122314453, -1.6498908996582031, 0.9367198944091797, -0.7685089111328125, 1.9632110595703125, 2.05078125, 1.940908432006836, 7.380685806274414, 2.904956817626953, 1.5798263549804688, -1.0772857666015625, -0.24184036254882812, -0.43212890625, 0.9662971496582031, 0.4832305908203125, 0.9718818664550781, 0.12469291687011719, 0.5656356811523438, -0.9566154479980469, -1.7225723266601562, 7.773429870605469, 0.248809814453125, -3.2239913940429688, 2.1244163513183594, 2.3864669799804688, -0.3704032897949219, -3.501129150390625, 0.21651458740234375, 1.9891014099121094, -0.012775421142578125, -1.346343994140625, 6.017097473144531, 2.7391815185546875, 3.2854061126708984, 0.582611083984375, -0.6044387817382812, 1.6299209594726562, 0.6993484497070312, 5.173534393310547, -0.5932846069335938, 2.9128856658935547, -1.3893985748291016, 0.7112483978271484, 2.9241676330566406, 3.3916473388671875, 1.5976409912109375, -0.3912200927734375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000596.npy"} +{"epoch": 0.9009826152683296, "step": 597, "batch_size": 64, "mean": 1.5828520059585571, "std": 2.3161141872406006, "min": -3.1194610595703125, "p10": -0.9182456970214842, "median": 1.0954093933105469, "p90": 4.216416549682618, "max": 10.345619201660156, "pos_frac": 0.78125, "sample": [-0.16745567321777344, -2.812458038330078, 0.6663227081298828, 0.1320648193359375, 3.6440277099609375, 1.6425933837890625, 1.5360832214355469, -1.1585769653320312, -0.7357101440429688, 3.9796180725097656, -0.3820457458496094, 4.317901611328125, 0.8979053497314453, 0.6514205932617188, -0.6331939697265625, 1.297780990600586, 6.664268493652344, 5.622264862060547, 2.2695770263671875, -1.1606330871582031, 2.7586746215820312, 1.2079849243164062, -3.1194610595703125, -0.7265548706054688, 3.306743621826172, 3.391500473022461, 1.1285877227783203, 0.41831398010253906, 3.5670242309570312, -1.0503730773925781, 1.26165771484375, 1.833160400390625, 2.8619003295898438, 2.5328140258789062, 2.8747425079345703, 2.0538291931152344, 0.81927490234375, 1.04815673828125, 1.8475780487060547, 3.833770751953125, 0.15746307373046875, -0.35036468505859375, 1.0524215698242188, -0.1829204559326172, 0.18910789489746094, 0.5713672637939453, 0.2764110565185547, 0.391021728515625, 5.745948791503906, 0.9322357177734375, 10.345619201660156, 1.0622310638427734, 2.2708969116210938, -0.9964752197265625, -1.6603317260742188, 2.9394378662109375, 0.10126686096191406, 1.0032501220703125, 5.97235107421875, 3.74176025390625, 0.7809638977050781, 4.868171691894531, 2.7833404541015625, 1.1862716674804688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000597.npy"} +{"epoch": 0.9024943310657596, "step": 598, "batch_size": 64, "mean": 1.2671819925308228, "std": 2.488030195236206, "min": -2.7133216857910156, "p10": -1.7211652755737303, "median": 0.9549674987792969, "p90": 4.3870185852050785, "max": 8.114439010620117, "pos_frac": 0.65625, "sample": [-0.072784423828125, -1.6714859008789062, 4.470878601074219, 2.2422332763671875, 2.3050994873046875, -0.036685943603515625, 1.006317138671875, 4.161746978759766, 1.3052558898925781, -1.8034133911132812, -0.5660362243652344, 1.1133842468261719, 0.5913009643554688, -1.7424564361572266, 7.078895568847656, -2.7133216857910156, 7.724529266357422, -1.483856201171875, 3.5893898010253906, 1.5460853576660156, -0.5399627685546875, -2.661256790161133, -2.318145751953125, 2.7158584594726562, 0.10970306396484375, 0.5872001647949219, 1.5512962341308594, 3.2319107055664062, -1.6318435668945312, 0.2831916809082031, -0.45944786071777344, 0.8897247314453125, 2.1908340454101562, 1.1263580322265625, 4.83929443359375, -0.5920619964599609, 4.010261535644531, -0.7701683044433594, 4.006278991699219, 1.0551605224609375, 3.0622406005859375, 2.910858154296875, -0.11734962463378906, 8.114439010620117, -0.700408935546875, 0.7299461364746094, 1.6046600341796875, -0.21713829040527344, 3.1982288360595703, 2.41876220703125, 0.151947021484375, -2.4450531005859375, 0.9036178588867188, 1.2448883056640625, 0.28606414794921875, -1.8076095581054688, 5.2999267578125, 1.7115936279296875, -0.8873825073242188, 0.24204254150390625, 4.19134521484375, 5.1297454833984375, -1.569793701171875, 2.9748153686523438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000598.npy"} +{"epoch": 0.9040060468631897, "step": 599, "batch_size": 64, "mean": 1.4130332469940186, "std": 2.600252866744995, "min": -3.6762771606445312, "p10": -1.0839309692382812, "median": 0.7775173187255859, "p90": 4.569401359558106, "max": 10.462677001953125, "pos_frac": 0.734375, "sample": [-1.035980224609375, 0.41016387939453125, 0.7606658935546875, 0.13552093505859375, -1.0463638305664062, 1.9365081787109375, -1.5712413787841797, 5.187103271484375, -0.34929656982421875, 0.839508056640625, -1.8505935668945312, -0.9110088348388672, 2.27362060546875, 2.4825801849365234, 3.6378097534179688, -0.3586578369140625, -3.324127197265625, 1.019510269165039, 1.3503456115722656, 0.1385955810546875, 5.894079208374023, 1.4716663360595703, 0.15471649169921875, -3.6762771606445312, -0.21217727661132812, -1.0940704345703125, 0.13409423828125, 5.452558517456055, -0.11545562744140625, 0.48834228515625, 5.5433349609375, 2.8445167541503906, -1.060272216796875, -2.5730514526367188, 0.8258399963378906, 0.24650192260742188, 0.29969215393066406, 0.7282485961914062, 0.04836273193359375, 4.5901947021484375, 4.301239013671875, 0.45708465576171875, 0.07387924194335938, 4.520883560180664, 3.8509044647216797, 4.219818115234375, 3.0762100219726562, -0.5966224670410156, 0.11854934692382812, 0.7943687438964844, 10.462677001953125, 2.554840087890625, 3.0647811889648438, 0.9686622619628906, 4.3956451416015625, -0.5003223419189453, 8.188690185546875, 0.6718978881835938, 2.9008750915527344, 1.418487548828125, -1.5934600830078125, 2.708017349243164, 3.6025238037109375, 1.0589866638183594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000599.npy"} +{"epoch": 0.9055177626606198, "step": 600, "batch_size": 64, "mean": 1.3058440685272217, "std": 2.2593133449554443, "min": -3.634632110595703, "p10": -1.140140914916992, "median": 0.928802490234375, "p90": 4.053752899169922, "max": 7.8099517822265625, "pos_frac": 0.734375, "sample": [3.5977935791015625, 0.09657096862792969, -1.2090034484863281, 1.127532958984375, 3.7130661010742188, 1.6683540344238281, 1.9928398132324219, -0.8151531219482422, 5.090991973876953, 4.063926696777344, -0.7621803283691406, 2.6473045349121094, 3.4821319580078125, 4.7033843994140625, -0.9248466491699219, -0.5288944244384766, -0.979461669921875, -0.6389789581298828, -2.0525054931640625, 4.0300140380859375, 0.14718246459960938, 2.3936691284179688, 4.35272216796875, 0.33774566650390625, -1.6366348266601562, 2.0482177734375, 0.6852874755859375, 3.345203399658203, 0.09204864501953125, 3.2678756713867188, 2.8561477661132812, 0.6481819152832031, -3.634632110595703, -0.5885162353515625, 0.5572052001953125, -2.416675567626953, 0.1943359375, 1.0661239624023438, 0.6205368041992188, 3.370391845703125, 0.12957763671875, 0.9775390625, 2.3196792602539062, 1.5473403930664062, -0.3874359130859375, 3.2616519927978516, -1.2104949951171875, -0.9374122619628906, -3.2213058471679688, 4.279491424560547, 0.3243236541748047, 0.88006591796875, 0.8181705474853516, 1.2196121215820312, 1.3349189758300781, 2.1222381591796875, 3.7874679565429688, 0.6417274475097656, 1.7294197082519531, 6.970813751220703, -0.23906707763671875, 0.40351295471191406, 3.0029296875, 7.8099517822265625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000600.npy"} +{"epoch": 0.9070294784580499, "step": 601, "batch_size": 64, "mean": 1.7845829725265503, "std": 2.5807249546051025, "min": -4.307838439941406, "p10": -1.3431289672851563, "median": 1.5797290802001953, "p90": 4.808769607543946, "max": 9.050071716308594, "pos_frac": 0.734375, "sample": [3.597320556640625, 0.8024711608886719, 3.1126937866210938, -2.0501155853271484, 3.01104736328125, 4.738452911376953, 8.483718872070312, 0.938140869140625, 1.0586318969726562, -2.256988525390625, 9.050071716308594, -4.307838439941406, 5.468223571777344, 1.6488533020019531, 0.7670326232910156, 1.5106048583984375, 4.444879531860352, 3.0506744384765625, 0.21011734008789062, 1.949554443359375, 4.444141387939453, 0.5076007843017578, 0.5350189208984375, 6.1591796875, 6.086391448974609, -0.38425445556640625, -0.39321136474609375, -0.3099040985107422, 2.712066650390625, -1.3139801025390625, -0.1620941162109375, 2.8347110748291016, 0.34332275390625, -0.11224746704101562, 0.02838897705078125, 4.23133659362793, 2.2375106811523438, 2.5584487915039062, 1.9082794189453125, 1.9183216094970703, -1.355621337890625, 2.4763031005859375, 0.22847747802734375, 4.022163391113281, 0.143585205078125, 1.0224132537841797, 2.1014938354492188, -0.26065826416015625, -1.413503646850586, 1.0264816284179688, 1.3411712646484375, 4.465911865234375, -0.3294200897216797, -0.12430572509765625, 2.370281219482422, 2.4262466430664062, 6.0205078125, 2.735471725463867, 3.3705062866210938, -0.23360633850097656, -1.82342529296875, 4.455486297607422, 4.838905334472656, -2.3481292724609375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000601.npy"} +{"epoch": 0.90854119425548, "step": 602, "batch_size": 64, "mean": 1.81719970703125, "std": 2.2819876670837402, "min": -2.49334716796875, "p10": -0.5773433685302735, "median": 1.4883480072021484, "p90": 4.817351150512695, "max": 7.370506286621094, "pos_frac": 0.765625, "sample": [-1.9966812133789062, 4.760498046875, 3.0251731872558594, 5.346588134765625, 3.0183868408203125, 2.5974578857421875, 1.10040283203125, 0.3791389465332031, 3.65045166015625, 1.4781074523925781, 4.638702392578125, -1.38916015625, 3.1784820556640625, 5.81640625, -1.6429634094238281, 7.370506286621094, -2.49334716796875, 2.6874237060546875, 0.1311798095703125, 1.2216529846191406, 2.0106964111328125, -0.5052032470703125, -0.08649444580078125, -0.5811805725097656, 0.2204132080078125, 3.4907073974609375, 0.4530982971191406, 2.7894210815429688, 1.3465709686279297, 1.640237808227539, -0.08962631225585938, 0.7323246002197266, 2.4781265258789062, 1.05010986328125, 1.3638038635253906, -0.4323310852050781, -0.01375579833984375, 1.3508720397949219, 2.6615962982177734, -0.568389892578125, 0.09101486206054688, 0.389068603515625, 2.8105850219726562, 4.841716766357422, -1.5872344970703125, -2.1610488891601562, 3.7028846740722656, 1.7617759704589844, 2.680828094482422, 0.7003631591796875, -0.13116455078125, 1.4985885620117188, 7.2804718017578125, 4.6999359130859375, 0.30364227294921875, 4.036783218383789, 3.3662757873535156, 1.748931884765625, 6.432472229003906, 5.506172180175781, -0.26131439208984375, 3.436687469482422, 1.9497795104980469, 1.01416015625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000602.npy"} +{"epoch": 0.91005291005291, "step": 603, "batch_size": 64, "mean": 1.6207441091537476, "std": 2.038555383682251, "min": -3.0191421508789062, "p10": -0.8076381683349608, "median": 1.233738899230957, "p90": 4.421344375610352, "max": 7.08502197265625, "pos_frac": 0.828125, "sample": [2.03338623046875, 4.031135559082031, 3.5903472900390625, 0.3865509033203125, 0.47017669677734375, 2.2835693359375, 1.0073089599609375, 3.765380859375, -0.8629074096679688, -1.7696380615234375, 2.8611068725585938, 0.4713134765625, 4.463878631591797, -0.030122756958007812, 0.40612030029296875, 6.5713043212890625, -1.3532562255859375, 0.37456512451171875, 1.450408935546875, 7.08502197265625, 3.011821746826172, 2.2792510986328125, 4.93876838684082, 1.84246826171875, 0.05408477783203125, 0.1357421875, 2.529054641723633, 0.0494842529296875, 2.007213592529297, -2.2613449096679688, 1.1383285522460938, 1.2360687255859375, 1.0380592346191406, -0.6786766052246094, -1.6120929718017578, 3.0459823608398438, 2.253864288330078, 1.13018798828125, 0.270263671875, 4.3220977783203125, 1.2314090728759766, 1.979278564453125, 0.21591949462890625, 1.7736873626708984, 1.1828536987304688, 3.7964630126953125, 1.8574905395507812, -0.032299041748046875, 0.8122520446777344, -0.07194900512695312, -1.1153564453125, 0.6659412384033203, 2.528554916381836, 2.6921005249023438, 3.3488197326660156, 0.6675796508789062, 2.326030731201172, 0.8328475952148438, 3.0763092041015625, 4.552032470703125, 5.209264755249023, 0.46222686767578125, 4.789031982421875, -3.0191421508789062], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000603.npy"} +{"epoch": 0.9115646258503401, "step": 604, "batch_size": 64, "mean": 1.7355563640594482, "std": 2.5761826038360596, "min": -4.495964050292969, "p10": -0.7110038757324219, "median": 1.3112316131591797, "p90": 5.901288032531739, "max": 8.80718994140625, "pos_frac": 0.796875, "sample": [1.2592926025390625, 3.527618408203125, -0.6956710815429688, -0.5144119262695312, 2.199779510498047, 1.699789047241211, 1.3347587585449219, 0.49776458740234375, 5.979957580566406, -0.7175750732421875, 0.2281341552734375, -0.24731063842773438, 0.35407257080078125, 3.607574462890625, 0.9025115966796875, 1.462799072265625, 2.6897659301757812, 0.6259517669677734, 2.1024246215820312, 3.4717330932617188, -4.317527770996094, 1.1075439453125, 1.2877044677734375, -1.3371353149414062, 1.273101806640625, 1.0169868469238281, 0.577911376953125, 1.7685317993164062, 1.8561859130859375, -4.495964050292969, 1.4108734130859375, 0.43491363525390625, 0.7895660400390625, -1.998321533203125, 7.0454864501953125, -1.8011550903320312, -0.027252197265625, 6.2752685546875, 6.0785675048828125, 3.3684310913085938, 3.2279224395751953, 5.71772575378418, 7.5148773193359375, 0.8773269653320312, 1.0298690795898438, 2.8122406005859375, 1.7713775634765625, 2.8141708374023438, 8.80718994140625, -0.3600578308105469, 5.9833831787109375, 2.3676300048828125, 0.3474578857421875, 0.01906585693359375, 2.62506103515625, 3.3105926513671875, 2.655284881591797, -0.2803916931152344, 0.94903564453125, 4.6611175537109375, 3.580160140991211, 0.81695556640625, -2.8875732421875, 2.63250732421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000604.npy"} +{"epoch": 0.9130763416477702, "step": 605, "batch_size": 64, "mean": 1.7350056171417236, "std": 2.4448306560516357, "min": -3.5258560180664062, "p10": -1.594474792480468, "median": 1.5264091491699219, "p90": 4.838239860534668, "max": 9.186737060546875, "pos_frac": 0.78125, "sample": [1.6595458984375, 0.0672149658203125, -0.8300418853759766, 9.186737060546875, 3.1705551147460938, 3.655529022216797, 0.2076416015625, 0.9400558471679688, 3.8091506958007812, -2.0400142669677734, 5.812858581542969, -1.8417816162109375, 2.446502685546875, -0.12261199951171875, 0.04541778564453125, -0.8172454833984375, 0.7419662475585938, 3.6783065795898438, 2.1182212829589844, 1.3583793640136719, 0.22306060791015625, -0.2711448669433594, 1.4126033782958984, -2.6548385620117188, -0.258056640625, 1.3510780334472656, 3.8641204833984375, 1.5429611206054688, 0.5831813812255859, 2.6421127319335938, 1.8056182861328125, 1.9064788818359375, -1.9315338134765625, 4.3587188720703125, -0.1084442138671875, 3.8876266479492188, 4.7767791748046875, 4.035858154296875, 1.8005523681640625, 5.5311737060546875, 2.7960357666015625, 1.4830093383789062, 1.35321044921875, 2.8609371185302734, 4.690399169921875, 1.509857177734375, 3.224945068359375, 4.864580154418945, 1.8436050415039062, -1.017425537109375, 6.1136627197265625, 4.964176177978516, 1.7898826599121094, 0.1916046142578125, -2.5608673095703125, 1.3554191589355469, 0.8986320495605469, -3.5258560180664062, 2.870025634765625, 0.20746231079101562, 0.847991943359375, 5.7557830810546875, -2.3080978393554688, 3.08709716796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000605.npy"} +{"epoch": 0.9145880574452003, "step": 606, "batch_size": 64, "mean": 1.421295404434204, "std": 2.208028554916382, "min": -2.360321044921875, "p10": -1.1186847686767576, "median": 1.2622528076171875, "p90": 3.8278612136840824, "max": 7.882659912109375, "pos_frac": 0.71875, "sample": [-0.8065681457519531, 2.890186309814453, 0.09592437744140625, 3.674835205078125, 0.44774627685546875, 0.4673118591308594, 5.0483551025390625, -2.360321044921875, -0.6449127197265625, 3.869089126586914, 5.039194107055664, -2.1758270263671875, 3.2210464477539062, 2.5410308837890625, 3.51092529296875, -1.7920303344726562, 1.8260478973388672, 0.95159912109375, -0.048614501953125, 2.0257835388183594, 7.882659912109375, -2.12652587890625, 0.4807701110839844, 1.8956985473632812, -0.96612548828125, -0.9336471557617188, 2.67950439453125, 1.1851119995117188, 1.3530654907226562, 2.286163330078125, 0.0211181640625, 0.4735984802246094, -1.5557708740234375, 1.9115314483642578, 0.034454345703125, 0.5605487823486328, 0.9513473510742188, -1.3932971954345703, 2.3371734619140625, 3.4412002563476562, 3.247875213623047, 5.300621032714844, 1.3393936157226562, 3.7316627502441406, 1.4123687744140625, 6.84320068359375, 0.02880859375, 3.2409210205078125, -0.5316848754882812, -0.9733009338378906, 2.396099090576172, -0.6501235961914062, -0.44162750244140625, 1.9746475219726562, -0.4710845947265625, 2.9408836364746094, 0.4747314453125, -1.1809921264648438, 1.1123428344726562, 4.939765930175781, 2.8871612548828125, 3.4329910278320312, 2.1583480834960938, -0.5494842529296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000606.npy"} +{"epoch": 0.9160997732426304, "step": 607, "batch_size": 64, "mean": 1.8611321449279785, "std": 2.5177743434906006, "min": -4.2893829345703125, "p10": -1.1663368225097657, "median": 1.6955289840698242, "p90": 4.134309768676758, "max": 9.644378662109375, "pos_frac": 0.75, "sample": [1.7316722869873047, 1.37542724609375, -1.1638641357421875, 1.3170318603515625, 2.745574951171875, 2.386058807373047, 3.875812530517578, 4.0544281005859375, -4.2893829345703125, -0.2673225402832031, 0.9821434020996094, 3.5226364135742188, 9.154937744140625, 5.042091369628906, 1.4583511352539062, 0.5438232421875, -1.5208854675292969, 1.8289852142333984, 2.9627761840820312, 6.10906982421875, 0.50445556640625, 2.8529701232910156, 2.8741226196289062, 4.000885009765625, 3.2668838500976562, 2.9155120849609375, 1.5643386840820312, 0.8718643188476562, -1.146148681640625, -1.1673965454101562, 1.6593856811523438, 4.168544769287109, 3.9887542724609375, 0.9244537353515625, -0.7671527862548828, 5.413673400878906, 3.8370513916015625, -0.5680160522460938, 3.7927703857421875, -0.2094268798828125, -0.8685379028320312, 1.1383819580078125, 1.05389404296875, 2.6124343872070312, 1.2565879821777344, -1.8260955810546875, 0.5347480773925781, 3.859905242919922, 9.644378662109375, 4.348134994506836, -0.2617835998535156, -1.3866195678710938, 3.44976806640625, 3.9366989135742188, 0.9107017517089844, 3.065685272216797, 0.33139991760253906, -0.48431396484375, 2.8473434448242188, 4.006797790527344, 2.528533935546875, -1.8563728332519531, -2.3513145446777344, 1.9952125549316406], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000607.npy"} +{"epoch": 0.9176114890400605, "step": 608, "batch_size": 64, "mean": 1.2424057722091675, "std": 2.1615142822265625, "min": -3.999847412109375, "p10": -1.3277061462402342, "median": 1.26171875, "p90": 4.192100143432619, "max": 7.254611968994141, "pos_frac": 0.703125, "sample": [2.6116466522216797, -0.3676910400390625, 0.38321495056152344, 2.92578125, 0.2657966613769531, 3.497772216796875, 3.355224609375, -0.962493896484375, 1.8271141052246094, -1.6863937377929688, 1.5930328369140625, 0.7206039428710938, 3.196044921875, 2.3569889068603516, 0.5881805419921875, 2.7217140197753906, 4.416511535644531, -2.576629638671875, -1.3723793029785156, -0.0933380126953125, 1.729644775390625, -1.2234687805175781, -0.16321372985839844, 7.254611968994141, 1.3947677612304688, 0.15921401977539062, -0.9917449951171875, 1.8247737884521484, 0.36881065368652344, 4.320453643798828, -0.553314208984375, -2.4312667846679688, -1.670623779296875, 2.0020065307617188, -0.935302734375, 3.892608642578125, 1.6831512451171875, 0.23749542236328125, 1.51153564453125, -0.004291534423828125, 0.9810638427734375, 1.5688247680664062, 0.6326980590820312, -0.3895606994628906, -2.002532958984375, 3.1675262451171875, 0.40110015869140625, 5.26422119140625, 4.5806884765625, 1.6354255676269531, 1.9046630859375, 1.5947723388671875, 1.4834518432617188, 1.55047607421875, 0.17586898803710938, 3.8329620361328125, -3.999847412109375, 5.689785003662109, 0.5314178466796875, -0.2499542236328125, -0.04664421081542969, 3.3783035278320312, 1.1286697387695312, 4.89404296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000608.npy"} +{"epoch": 0.9191232048374905, "step": 609, "batch_size": 64, "mean": 1.9679713249206543, "std": 2.4001636505126953, "min": -3.631134033203125, "p10": -0.8183641433715816, "median": 1.6169204711914062, "p90": 4.664370727539063, "max": 8.418746948242188, "pos_frac": 0.828125, "sample": [4.4468231201171875, 1.5719146728515625, 2.9753265380859375, 1.4053115844726562, 3.7816009521484375, 0.3582305908203125, 2.6038856506347656, 2.9163055419921875, -0.30527687072753906, -0.9914360046386719, -1.1046218872070312, 1.6706733703613281, 0.11890411376953125, 1.111318588256836, 8.418746948242188, 3.6151885986328125, 5.180931091308594, 3.2703170776367188, 0.9950141906738281, 6.257808685302734, -3.631134033203125, 4.67437744140625, 1.66192626953125, 4.641021728515625, 0.4005603790283203, 2.50323486328125, 1.6744098663330078, 2.4486236572265625, -1.938690185546875, 0.3651885986328125, 6.910667419433594, 0.043338775634765625, 6.586187362670898, 3.7483749389648438, 1.916717529296875, 3.7434654235839844, 0.5380516052246094, -0.3213005065917969, 7.2056884765625, 2.7349395751953125, 0.5954437255859375, -1.7744636535644531, 0.9958877563476562, 4.165351867675781, 2.994813919067383, -2.7265167236328125, 1.3763103485107422, 0.7743606567382812, 3.6037521362304688, -0.41452980041503906, 0.381744384765625, 0.17249679565429688, 1.2896347045898438, 1.3666210174560547, 3.9978866577148438, 1.1259918212890625, 3.263050079345703, 4.515754699707031, 2.5091018676757812, -1.2923202514648438, 2.905975341796875, 1.1527881622314453, -0.226531982421875, 0.9949493408203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000609.npy"} +{"epoch": 0.9206349206349206, "step": 610, "batch_size": 64, "mean": 1.7052839994430542, "std": 3.0670664310455322, "min": -5.735260009765625, "p10": -1.562880706787109, "median": 1.2258930206298828, "p90": 5.5187070846557615, "max": 10.69036865234375, "pos_frac": 0.71875, "sample": [1.646331787109375, 7.5331268310546875, 2.5596847534179688, 5.5333709716796875, 0.3155670166015625, 4.515621185302734, 0.8171005249023438, 1.1020336151123047, -2.3956146240234375, 1.1328659057617188, 1.3409652709960938, -1.9368743896484375, 1.3189201354980469, 9.307220458984375, -0.338043212890625, 0.9116897583007812, 5.484491348266602, -0.3580169677734375, -5.735260009765625, -0.536895751953125, -0.03412628173828125, -0.571319580078125, 2.66754150390625, 1.6368942260742188, -1.2557296752929688, -1.6483383178710938, -1.8484039306640625, 8.941314697265625, 3.028614044189453, 4.8177947998046875, 0.3577117919921875, 8.38372802734375, 1.66632080078125, 2.4060134887695312, 0.6539230346679688, 0.9703330993652344, 5.116779327392578, 0.2875709533691406, 1.4370269775390625, 0.8641357421875, 2.2477951049804688, 1.7001419067382812, 2.01776123046875, -2.23553466796875, 3.5044326782226562, -1.2702064514160156, -0.73455810546875, 0.022426605224609375, 3.128683090209961, 4.305538177490234, -0.618499755859375, 2.073871612548828, 2.4777679443359375, 0.4907493591308594, -0.7395496368408203, 0.03724861145019531, -2.18505859375, 2.2021484375, 10.69036865234375, 0.21368026733398438, 5.052101135253906, -1.3634796142578125, 6.17730712890625, 1.8469696044921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000610.npy"} +{"epoch": 0.9221466364323507, "step": 611, "batch_size": 64, "mean": 1.2458233833312988, "std": 2.2708494663238525, "min": -3.2225723266601562, "p10": -1.317302131652832, "median": 1.3377714157104492, "p90": 3.4789573669433596, "max": 10.174034118652344, "pos_frac": 0.703125, "sample": [3.4774246215820312, 2.5474510192871094, -1.72607421875, 1.7708740234375, 4.945648193359375, 0.7746353149414062, 0.531219482421875, 2.954345703125, -0.7879009246826172, 1.257904052734375, -2.2292232513427734, 2.3517913818359375, 1.347188949584961, 0.5960426330566406, -0.7627792358398438, -0.18135643005371094, -1.2802906036376953, 2.8899078369140625, -3.013864517211914, 10.174034118652344, 1.4087677001953125, 2.6869277954101562, -0.1469879150390625, -0.7820358276367188, -1.3331642150878906, -0.23587989807128906, 2.4346389770507812, 0.09758377075195312, 0.7580795288085938, 3.4796142578125, 1.1547794342041016, -1.8286972045898438, -0.40076446533203125, 4.567024230957031, 1.8393630981445312, 1.3283538818359375, -2.0652618408203125, 7.3466949462890625, 2.39141845703125, 1.9713554382324219, 2.2866363525390625, 0.3474006652832031, 0.6417083740234375, 2.1013336181640625, -0.5134544372558594, 1.5820999145507812, 1.3973007202148438, 2.6041259765625, 2.268758773803711, 1.769378662109375, 1.6347274780273438, 4.832796096801758, 1.237701416015625, 2.5932540893554688, 1.5425453186035156, 1.4949569702148438, 4.146772384643555, -3.2225723266601562, 0.9005508422851562, 0.42873382568359375, -1.1960124969482422, -0.7989044189453125, -0.5818729400634766, 1.9259490966796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000611.npy"} +{"epoch": 0.9236583522297808, "step": 612, "batch_size": 64, "mean": 1.981992244720459, "std": 2.7112112045288086, "min": -4.785747528076172, "p10": -0.7328348159790038, "median": 1.6223030090332031, "p90": 6.052956008911133, "max": 9.682121276855469, "pos_frac": 0.8125, "sample": [0.138580322265625, -2.9854278564453125, 2.800933837890625, -0.9883804321289062, 0.70086669921875, -0.47740936279296875, 3.3029136657714844, 2.97247314453125, 0.11970138549804688, 0.3863677978515625, 3.7517471313476562, 6.8604736328125, 3.3249969482421875, 0.0655670166015625, 2.2940292358398438, 3.0104522705078125, 1.6184654235839844, 0.4957733154296875, 0.6382064819335938, 2.3018951416015625, 1.7728652954101562, -3.473102569580078, 0.9561309814453125, 0.86907958984375, -0.7599029541015625, 1.5048713684082031, 3.9556808471679688, -1.1102561950683594, -0.4263267517089844, 0.39404296875, 2.3680801391601562, 6.033535003662109, 0.132232666015625, 0.4243030548095703, -0.6696758270263672, 5.041313171386719, 1.2406692504882812, 3.9632339477539062, 6.5974884033203125, -2.7679290771484375, 6.061279296875, 0.8490104675292969, 5.9147491455078125, 1.29901123046875, 6.204254150390625, 6.5595245361328125, 2.4729270935058594, -0.17189407348632812, 2.4435195922851562, 9.682121276855469, 1.7788543701171875, 1.407806396484375, 6.916046142578125, 3.3549957275390625, 2.8280200958251953, 3.768035888671875, 2.3324737548828125, -0.37237548828125, 1.254119873046875, -4.785747528076172, 2.929941177368164, 4.929435729980469, 1.1867027282714844, 1.6261405944824219], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000612.npy"} +{"epoch": 0.9251700680272109, "step": 613, "batch_size": 64, "mean": 2.2652783393859863, "std": 2.803166389465332, "min": -4.7428131103515625, "p10": -0.43674869537353506, "median": 1.807241439819336, "p90": 6.251707267761233, "max": 9.782066345214844, "pos_frac": 0.828125, "sample": [-0.25942230224609375, 2.0403594970703125, 0.6661148071289062, 4.231658935546875, 2.3910694122314453, 3.2475662231445312, 2.4988250732421875, 0.8943939208984375, -2.0129852294921875, -0.3562488555908203, 2.400899887084961, 1.6435165405273438, -3.8399581909179688, 2.396270751953125, 2.1722793579101562, 5.260856628417969, 0.6442451477050781, 1.3015594482421875, 4.1176605224609375, -0.0612030029296875, 0.20278358459472656, -0.20583724975585938, -2.16046142578125, 0.9876594543457031, 5.660287857055664, 1.7719287872314453, 6.603265762329102, 1.4349441528320312, 0.11904144287109375, 1.4655609130859375, 4.914222717285156, 3.1208553314208984, 2.0382461547851562, 0.2755603790283203, 1.0125045776367188, 1.3664093017578125, -0.5110092163085938, 8.257553100585938, 3.616178512573242, 9.782066345214844, 5.178098678588867, 3.6427459716796875, 1.1014404296875, 7.007537841796875, 8.114280700683594, 0.7873020172119141, 5.287208557128906, 3.3383846282958984, -0.4712486267089844, 2.876537322998047, 1.664520263671875, 0.708251953125, 0.9357872009277344, 6.5051727294921875, 2.9220733642578125, 1.8425540924072266, 2.207063674926758, 4.794990539550781, 0.8287258148193359, 8.446981430053711, 0.6428756713867188, 2.8059654235839844, -0.5758438110351562, -4.7428131103515625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000613.npy"} +{"epoch": 0.926681783824641, "step": 614, "batch_size": 64, "mean": 1.701621174812317, "std": 1.9823886156082153, "min": -4.085742950439453, "p10": -0.4411552429199217, "median": 1.3833045959472656, "p90": 3.893332672119141, "max": 9.148681640625, "pos_frac": 0.84375, "sample": [4.267208099365234, 3.750080108642578, 1.0533180236816406, 1.1692581176757812, -0.5085906982421875, 2.7933883666992188, 1.799041748046875, 1.2472801208496094, 1.0445365905761719, 1.5193290710449219, 3.0343170166015625, 2.350515365600586, 5.3321533203125, 2.654376983642578, 2.6520233154296875, 1.028085708618164, 1.7254753112792969, 3.8336410522460938, 4.5426177978515625, 3.99981689453125, 1.7997398376464844, 1.0472946166992188, -1.1145782470703125, -1.076080322265625, 9.148681640625, -0.141448974609375, 0.7408676147460938, -2.041412353515625, 2.3478050231933594, 2.6759490966796875, 0.42942047119140625, 1.1664085388183594, 0.3317413330078125, 3.661752700805664, 0.36150169372558594, 1.1548004150390625, 4.222450256347656, 0.675933837890625, -4.085742950439453, 2.4782791137695312, -0.2391338348388672, 3.8708648681640625, 2.9636459350585938, 3.4740047454833984, 1.7227554321289062, 1.796884536743164, 1.0667228698730469, 1.1718902587890625, 1.1489486694335938, -0.5847244262695312, 1.2241497039794922, 1.0842132568359375, -1.4048080444335938, -0.28380584716796875, 0.16299057006835938, 3.7768173217773438, 3.9029617309570312, 1.7232437133789062, 2.215240478515625, 0.480194091796875, 0.45198631286621094, 2.6088638305664062, 0.0217132568359375, 3.4769020080566406], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000614.npy"} +{"epoch": 0.9281934996220711, "step": 615, "batch_size": 64, "mean": 1.8358616828918457, "std": 2.6286206245422363, "min": -3.7326812744140625, "p10": -1.2302745819091796, "median": 1.6270217895507812, "p90": 5.092540550231933, "max": 9.6796875, "pos_frac": 0.703125, "sample": [-0.13689422607421875, 2.0813217163085938, 2.5113048553466797, -0.012531280517578125, 1.7382354736328125, 3.911008834838867, 1.2191085815429688, -2.5667800903320312, 5.038057327270508, 0.9729690551757812, -0.22449493408203125, 2.0311965942382812, 0.1478271484375, 2.831817626953125, 1.0187759399414062, -1.2502021789550781, 1.5155715942382812, 6.029457092285156, 4.1437530517578125, -0.1908416748046875, 3.2690258026123047, -0.0014190673828125, 7.017936706542969, -0.8211994171142578, 2.721769332885742, 1.7437591552734375, -1.3076248168945312, 2.8212432861328125, 2.1384124755859375, 4.978237152099609, 4.044273376464844, 0.4421825408935547, -1.6761016845703125, -1.8608970642089844, 2.6590957641601562, 3.5740013122558594, 5.343780517578125, 2.3201446533203125, -0.8491363525390625, 3.165496826171875, 5.354835510253906, 8.443748474121094, 4.026645660400391, 2.3395843505859375, 1.9556961059570312, 0.9660797119140625, 1.51580810546875, 1.4292144775390625, -0.766632080078125, 4.2861328125, 3.021514892578125, 9.6796875, -1.18377685546875, 0.23488235473632812, -0.840545654296875, 4.759368896484375, -0.09806060791015625, 1.2581405639648438, 5.1158905029296875, 0.6787738800048828, 1.4467697143554688, -0.9443588256835938, -3.7326812744140625, -1.98321533203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000615.npy"} +{"epoch": 0.9297052154195011, "step": 616, "batch_size": 64, "mean": 1.365952491760254, "std": 2.3698673248291016, "min": -3.3258800506591797, "p10": -0.986444091796875, "median": 1.099233627319336, "p90": 4.223157119750977, "max": 7.467742919921875, "pos_frac": 0.671875, "sample": [3.8640594482421875, 0.3617134094238281, -0.06951904296875, -0.9910812377929688, 3.085399627685547, -0.5513019561767578, 7.40216064453125, 1.8767242431640625, -0.8619499206542969, 1.599386215209961, 1.4440193176269531, 0.9761581420898438, 0.0736541748046875, -3.128448486328125, -0.16900634765625, -3.3258800506591797, 0.5773773193359375, -0.8260231018066406, 0.16328048706054688, 7.467742919921875, 0.32801055908203125, 6.567926406860352, -1.4999771118164062, -0.9368095397949219, 6.626985549926758, -0.5260848999023438, 0.36274147033691406, -1.8700027465820312, -0.04454803466796875, 3.6433258056640625, 2.6028900146484375, 1.6613922119140625, 1.1854057312011719, 1.647735595703125, 4.099498748779297, 2.644458770751953, -0.8670921325683594, 1.2010765075683594, 0.0400543212890625, 3.1388072967529297, 2.860614776611328, 2.676300048828125, 1.3453750610351562, 2.4448699951171875, 6.359779357910156, -0.31610107421875, 0.7440357208251953, -0.7118492126464844, -1.288787841796875, 1.0130615234375, -0.9756240844726562, -0.8327789306640625, 2.1758041381835938, 2.1233673095703125, 1.886962890625, 4.4104766845703125, 1.9427947998046875, 2.3492164611816406, 3.9640159606933594, -1.1457061767578125, 2.668428421020508, 4.276153564453125, 0.7359619140625, -0.2596759796142578], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000616.npy"} +{"epoch": 0.9312169312169312, "step": 617, "batch_size": 64, "mean": 1.4980677366256714, "std": 2.316788911819458, "min": -3.4710464477539062, "p10": -0.9047964096069336, "median": 1.0987319946289062, "p90": 4.556755828857423, "max": 8.544937133789062, "pos_frac": 0.75, "sample": [0.5447311401367188, 0.8237953186035156, -1.4484405517578125, 3.954315185546875, 4.7528533935546875, 0.9853496551513672, 2.1705169677734375, 2.628244400024414, -0.24925613403320312, -0.9077301025390625, 1.386260986328125, 2.3337783813476562, -0.4237213134765625, 1.6271495819091797, 0.3814067840576172, 2.248302459716797, 2.000091552734375, 1.6149063110351562, -0.5262451171875, 0.6056461334228516, 7.326873779296875, 0.20245742797851562, 0.041812896728515625, 4.365814208984375, 1.9672088623046875, -0.8979511260986328, 1.0746269226074219, 3.2155380249023438, -0.20139312744140625, 2.357006072998047, 1.2895965576171875, 0.012714385986328125, 1.1177902221679688, 4.028972625732422, 3.5447998046875, 8.544937133789062, 1.4320220947265625, 0.32196044921875, -0.01055145263671875, 4.638587951660156, 3.8627853393554688, 1.6939239501953125, -0.7736988067626953, -0.03716278076171875, 5.755199432373047, 2.642332077026367, -3.4710464477539062, -1.6835784912109375, -1.9013557434082031, 0.5679931640625, 2.3383865356445312, 4.691856384277344, 1.1937026977539062, -1.5923786163330078, 7.657135009765625, 1.0796737670898438, 0.7748870849609375, 0.2935791015625, 2.078296661376953, 0.8523101806640625, 0.9247550964355469, -0.6957664489746094, 2.7060298919677734, -1.9563064575195312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000617.npy"} +{"epoch": 0.9327286470143613, "step": 618, "batch_size": 64, "mean": 1.6541210412979126, "std": 2.2497682571411133, "min": -2.6528778076171875, "p10": -0.8913652420043945, "median": 1.3757686614990234, "p90": 4.059275817871095, "max": 8.513790130615234, "pos_frac": 0.78125, "sample": [4.664794921875, 3.334911346435547, 2.9272003173828125, 1.208648681640625, -1.05450439453125, 0.37598609924316406, -0.18196868896484375, -2.3809890747070312, 2.772430419921875, -0.012912750244140625, 0.9853687286376953, 3.4183349609375, 2.374835968017578, 2.6250534057617188, 8.513790130615234, 1.1584548950195312, 0.6248550415039062, 1.0241737365722656, 6.6771087646484375, 4.6824798583984375, 2.3752975463867188, 8.459693908691406, 0.6359920501708984, 3.1992721557617188, -0.8368606567382812, 3.85205078125, -0.7177619934082031, 2.088165283203125, 2.2885971069335938, 0.48502159118652344, -0.419158935546875, -0.6293888092041016, 2.27239990234375, 0.7090072631835938, 3.0809326171875, 2.7884674072265625, 3.2763290405273438, -1.1038894653320312, 2.6147422790527344, 2.5106735229492188, -0.9147243499755859, 0.3380775451660156, 0.5205612182617188, -0.5343589782714844, 1.0033187866210938, -2.146759033203125, 0.3470287322998047, -2.6528778076171875, 2.5880813598632812, 1.5536117553710938, 0.3907318115234375, 4.616518020629883, 2.1208648681640625, 1.6584091186523438, 0.7270126342773438, 1.332763671875, 0.7610015869140625, 1.6500091552734375, 1.3816184997558594, 3.628753662109375, 3.7739944458007812, 4.1480865478515625, -2.4655303955078125, 1.3699188232421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000618.npy"} +{"epoch": 0.9342403628117913, "step": 619, "batch_size": 64, "mean": 1.5303157567977905, "std": 2.3749375343322754, "min": -4.7578887939453125, "p10": -0.7883121490478515, "median": 0.9670219421386719, "p90": 4.986734008789063, "max": 6.9871826171875, "pos_frac": 0.71875, "sample": [2.2399559020996094, -1.0708160400390625, 0.8854598999023438, 0.6324806213378906, 6.013523101806641, 3.72796630859375, 5.0565338134765625, -2.1032028198242188, -2.6251754760742188, 4.2178802490234375, 2.485321044921875, 0.7137413024902344, 4.3995819091796875, 0.027423858642578125, 5.719764709472656, -0.5470561981201172, 1.98663330078125, 1.6429443359375, 0.5036392211914062, -0.446319580078125, 0.39324188232421875, 4.8238677978515625, -0.08738517761230469, -0.7762451171875, -0.7934837341308594, 2.25054931640625, 2.6894874572753906, 1.05804443359375, 0.066192626953125, -0.7139530181884766, 1.048583984375, -4.7578887939453125, -1.21307373046875, 0.227691650390625, 2.7183876037597656, 4.386936187744141, 0.4358329772949219, 5.789333343505859, 5.515827178955078, -1.9313812255859375, 0.3392620086669922, 5.1588134765625, 2.1271400451660156, -0.6056232452392578, 6.9871826171875, 2.9102401733398438, 1.2469539642333984, 1.9185333251953125, 2.542064666748047, 0.3362922668457031, 0.7319145202636719, 2.130596160888672, 3.9976673126220703, -0.228790283203125, 3.6746826171875, 0.21823883056640625, 0.5893096923828125, 4.236591339111328, -0.12000846862792969, -0.00594329833984375, -0.57574462890625, 2.7950515747070312, -0.5396728515625, 3.4846153259277344], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000619.npy"} +{"epoch": 0.9357520786092215, "step": 620, "batch_size": 64, "mean": 1.4362269639968872, "std": 2.185764789581299, "min": -3.0702133178710938, "p10": -0.8364492416381836, "median": 0.9948310852050781, "p90": 4.756367492675783, "max": 7.1688385009765625, "pos_frac": 0.71875, "sample": [-0.1956329345703125, -0.03076171875, -1.1186637878417969, 4.9152984619140625, -1.93475341796875, 1.070261001586914, 2.68096923828125, 4.385528564453125, 0.6322250366210938, 3.6190624237060547, 5.2978515625, 5.728534698486328, 1.04632568359375, 0.0016326904296875, 0.41815948486328125, 2.9615478515625, 1.7957382202148438, 1.6136894226074219, 0.9232177734375, 0.9433364868164062, -2.481658935546875, 0.6695289611816406, -1.79638671875, 7.1688385009765625, -3.0702133178710938, 2.182464599609375, 0.5291595458984375, -0.7901611328125, 7.103096008300781, -0.9396896362304688, 3.6118602752685547, 1.6225967407226562, 0.06708335876464844, -0.45146751403808594, 0.1741943359375, 1.2295913696289062, 0.004314422607421875, 2.5914230346679688, 2.2525577545166016, -0.3159065246582031, 3.282938003540039, 0.8093490600585938, 2.272256851196289, 1.1474475860595703, -0.7863445281982422, -0.26168060302734375, 5.243156433105469, 3.03765869140625, -0.8562870025634766, 2.3969192504882812, 0.7956123352050781, 5.1605224609375, -0.60595703125, -0.32901573181152344, -0.177459716796875, -0.24907684326171875, 2.0254383087158203, 0.6797714233398438, 2.572591781616211, 1.9227294921875, 2.7775096893310547, 3.2129364013671875, 2.993440628051758, 0.7392787933349609], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000620.npy"} +{"epoch": 0.9372637944066515, "step": 621, "batch_size": 64, "mean": 1.4205416440963745, "std": 2.2776520252227783, "min": -3.6952362060546875, "p10": -1.1678701400756835, "median": 1.1128149032592773, "p90": 5.0079391479492195, "max": 7.624603271484375, "pos_frac": 0.734375, "sample": [1.1124629974365234, 0.3873443603515625, 1.1131668090820312, -0.13296890258789062, -1.5635528564453125, -3.6952362060546875, 1.830352783203125, 7.624603271484375, -1.6052608489990234, 0.22258377075195312, 1.7251396179199219, 5.6346282958984375, 4.118125915527344, 0.46404457092285156, 5.378955841064453, 3.8179473876953125, 1.4683856964111328, -0.0572662353515625, 0.976593017578125, 3.889434814453125, -0.8330078125, 0.154754638671875, 3.3719329833984375, -1.6359004974365234, 3.8545074462890625, 5.593652725219727, 4.9353179931640625, -1.5208282470703125, 1.7862701416015625, -0.4965629577636719, 6.21478271484375, 2.5078811645507812, 2.1057815551757812, 1.768402099609375, 0.7668704986572266, 0.49600982666015625, 1.6827220916748047, 0.9462528228759766, 2.4419784545898438, 0.5247955322265625, -0.9162998199462891, -0.2108173370361328, 2.896087646484375, 2.6099624633789062, 0.24125289916992188, -0.019916534423828125, 1.5157699584960938, 0.015010833740234375, -1.1216888427734375, 1.3475837707519531, 0.3991813659667969, 2.4459800720214844, 1.5632553100585938, 0.18632888793945312, 3.9317970275878906, -1.187662124633789, -0.7910289764404297, 5.0390625, -2.581514358520508, 1.6583824157714844, -0.128662109375, 0.0253753662109375, 1.5813846588134766, 5.040740966796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000621.npy"} +{"epoch": 0.9387755102040817, "step": 622, "batch_size": 64, "mean": 1.5050253868103027, "std": 2.034341335296631, "min": -2.6682777404785156, "p10": -0.8921796798706054, "median": 1.0946292877197266, "p90": 4.226606750488282, "max": 6.885650634765625, "pos_frac": 0.828125, "sample": [3.1700210571289062, 0.3764991760253906, 1.92669677734375, -2.6682777404785156, 3.3701610565185547, -0.35869598388671875, 0.39179229736328125, 2.4391403198242188, -1.0681819915771484, 1.6179962158203125, 5.265344619750977, 2.146289825439453, 0.6021175384521484, -2.0886268615722656, 4.073738098144531, 0.4552459716796875, 1.4341392517089844, 1.4037284851074219, 3.448577880859375, 6.7287139892578125, 0.9777870178222656, 2.128549575805664, -0.8326282501220703, 0.32384681701660156, 5.173187255859375, 0.10205841064453125, 1.0594635009765625, 2.7051773071289062, 1.3840465545654297, 1.0021820068359375, 6.119941711425781, -0.756988525390625, 2.175537109375, 0.5438880920410156, 1.3578414916992188, 0.6215610504150391, 4.9460906982421875, 4.292121887207031, 3.0846939086914062, 0.6082000732421875, 3.3143310546875, 2.1108169555664062, 0.4333915710449219, 0.3869609832763672, -0.052700042724609375, 2.4267959594726562, 0.060638427734375, -0.9407119750976562, 0.20849227905273438, 1.2270088195800781, 3.3345489501953125, 0.19538497924804688, 0.2853507995605469, -1.3670578002929688, 1.1265983581542969, 0.182342529296875, 6.885650634765625, -0.9177017211914062, -1.399789810180664, 0.43048858642578125, 2.974020004272461, 2.0770435333251953, 1.0626602172851562, 2.594085693359375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000622.npy"} +{"epoch": 0.9402872260015117, "step": 623, "batch_size": 64, "mean": 1.8289923667907715, "std": 2.16849946975708, "min": -3.239227294921875, "p10": -0.5355508804321288, "median": 1.515096664428711, "p90": 4.835415649414063, "max": 6.8300323486328125, "pos_frac": 0.796875, "sample": [2.3460769653320312, 0.7031478881835938, -0.43454742431640625, -0.5691204071044922, 0.3975830078125, 4.920074462890625, 2.6837692260742188, 3.328227996826172, 3.774383544921875, 3.4044761657714844, 0.1607818603515625, 0.7784976959228516, 3.8788986206054688, 2.6341552734375, -0.36473846435546875, 6.8300323486328125, -1.2456932067871094, 4.511569976806641, 5.1592559814453125, 2.180511474609375, 1.6966133117675781, 0.6258220672607422, 3.2396926879882812, -1.9446334838867188, -1.417022705078125, 2.5655746459960938, 4.63787841796875, 2.6608505249023438, -3.239227294921875, 4.0878143310546875, 3.039276123046875, 6.823938369750977, 2.2528915405273438, 4.9310302734375, 0.545135498046875, -1.3275165557861328, 0.43746185302734375, 0.32126617431640625, 4.960453033447266, 3.0115127563476562, 3.3626022338867188, 5.475105285644531, 0.6251087188720703, -0.06877708435058594, -0.45175933837890625, 0.2758007049560547, 0.5008316040039062, 1.244527816772461, 0.3780841827392578, 1.3335800170898438, 0.8946533203125, 2.6804351806640625, 2.7761077880859375, 3.8104171752929688, -0.7316837310791016, 2.4623260498046875, 3.8892669677734375, 0.6525802612304688, 0.8536834716796875, 0.7352619171142578, 3.8319931030273438, 0.0926666259765625, -0.45722198486328125, -0.0962371826171875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000623.npy"} +{"epoch": 0.9417989417989417, "step": 624, "batch_size": 64, "mean": 1.6235092878341675, "std": 2.3766517639160156, "min": -3.7565040588378906, "p10": -0.905698776245117, "median": 1.5716285705566406, "p90": 4.622745513916016, "max": 8.519966125488281, "pos_frac": 0.71875, "sample": [-0.088348388671875, -1.4214019775390625, 2.30181884765625, 2.49151611328125, 7.476024627685547, 6.065067291259766, 0.127166748046875, 0.738433837890625, -0.6418609619140625, -1.0187721252441406, -0.12540435791015625, 1.6797409057617188, 2.027070999145508, 1.7009773254394531, 3.1115188598632812, 1.695648193359375, 1.0488624572753906, 2.926422119140625, 5.100341796875, -0.21781158447265625, -0.13589859008789062, 0.7861137390136719, -1.8685035705566406, 2.8148117065429688, 0.020898818969726562, 2.104524612426758, 0.29483795166015625, -2.3071746826171875, 0.45267486572265625, -0.1975250244140625, 4.6329193115234375, 4.342643737792969, 2.288848876953125, 2.840087890625, 2.1613731384277344, -1.4644336700439453, 5.893268585205078, 3.4167137145996094, 8.519966125488281, 2.9821701049804688, 3.137706756591797, 1.70404052734375, 3.135974884033203, 0.5358390808105469, -0.2540435791015625, 3.288055419921875, 6.4941558837890625, 0.209320068359375, -0.0954437255859375, 1.4635162353515625, 2.3431549072265625, -0.11339950561523438, -0.5113067626953125, -0.04188728332519531, -3.7565040588378906, 3.7753982543945312, 0.9526824951171875, 0.15254974365234375, 2.236541748046875, 0.7105979919433594, -2.2280731201171875, 0.7899990081787109, 2.8213882446289062, 4.599006652832031], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000624.npy"} +{"epoch": 0.9433106575963719, "step": 625, "batch_size": 64, "mean": 1.4002941846847534, "std": 2.292175054550171, "min": -3.281768798828125, "p10": -1.2901592254638667, "median": 1.0208520889282227, "p90": 4.622772598266603, "max": 8.393791198730469, "pos_frac": 0.71875, "sample": [0.6413364410400391, 3.721160888671875, 0.6050376892089844, -0.8111572265625, -1.5762710571289062, 2.600292205810547, 1.2129497528076172, 0.46965789794921875, 2.0102920532226562, 4.341461181640625, -0.15926361083984375, 0.03713226318359375, 1.7039299011230469, -0.0386810302734375, 3.821929931640625, 0.25238800048828125, 5.9540557861328125, 0.6169662475585938, -1.4582557678222656, 1.1586799621582031, 2.1760635375976562, 4.370449066162109, -0.322509765625, -3.281768798828125, 0.12105560302734375, 0.9311618804931641, 2.18072509765625, 3.3278961181640625, -0.056671142578125, 1.349090576171875, -0.6202316284179688, 4.7309112548828125, 6.384765625, 0.3911857604980469, 2.0644874572753906, -0.5054931640625, -1.7083663940429688, 4.9224090576171875, 0.8520431518554688, 1.2203369140625, 4.214813232421875, 8.393791198730469, 2.4703292846679688, 1.8017730712890625, 1.1105422973632812, -0.274932861328125, -2.1567764282226562, -0.8979339599609375, 0.4565849304199219, 1.8677597045898438, 0.3033905029296875, -0.437957763671875, -2.316770553588867, 5.138935089111328, 1.5516719818115234, -0.11255836486816406, 2.270355224609375, 0.36508941650390625, 2.2614612579345703, -2.1814613342285156, 0.6186065673828125, 3.701953887939453, 2.814382553100586, 5.02459716796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000625.npy"} +{"epoch": 0.9448223733938019, "step": 626, "batch_size": 64, "mean": 2.0213677883148193, "std": 2.8808743953704834, "min": -2.8834075927734375, "p10": -1.3001356124877927, "median": 1.627798080444336, "p90": 6.715299987792972, "max": 9.95440673828125, "pos_frac": 0.796875, "sample": [4.186164855957031, 2.6974639892578125, 3.716358184814453, -0.9533615112304688, 9.611175537109375, 4.674844741821289, 7.030204772949219, -2.6929569244384766, 0.6256389617919922, 4.74962043762207, 0.9709968566894531, 1.0867424011230469, 1.2544517517089844, 0.04408836364746094, -0.6388778686523438, 7.1862335205078125, 3.5044631958007812, -2.3656482696533203, 5.980522155761719, 0.7317104339599609, -0.9111042022705078, 0.9887847900390625, 1.6721229553222656, 5.871273040771484, 1.950204849243164, 0.8105506896972656, 1.5834732055664062, 3.1316070556640625, -2.8834075927734375, 9.95440673828125, 1.0898094177246094, -0.9057807922363281, 3.423828125, 2.1718902587890625, 1.383453369140625, -1.35321044921875, -0.9826583862304688, 1.2932147979736328, 0.41724395751953125, 2.4694290161132812, 2.2866477966308594, 1.0571041107177734, 7.281730651855469, 1.8343982696533203, -2.14208984375, 2.2207469940185547, 1.0951499938964844, 3.438610076904297, 2.5036468505859375, 1.9180526733398438, 3.354808807373047, 0.3430061340332031, -2.7426223754882812, 1.69183349609375, -1.5022964477539062, 4.5175323486328125, 0.6876678466796875, 7.13092041015625, 2.681276321411133, 2.25579833984375, 7.0423736572265625, -1.1762943267822266, 0.14705657958984375, 0.8675079345703125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000626.npy"} +{"epoch": 0.9463340891912321, "step": 627, "batch_size": 64, "mean": 2.0926780700683594, "std": 2.671339988708496, "min": -2.8788833618164062, "p10": -0.9364276885986326, "median": 1.6361579895019531, "p90": 6.680667114257813, "max": 7.602817535400391, "pos_frac": 0.78125, "sample": [2.4893341064453125, 2.287099838256836, 3.5316162109375, 6.564178466796875, 1.1144790649414062, 3.6618194580078125, 6.8984375, 5.47265625, 5.949920654296875, 0.819915771484375, -2.1472930908203125, 3.2894439697265625, 7.147403717041016, 1.459625244140625, 2.4466514587402344, 0.832061767578125, -0.675201416015625, 2.0054168701171875, 7.602817535400391, 0.164825439453125, 3.0069198608398438, 0.8056240081787109, 0.6080703735351562, 1.819366455078125, -0.0540313720703125, -2.8788833618164062, 4.1810455322265625, 0.02785491943359375, 3.819772720336914, -0.008157730102539062, 0.008087158203125, 7.307666778564453, 0.2831878662109375, -0.37422943115234375, 1.442840576171875, 2.7874069213867188, -1.8606185913085938, 0.47017669677734375, -0.17435264587402344, 3.823802947998047, 7.3091583251953125, 1.716531753540039, -0.19981765747070312, -1.0483818054199219, -1.7273101806640625, 1.2350845336914062, 0.6379547119140625, 1.5557842254638672, 0.7227401733398438, 6.81805419921875, 1.736368179321289, -2.1663742065429688, 0.3920631408691406, 3.4317398071289062, 5.43055534362793, 2.23809814453125, 0.8287200927734375, -1.719512939453125, 4.256858825683594, -0.14858245849609375, 2.4539451599121094, 4.582118988037109, 2.9082374572753906, 6.7305908203125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000627.npy"} +{"epoch": 0.9478458049886621, "step": 628, "batch_size": 64, "mean": 2.146991729736328, "std": 2.778433322906494, "min": -4.243186950683594, "p10": -0.7413055419921875, "median": 2.034256935119629, "p90": 6.045819091796876, "max": 8.787811279296875, "pos_frac": 0.828125, "sample": [7.90260124206543, 1.3997650146484375, 2.802692413330078, 3.2856884002685547, -0.39009857177734375, 0.282501220703125, 2.8214874267578125, 0.8956298828125, 5.5041961669921875, 2.103527069091797, 8.787811279296875, 7.744178771972656, 3.6736907958984375, 1.0814056396484375, 2.620859146118164, -0.25955963134765625, -3.9698257446289062, -3.75701904296875, 3.7819671630859375, 0.9629936218261719, 4.036352157592773, 1.7706451416015625, 0.45912742614746094, 1.5490264892578125, 2.4050827026367188, 3.64349365234375, -0.33922576904296875, 3.470733642578125, 2.1762008666992188, 6.537818908691406, 2.3055171966552734, 2.5959930419921875, -0.7231903076171875, 6.610898971557617, 1.5269584655761719, 5.8796844482421875, -0.8482818603515625, 0.19581222534179688, 1.6658554077148438, 0.1310272216796875, 3.0966529846191406, 0.783905029296875, 1.964986801147461, 3.7235984802246094, 2.8865203857421875, 2.8007354736328125, 7.8624420166015625, 3.4906368255615234, 6.1170196533203125, 3.616975784301758, 1.2296943664550781, 0.2411956787109375, 0.3430347442626953, -2.5035839080810547, 1.3580894470214844, 4.7200927734375, 0.47676849365234375, 2.71319580078125, 0.0469970703125, 0.9319992065429688, -1.412008285522461, 5.586753845214844, -0.7490692138671875, -4.243186950683594], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000628.npy"} +{"epoch": 0.9493575207860923, "step": 629, "batch_size": 64, "mean": 1.6754741668701172, "std": 2.070347785949707, "min": -2.130046844482422, "p10": -0.5789676666259765, "median": 1.3142738342285156, "p90": 4.489609527587891, "max": 8.335376739501953, "pos_frac": 0.8125, "sample": [0.08374786376953125, -0.22509002685546875, -2.130046844482422, 1.754110336303711, 1.1301918029785156, 0.30089569091796875, 0.27616119384765625, 3.0345287322998047, 2.1743698120117188, 0.0057373046875, 1.0017471313476562, 1.553314208984375, 1.565805435180664, 8.335376739501953, 1.1719188690185547, 0.20956039428710938, 1.9267520904541016, 1.3989791870117188, 1.422607421875, 4.893913269042969, -0.83148193359375, -0.26288604736328125, -0.6744060516357422, -0.6022872924804688, -0.8242950439453125, 0.1709136962890625, 4.258636474609375, 2.954742431640625, 0.5130329132080078, 4.5663604736328125, 0.0784912109375, -0.16440582275390625, 2.4643173217773438, 2.1824092864990234, 3.5189895629882812, 0.662200927734375, 3.562450408935547, 0.338470458984375, -0.37217140197753906, -0.5245552062988281, 0.8545379638671875, 0.4350395202636719, 4.2178802490234375, 0.39923667907714844, 0.8502349853515625, 1.3906173706054688, 1.2754898071289062, 2.7839889526367188, 2.469308853149414, 2.5773181915283203, 5.494606018066406, 0.7035331726074219, 4.86700439453125, 0.8754444122314453, 3.89483642578125, 3.1645660400390625, -1.4203338623046875, 4.310523986816406, 6.180721282958984, 5.098972320556641, 1.353057861328125, 4.190544128417969, -1.7054710388183594, 2.0695877075195312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000629.npy"} +{"epoch": 0.9508692365835223, "step": 630, "batch_size": 64, "mean": 1.9126391410827637, "std": 2.4552526473999023, "min": -1.6246604919433594, "p10": -0.6080814361572264, "median": 1.3138427734375, "p90": 4.974632263183595, "max": 8.65216064453125, "pos_frac": 0.734375, "sample": [-0.6475715637207031, 1.270355224609375, 0.4441184997558594, -0.5159378051757812, 4.040990829467773, 5.4215850830078125, 0.5593318939208984, -0.2712554931640625, 6.970207214355469, 3.2208709716796875, -0.3713722229003906, 3.4425392150878906, -1.2278518676757812, 5.0597076416015625, -0.36806488037109375, 4.422649383544922, -1.6246604919433594, 2.015016555786133, 3.5281448364257812, 7.767852783203125, -1.1376190185546875, 0.5828323364257812, 2.9601364135742188, 2.4687042236328125, 4.3529815673828125, 3.125579833984375, -1.1108055114746094, 1.5545978546142578, -0.33990478515625, 1.357330322265625, -0.09262466430664062, 5.466804504394531, -0.4283485412597656, 4.776123046875, 4.583221435546875, 0.2966461181640625, 2.6443519592285156, 1.08343505859375, 2.254161834716797, 0.23763275146484375, 8.65216064453125, 4.090654373168945, 3.6609554290771484, -1.4893016815185547, 7.685340881347656, 2.1326217651367188, 0.8361434936523438, 1.8928184509277344, 2.3701419830322266, 0.0164031982421875, 0.867401123046875, 0.02349853515625, -0.010288238525390625, 4.7040557861328125, 1.8679428100585938, -1.2332744598388672, -0.14108657836914062, 0.11583709716796875, 0.6973304748535156, 4.535621643066406, -0.1207275390625, 2.295379638671875, 0.9728317260742188, 0.21455001831054688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000630.npy"} +{"epoch": 0.9523809523809523, "step": 631, "batch_size": 64, "mean": 2.244609832763672, "std": 2.815060615539551, "min": -4.975799560546875, "p10": -0.8427995681762694, "median": 1.9227685928344727, "p90": 5.736634826660157, "max": 11.62164306640625, "pos_frac": 0.796875, "sample": [3.419940948486328, 0.15201568603515625, 1.4883575439453125, 3.5619125366210938, 3.4394874572753906, 4.413688659667969, 4.815093994140625, 2.5427780151367188, 0.5668926239013672, 2.2219161987304688, -4.975799560546875, -1.5397624969482422, 1.9124507904052734, 0.80584716796875, -0.32257843017578125, 0.25025177001953125, -0.3779621124267578, 2.227191925048828, 2.6334762573242188, 5.64154052734375, 6.2422332763671875, 0.02532958984375, 11.62164306640625, 4.580348968505859, 4.73809814453125, 2.692829132080078, 1.5978240966796875, 6.528167724609375, -0.4812030792236328, 1.0720577239990234, 0.7238044738769531, 1.0005874633789062, -1.5583724975585938, 1.9111213684082031, 3.3769760131835938, 3.053985595703125, 3.4473447799682617, 3.153594970703125, 8.94464111328125, 4.0011138916015625, 1.2740478515625, -2.3350791931152344, -0.2733612060546875, 1.5269298553466797, -0.5323104858398438, -1.828887939453125, 1.7153663635253906, 1.8779621124267578, -1.4630661010742188, 0.26312255859375, -0.8696823120117188, 2.5787200927734375, 0.7039031982421875, 5.083953857421875, -0.7800731658935547, 4.0928192138671875, 1.7272567749023438, 6.179351806640625, 1.9330863952636719, 5.471832275390625, 2.331451416015625, 7.14288330078125, 2.508556365966797, 5.7773895263671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000631.npy"} +{"epoch": 0.9538926681783825, "step": 632, "batch_size": 64, "mean": 2.19085693359375, "std": 2.2975857257843018, "min": -3.4480714797973633, "p10": -0.38521156311035154, "median": 2.103337287902832, "p90": 4.957321357727051, "max": 8.327861785888672, "pos_frac": 0.828125, "sample": [3.3419322967529297, 5.0048828125, 1.11083984375, 0.411895751953125, 3.291278839111328, 4.924673080444336, 0.2762279510498047, 5.867027282714844, 0.3024139404296875, 2.8976364135742188, -0.0088653564453125, -0.7825469970703125, 2.326019287109375, -0.3986320495605469, -0.2730579376220703, 3.9211158752441406, 2.756481170654297, 0.9438133239746094, 4.212541580200195, -3.4480714797973633, 1.6294097900390625, 0.9936695098876953, -2.8752288818359375, 2.3312454223632812, 1.1845779418945312, 4.8652496337890625, 2.123199462890625, 3.2049331665039062, 2.0068531036376953, 1.951446533203125, 4.9713134765625, 6.374237060546875, 1.6890983581542969, -0.23677825927734375, 1.9832382202148438, 2.7824249267578125, 3.1913986206054688, 2.2239513397216797, 4.5879974365234375, 2.5565929412841797, 4.00323486328125, 3.1277694702148438, 3.0324478149414062, 1.8404922485351562, 0.7718620300292969, 3.858135223388672, 2.2442626953125, 1.097311019897461, 1.6620635986328125, 3.9838218688964844, 6.844757080078125, -2.1156005859375, 3.8023681640625, 8.327861785888672, -1.7194957733154297, 0.968414306640625, 4.891670227050781, 5.018707275390625, 1.781494140625, 2.083475112915039, 1.5208663940429688, 1.2187042236328125, -1.8923225402832031, -0.3538970947265625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000632.npy"} +{"epoch": 0.9554043839758125, "step": 633, "batch_size": 64, "mean": 1.7362934350967407, "std": 2.4641273021698, "min": -3.4561500549316406, "p10": -0.8007537841796873, "median": 1.4874324798583984, "p90": 5.279849243164063, "max": 9.308868408203125, "pos_frac": 0.734375, "sample": [0.3808307647705078, 1.661996841430664, 2.3321990966796875, 2.8234939575195312, 4.679893493652344, 1.9062652587890625, 2.0277252197265625, -0.3091621398925781, 1.1029605865478516, 2.3794326782226562, 0.2017974853515625, -0.6573219299316406, 5.443092346191406, 0.510498046875, -0.6125679016113281, 3.2417068481445312, 0.9544906616210938, 0.8813934326171875, 1.1605224609375, -1.2666778564453125, 1.4934005737304688, -0.17967987060546875, 4.2837371826171875, 5.1448822021484375, 2.0447959899902344, 2.07708740234375, 0.10747718811035156, -0.2780799865722656, 1.5034751892089844, 2.2407188415527344, 2.8047637939453125, 0.4947643280029297, 3.2248783111572266, 0.4089202880859375, -0.511138916015625, 2.2194557189941406, 0.22068023681640625, -1.5358810424804688, -0.19049835205078125, -2.8466796875, -0.030426025390625, -0.8622245788574219, 0.49155235290527344, 9.308868408203125, 2.9658279418945312, -0.045501708984375, 1.4814643859863281, -1.3435039520263672, 5.930450439453125, 4.706268310546875, 5.528099060058594, 2.498128890991211, 3.8412094116210938, 5.088184356689453, 1.0051021575927734, -2.1724414825439453, 0.21051406860351562, -0.5021781921386719, 4.3955841064453125, 5.405464172363281, 6.811309814453125, -3.4561500549316406, 5.3376922607421875, 2.9598388671875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000633.npy"} +{"epoch": 0.9569160997732427, "step": 634, "batch_size": 64, "mean": 1.0649151802062988, "std": 2.407684326171875, "min": -2.9448318481445312, "p10": -1.6761226654052734, "median": 0.4356803894042969, "p90": 4.3220771789550785, "max": 7.597263336181641, "pos_frac": 0.59375, "sample": [-0.3273963928222656, -0.194183349609375, 1.2251815795898438, 6.196022033691406, 3.7634963989257812, -0.9420528411865234, -1.6015739440917969, -0.25530242919921875, -2.1552963256835938, -1.5805587768554688, 0.21700096130371094, -0.2456207275390625, -0.9560527801513672, -0.19276809692382812, 0.4204864501953125, 1.5863285064697266, 2.2794055938720703, 1.0795154571533203, 0.26683807373046875, -0.8607845306396484, -1.8810272216796875, -0.5071277618408203, 3.161376953125, -2.9448318481445312, 6.4829864501953125, -0.6478519439697266, 4.397686004638672, 2.4049434661865234, 5.0606842041015625, 0.10398101806640625, -2.9126434326171875, 2.7548656463623047, 1.9482307434082031, -1.6463890075683594, 3.4762439727783203, 0.2324848175048828, -2.3214111328125, -1.6888656616210938, 7.0185394287109375, 3.3919029235839844, 3.3294219970703125, 0.33673095703125, -0.714141845703125, 1.7474098205566406, 0.885955810546875, -1.8811454772949219, -0.05119132995605469, 1.0210704803466797, 7.597263336181641, 0.6444053649902344, -0.11439132690429688, -0.5323410034179688, 1.8258895874023438, -0.022735595703125, 1.2344894409179688, 1.2180290222167969, 4.145656585693359, 2.0013427734375, 1.5784797668457031, 4.633216857910156, 0.45087432861328125, 1.4466514587402344, -0.2345428466796875, 4.001708984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000634.npy"} +{"epoch": 0.9584278155706727, "step": 635, "batch_size": 64, "mean": 1.6092685461044312, "std": 2.7757174968719482, "min": -3.16485595703125, "p10": -1.589514923095703, "median": 1.0441818237304688, "p90": 5.27186737060547, "max": 9.295280456542969, "pos_frac": 0.625, "sample": [-0.9601707458496094, 3.0447120666503906, -2.6426219940185547, -0.47572898864746094, 6.465705871582031, 1.6604995727539062, -1.6488456726074219, -2.7902965545654297, -0.5348434448242188, 6.5167999267578125, 0.7146263122558594, 2.8348922729492188, 0.44873046875, -0.5102081298828125, 4.67694091796875, 3.3394813537597656, 4.274242401123047, 1.3705978393554688, 6.10772705078125, 3.1365432739257812, -3.16485595703125, -0.4049530029296875, -0.3994598388671875, 7.475059509277344, 0.194976806640625, -0.02581024169921875, 3.2141571044921875, -0.5145797729492188, -0.2659587860107422, 0.9555397033691406, 1.84466552734375, -1.3814315795898438, 4.185146331787109, 1.3734664916992188, -0.42328643798828125, 2.4014511108398438, 4.6916046142578125, -0.22134780883789062, 2.8629913330078125, -1.4510765075683594, 4.2991180419921875, 4.959999084472656, -0.3882331848144531, 7.5743865966796875, 1.0229339599609375, -1.9863052368164062, 3.1674423217773438, 3.7210922241210938, 5.405525207519531, -0.7588043212890625, 2.1493568420410156, 9.295280456542969, 1.0654296875, -0.15761566162109375, -1.8144454956054688, 0.2064361572265625, 4.018829345703125, 0.278472900390625, 0.35455322265625, 2.147634506225586, -2.1295852661132812, 2.2016944885253906, 2.8434371948242188, -0.4585304260253906], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000635.npy"} +{"epoch": 0.9599395313681028, "step": 636, "batch_size": 64, "mean": 1.9465044736862183, "std": 1.9336965084075928, "min": -1.8944473266601562, "p10": -0.28747901916503904, "median": 1.8664913177490234, "p90": 4.161130523681641, "max": 6.5283203125, "pos_frac": 0.84375, "sample": [3.098278045654297, -0.6175460815429688, 2.0039634704589844, 0.10684585571289062, 1.9345512390136719, 2.81951904296875, 2.678974151611328, -0.04302978515625, 0.7401580810546875, 4.1201629638671875, 2.33935546875, 0.4909038543701172, 4.178688049316406, 2.9881324768066406, 1.8143234252929688, 1.825653076171875, 2.2017822265625, 1.1576080322265625, 2.275554656982422, 2.0326461791992188, 1.2289581298828125, 1.7689781188964844, 0.3880767822265625, 3.031007766723633, 1.4149112701416016, 0.7182769775390625, 6.5283203125, 0.05640411376953125, -0.2354278564453125, 0.7841720581054688, 0.2518348693847656, 2.3492164611816406, 6.0094757080078125, 2.0251312255859375, -0.6585159301757812, 2.55181884765625, 1.7623023986816406, 3.0620689392089844, -0.7506484985351562, 5.317329406738281, 2.961557388305664, 3.9169769287109375, -0.2595558166503906, 6.451568603515625, 0.003406524658203125, -1.8944473266601562, 5.0304107666015625, 2.8635711669921875, 1.8338890075683594, 3.3739700317382812, 3.36126708984375, 0.8592453002929688, 3.2307586669921875, 6.201652526855469, 3.9521484375, -1.4006195068359375, -1.7574386596679688, 0.9381771087646484, 1.4308280944824219, 3.8003387451171875, 0.7831192016601562, -0.29944610595703125, 1.5456047058105469, 1.8990936279296875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000636.npy"} +{"epoch": 0.9614512471655329, "step": 637, "batch_size": 64, "mean": 1.5811973810195923, "std": 2.567361354827881, "min": -4.561553955078125, "p10": -0.810865020751953, "median": 1.0814552307128906, "p90": 5.69444923400879, "max": 7.935962677001953, "pos_frac": 0.71875, "sample": [-0.0022735595703125, 2.068572998046875, 1.8604354858398438, 6.832855224609375, 0.6494598388671875, 2.98095703125, -0.7134475708007812, 1.2940349578857422, 4.0372314453125, 0.8516654968261719, -0.1340656280517578, 0.9829483032226562, 2.2661075592041016, -0.15169525146484375, 1.0768966674804688, 2.3600940704345703, 0.21566390991210938, 1.0732383728027344, 7.935962677001953, 0.020109176635742188, 0.7011260986328125, -0.02477264404296875, 0.9488372802734375, 1.1405029296875, 0.0392913818359375, 1.2578887939453125, 0.08321380615234375, 5.767143249511719, 4.9619598388671875, 7.917694091796875, 6.9298095703125, -0.06133270263671875, -2.2546539306640625, 3.2393341064453125, -0.260589599609375, -0.3300323486328125, -3.272674560546875, 2.1965255737304688, 1.0860137939453125, 4.742851257324219, -0.102508544921875, 1.6951904296875, -0.3121223449707031, 1.5090465545654297, 3.224853515625, 1.0486984252929688, -1.0786514282226562, 2.12750244140625, -0.6204872131347656, 1.2758407592773438, 1.4447021484375, 6.5597686767578125, 0.8235054016113281, 2.9403762817382812, -0.8526153564453125, -1.49267578125, 5.524829864501953, 1.7409896850585938, 0.2812919616699219, -4.561553955078125, 2.0927352905273438, 7.102756500244141, -0.9187240600585938, 1.430999755859375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000637.npy"} +{"epoch": 0.9629629629629629, "step": 638, "batch_size": 64, "mean": 2.252800464630127, "std": 2.5250089168548584, "min": -3.4429779052734375, "p10": -0.32550601959228503, "median": 1.6777420043945312, "p90": 5.40420322418213, "max": 9.198379516601562, "pos_frac": 0.8125, "sample": [0.1530303955078125, 4.607736587524414, 5.535980224609375, 0.6366043090820312, -2.3719329833984375, 0.6189651489257812, 5.581209182739258, 1.1533050537109375, 0.5837345123291016, -0.000926971435546875, 1.4414825439453125, -0.12805938720703125, 0.18970870971679688, 2.997528076171875, 1.424774169921875, 5.500091552734375, -1.609161376953125, -1.0753402709960938, 4.648796081542969, 1.692626953125, 2.77984619140625, 2.4942550659179688, 4.339141845703125, 4.0857086181640625, -3.4429779052734375, 9.198379516601562, 3.1593093872070312, 4.626089096069336, 0.3351573944091797, 0.906829833984375, 1.5856609344482422, 5.180463790893555, 3.774566650390625, 6.381191253662109, 3.653961181640625, -0.3688945770263672, -0.22426605224609375, 2.897003173828125, 5.015533447265625, 4.078121185302734, 4.132621765136719, 3.681324005126953, 2.9138870239257812, -0.1405029296875, 4.9615020751953125, -0.6288108825683594, 0.15143394470214844, 3.365375518798828, 1.170501708984375, 4.686580657958984, 0.29254913330078125, 3.340789794921875, 6.964508056640625, 1.6628570556640625, -1.6762123107910156, 2.2843379974365234, 0.18030929565429688, 6.930164337158203, 4.788259506225586, 0.52728271484375, 0.359527587890625, -0.040103912353515625, 0.6017379760742188, 1.6340808868408203], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000638.npy"} +{"epoch": 0.9644746787603931, "step": 639, "batch_size": 64, "mean": 1.729904294013977, "std": 2.438458204269409, "min": -2.6193199157714844, "p10": -0.7938732147216796, "median": 1.6051559448242188, "p90": 4.328815841674805, "max": 9.21993637084961, "pos_frac": 0.71875, "sample": [2.449674606323242, 3.745941162109375, -0.6153564453125, 4.9356689453125, 2.658477783203125, 2.052032470703125, 1.3336639404296875, -2.6193199157714844, -0.30961036682128906, 3.228992462158203, 0.07244873046875, 2.0945816040039062, 7.8150177001953125, 1.56146240234375, 1.8821563720703125, 1.7678756713867188, 1.4810714721679688, -0.6753063201904297, -2.0753402709960938, 2.5817718505859375, 0.02285003662109375, 4.213466644287109, 1.1780586242675781, 3.159698486328125, -1.7224903106689453, 3.9770240783691406, 0.500244140625, 2.925159454345703, -0.0357666015625, 4.343170166015625, 6.117218017578125, -1.0727882385253906, 0.6124134063720703, 1.5445404052734375, -0.5107097625732422, 0.2343273162841797, 1.7937393188476562, 2.5605010986328125, 9.21993637084961, 3.081787109375, 3.468198776245117, 2.1472930908203125, 3.3585128784179688, 1.9063873291015625, -0.450408935546875, 0.6261558532714844, -0.5550994873046875, -0.8113574981689453, 1.6051483154296875, -0.7530765533447266, 5.976417541503906, 4.295322418212891, -0.31528472900390625, 1.7424850463867188, 1.60516357421875, -2.3768749237060547, 0.1287078857421875, -0.234893798828125, -0.442291259765625, 7.9846954345703125, -1.179586410522461, 1.5833206176757812, 2.7670116424560547, 3.1296463012695312], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000639.npy"} +{"epoch": 0.9659863945578231, "step": 640, "batch_size": 64, "mean": 1.5351800918579102, "std": 2.332674741744995, "min": -4.914878845214844, "p10": -1.1611362457275392, "median": 1.4455232620239258, "p90": 4.707024002075196, "max": 5.9287109375, "pos_frac": 0.765625, "sample": [2.0374412536621094, 2.9750804901123047, 4.1520843505859375, -2.291440963745117, 0.3872833251953125, 3.1627273559570312, 3.8076553344726562, 0.5199813842773438, 0.828887939453125, 4.735877990722656, -0.7635421752929688, 0.0625, 4.428533554077148, 2.108736038208008, -1.3454818725585938, 0.9756622314453125, 1.1980228424072266, 2.0530967712402344, 1.3611164093017578, 4.4773406982421875, -4.16156005859375, -0.3819465637207031, 1.6568679809570312, 1.1708984375, -0.6706886291503906, -1.1640739440917969, 3.0918502807617188, 1.2583389282226562, -1.9869728088378906, 0.48668861389160156, 4.836446762084961, -0.0150909423828125, 0.0379486083984375, 3.210174560546875, 5.4387359619140625, 0.11909103393554688, -1.1542816162109375, 0.362335205078125, 3.3581619262695312, 3.6541290283203125, 1.0458145141601562, 3.9569454193115234, 0.9467315673828125, 1.539886474609375, 4.639698028564453, 1.7721481323242188, 1.5299301147460938, -4.914878845214844, 1.6540813446044922, 2.206634521484375, 2.367870330810547, 5.9287109375, 1.1248741149902344, 3.028533935546875, -0.48714447021484375, -0.8214988708496094, 5.291868209838867, -2.0299835205078125, 0.273162841796875, 2.569507598876953, -0.023181915283203125, 5.469549179077148, 1.6812973022460938, 5.482353210449219], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000640.npy"} +{"epoch": 0.9674981103552532, "step": 641, "batch_size": 64, "mean": 1.4259059429168701, "std": 2.306586742401123, "min": -4.7402191162109375, "p10": -0.6843345642089842, "median": 1.4492530822753906, "p90": 3.515293884277344, "max": 9.302703857421875, "pos_frac": 0.796875, "sample": [-1.9844894409179688, -3.2878646850585938, 2.4224987030029297, 1.8368721008300781, 2.7187728881835938, 0.0464324951171875, 2.3964500427246094, 2.4297103881835938, -0.9101963043212891, -0.07161712646484375, 1.4337615966796875, 1.1694717407226562, 1.7090988159179688, 4.711418151855469, 1.71038818359375, 0.08697891235351562, 0.0539398193359375, 3.09698486328125, 3.9295406341552734, 1.7568206787109375, 1.9432907104492188, -0.8869171142578125, -0.4260711669921875, -0.47315216064453125, 2.9937496185302734, 3.5283355712890625, 2.3933258056640625, 2.2051849365234375, 2.9407958984375, 2.5235424041748047, 1.6302337646484375, 3.48486328125, 0.22920989990234375, 1.4708881378173828, -0.09743499755859375, -4.7402191162109375, 1.2464065551757812, 2.9236297607421875, -0.77484130859375, 0.20153045654296875, -4.448211669921875, 7.899066925048828, 1.1365203857421875, 0.19777679443359375, 0.9141578674316406, 6.291866302490234, 0.47930145263671875, 0.0058422088623046875, 0.47414398193359375, 1.0358428955078125, 9.302703857421875, 2.6220664978027344, 3.9297924041748047, 1.246185302734375, -0.249114990234375, 1.2726249694824219, 2.8496856689453125, -0.19889068603515625, 0.6831588745117188, 1.5843505859375, 1.4647445678710938, 0.14347076416015625, 1.7093963623046875, 3.3401737213134766], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000641.npy"} +{"epoch": 0.9690098261526833, "step": 642, "batch_size": 64, "mean": 1.4081451892852783, "std": 2.3406200408935547, "min": -4.999427795410156, "p10": -1.0912887573242187, "median": 1.6187286376953125, "p90": 5.039256286621095, "max": 6.138648986816406, "pos_frac": 0.671875, "sample": [5.644935607910156, 1.7657470703125, -0.0371246337890625, -1.0369224548339844, 1.6419601440429688, -1.0670318603515625, 2.0699310302734375, 5.468711853027344, 1.70562744140625, -4.999427795410156, 5.1798858642578125, 0.28119850158691406, 1.0012741088867188, -0.27854156494140625, 2.4656524658203125, -0.07314682006835938, 3.104694366455078, 5.670875549316406, -1.1550140380859375, -0.23903274536132812, 0.566009521484375, 2.0326690673828125, 1.9681320190429688, -0.9176559448242188, 3.5429611206054688, 3.2591781616210938, 6.018218994140625, 2.1285400390625, 1.9541778564453125, -0.12630462646484375, 1.8251724243164062, 2.3465042114257812, 3.1608619689941406, 1.0431938171386719, -1.2163124084472656, 4.209815979003906, -2.4059906005859375, 1.026031494140625, 0.4282684326171875, -0.9417572021484375, -0.030620574951171875, 2.0407333374023438, 0.4696216583251953, 0.4539794921875, 2.1387367248535156, 1.5954971313476562, -2.5344085693359375, 3.088937759399414, 4.71112060546875, 6.138648986816406, 0.0028228759765625, -1.0298385620117188, 3.2971134185791016, 4.203212738037109, 5.6587982177734375, 1.9309501647949219, -1.1016845703125, 1.8766098022460938, -0.2759723663330078, -0.9329910278320312, -0.382049560546875, 0.02581787109375, -1.7689666748046875, 3.5292510986328125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000642.npy"} +{"epoch": 0.9705215419501134, "step": 643, "batch_size": 64, "mean": 1.1184437274932861, "std": 3.8146462440490723, "min": -23.472518920898438, "p10": -1.9768779754638668, "median": 1.5841712951660156, "p90": 4.402660369873047, "max": 6.775493621826172, "pos_frac": 0.78125, "sample": [0.04815673828125, 1.8552837371826172, 1.5763778686523438, -3.7076263427734375, 0.7191991806030273, 4.797889709472656, 1.2505970001220703, 2.0607757568359375, 3.745025634765625, 0.9210891723632812, -0.3438568115234375, 1.2978267669677734, 1.6104049682617188, 0.181640625, 1.6610641479492188, -23.472518920898438, 0.00299072265625, -0.47156333923339844, 4.396705627441406, 5.800750732421875, 0.7400779724121094, 2.6789779663085938, 3.0551223754882812, 1.7015914916992188, 0.3860130310058594, 5.57135009765625, 2.5680999755859375, -2.433380126953125, 6.775493621826172, 2.8587188720703125, 2.3247909545898438, 2.453014373779297, -0.6953125, 3.1666908264160156, 0.850555419921875, 0.8883514404296875, 0.7208156585693359, 2.2195911407470703, 4.9873046875, 3.2723865509033203, 4.314458847045898, -2.56878662109375, 3.6236419677734375, -2.150726318359375, 2.4464263916015625, -1.565765380859375, -0.7215080261230469, -3.360889434814453, 1.5136127471923828, 1.7758636474609375, 0.7075347900390625, 1.5936641693115234, 5.127410888671875, 2.1454010009765625, 2.01776123046875, -2.164398193359375, -0.62060546875, 3.7303085327148438, 1.3910541534423828, 0.7353935241699219, -1.5712318420410156, 4.40521240234375, 1.5919647216796875, 1.1641387939453125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000643.npy"} +{"epoch": 0.9720332577475435, "step": 644, "batch_size": 64, "mean": 1.4850530624389648, "std": 2.5589046478271484, "min": -3.83770751953125, "p10": -1.2246330261230465, "median": 1.2425756454467773, "p90": 4.689080047607422, "max": 8.651046752929688, "pos_frac": 0.734375, "sample": [2.5048980712890625, 0.2499713897705078, -3.6225662231445312, 2.0599136352539062, -0.7803382873535156, -1.8912849426269531, 1.5197410583496094, 6.883668899536133, 6.312381744384766, 2.9089889526367188, 1.7554702758789062, -0.4808998107910156, 3.00140380859375, 4.109058380126953, 0.572998046875, 0.11988067626953125, 0.6821212768554688, 0.4492950439453125, -0.28383636474609375, 5.75477409362793, -0.11859893798828125, 3.48748779296875, -2.7419967651367188, 0.6659469604492188, -0.144683837890625, 2.6583786010742188, -0.2867774963378906, 1.40716552734375, 0.47605133056640625, -0.7920303344726562, 2.5798778533935547, -3.83770751953125, 1.317453384399414, 1.9537162780761719, 4.653041839599609, -1.4100341796875, 3.1142578125, 6.894992828369141, 3.5011825561523438, 0.8687572479248047, 1.4373931884765625, 3.0727996826171875, 0.080810546875, 1.0982913970947266, -0.34804534912109375, -3.5916366577148438, -0.25423622131347656, 4.704524993896484, 1.2913341522216797, 4.4621734619140625, 2.6397247314453125, 1.3056411743164062, -0.22537803649902344, 0.2389068603515625, 1.6695632934570312, 2.4338760375976562, -2.3498077392578125, 0.7940025329589844, 2.8200759887695312, 0.368682861328125, 1.193817138671875, 8.651046752929688, 1.0495033264160156, 6.428215026855469], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000644.npy"} +{"epoch": 0.9735449735449735, "step": 645, "batch_size": 64, "mean": 1.6724815368652344, "std": 2.1912691593170166, "min": -2.8128509521484375, "p10": -0.6689521789550779, "median": 1.4475488662719727, "p90": 3.9967304229736342, "max": 8.542160034179688, "pos_frac": 0.8125, "sample": [0.3628120422363281, 4.744354248046875, 3.2434158325195312, -1.1218414306640625, 2.8377838134765625, 8.430953979492188, 2.4133682250976562, 1.5933570861816406, 1.8262519836425781, -0.8745574951171875, -1.0192222595214844, 1.0935516357421875, 1.7196063995361328, 0.2554645538330078, 4.5461578369140625, 1.0110855102539062, 3.2220115661621094, 0.8958797454833984, 0.3708934783935547, 0.13054275512695312, 5.1562042236328125, -0.39614105224609375, 0.8282089233398438, 1.7650833129882812, 3.0917816162109375, -0.2730712890625, 0.33638763427734375, 1.6754302978515625, 3.6658897399902344, 2.704639434814453, 0.715118408203125, 0.12273788452148438, 2.158740997314453, 0.49437713623046875, -0.7623748779296875, 3.2443161010742188, -2.8128509521484375, -0.06366729736328125, -1.22650146484375, 2.24700927734375, 2.5608901977539062, 1.5679931640625, 8.542160034179688, 0.69146728515625, 0.3374137878417969, 2.695648193359375, 2.6491012573242188, 1.3489398956298828, 2.009368896484375, 1.0657806396484375, -0.45096588134765625, 4.138519287109375, 1.7726860046386719, 7.9562530517578125, 0.913726806640625, -0.33104705810546875, 3.1343002319335938, 0.4599037170410156, 1.5461578369140625, -2.23577880859375, 0.9038009643554688, 3.1529083251953125, 2.949922561645508, 1.3064804077148438], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000645.npy"} +{"epoch": 0.9750566893424036, "step": 646, "batch_size": 64, "mean": 1.9151103496551514, "std": 2.5365631580352783, "min": -4.673038482666016, "p10": -1.1098388671875, "median": 1.842641830444336, "p90": 5.248524665832521, "max": 9.174224853515625, "pos_frac": 0.8125, "sample": [0.6010971069335938, 0.5092544555664062, 0.1341705322265625, 0.7731704711914062, 4.30186653137207, 2.9591064453125, -1.0489349365234375, 4.114013671875, 3.8000030517578125, 3.8017311096191406, -0.9754219055175781, 1.7618560791015625, 2.3456649780273438, 6.209739685058594, 2.3581695556640625, -0.29906272888183594, 2.0694427490234375, 0.107940673828125, 6.050212860107422, 3.6006431579589844, 4.013175964355469, 4.502471923828125, -1.7022933959960938, -0.16341400146484375, 0.5037765502929688, 3.9064292907714844, 1.8288612365722656, -1.5101852416992188, 0.9738121032714844, 1.6304244995117188, 2.32037353515625, 1.9221725463867188, 2.274932861328125, 5.430980682373047, 2.1735458374023438, 0.24072265625, 1.2033367156982422, -1.3247299194335938, 0.85296630859375, 3.2399253845214844, 0.232666015625, -1.1359405517578125, 2.4200897216796875, 1.0622711181640625, 3.2530670166015625, 9.174224853515625, 2.125946044921875, 5.9741668701171875, 1.1197223663330078, 8.321037292480469, 4.822793960571289, -0.538848876953125, 2.1739959716796875, 1.947113037109375, -4.673038482666016, 1.1069107055664062, 4.5850677490234375, -1.4633769989013672, 0.6037445068359375, 5.9013671875, -2.3599891662597656, 0.3668975830078125, 1.8564224243164062, 0.19880294799804688], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000646.npy"} +{"epoch": 0.9765684051398337, "step": 647, "batch_size": 64, "mean": 2.065751075744629, "std": 2.741248607635498, "min": -4.17486572265625, "p10": -0.8534923553466797, "median": 1.7903022766113281, "p90": 5.700257873535157, "max": 8.392852783203125, "pos_frac": 0.765625, "sample": [-0.010417938232421875, 2.5018692016601562, 3.348276138305664, 3.6839828491210938, 5.778045654296875, 1.7722930908203125, -0.163360595703125, 7.610862731933594, -1.796844482421875, 5.5187530517578125, 2.9534912109375, 1.895660400390625, 3.2074813842773438, 0.15748977661132812, 1.8083114624023438, 1.6654052734375, 6.740966796875, -0.004474639892578125, -1.408548355102539, -4.17486572265625, 4.030265808105469, 3.2797107696533203, 4.848670959472656, 0.10876083374023438, 8.162467956542969, 4.8524322509765625, 3.9850997924804688, -0.7830314636230469, 4.034145355224609, -0.6175079345703125, 1.2676277160644531, 0.62982177734375, 2.215087890625, -0.13002777099609375, 1.9011154174804688, -0.5279827117919922, 0.7237701416015625, 3.9071178436279297, 0.7566146850585938, -0.8836898803710938, 3.0340824127197266, 1.467437744140625, 0.5146369934082031, 3.4599990844726562, -2.5492935180664062, 1.183523178100586, 0.41870880126953125, 3.072967529296875, 1.0204925537109375, 1.8208980560302734, -0.8904266357421875, 6.610862731933594, 1.1797981262207031, -3.6729736328125, -0.5018310546875, 5.297557830810547, 0.3257408142089844, 8.392852783203125, 0.6043853759765625, 4.92742919921875, 7.091758728027344, 0.11183929443359375, 3.0243091583251953, 3.4184722900390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000647.npy"} +{"epoch": 0.9780801209372638, "step": 648, "batch_size": 64, "mean": 1.6904385089874268, "std": 2.3993003368377686, "min": -3.9536991119384766, "p10": -0.8579494476318359, "median": 1.5195274353027344, "p90": 4.843616104125977, "max": 7.559638977050781, "pos_frac": 0.75, "sample": [2.9305191040039062, 4.600090026855469, 1.8754653930664062, -3.9536991119384766, 6.463798522949219, -0.7525100708007812, -2.7844085693359375, 2.1928062438964844, 1.12225341796875, 2.1234359741210938, 0.6054573059082031, 2.5613327026367188, 1.4357681274414062, -0.7705001831054688, -1.4460296630859375, -1.7903518676757812, 2.866344451904297, -0.06563568115234375, 6.8201446533203125, 3.264312744140625, 6.018974304199219, 0.47039031982421875, 2.0445556640625, 6.450496673583984, 3.7908935546875, 0.46614837646484375, -0.0289306640625, 5.021688461303711, 1.5645065307617188, 7.559638977050781, 1.6442718505859375, -0.32244110107421875, 3.920806884765625, 4.002532958984375, 0.4914131164550781, -1.0992355346679688, -0.1709136962890625, 0.8292655944824219, 1.7697277069091797, 4.768829345703125, 2.240753173828125, 2.084278106689453, 4.875667572021484, 0.09060287475585938, -0.4411964416503906, -0.2350616455078125, 3.6410064697265625, 3.2002182006835938, 0.4543609619140625, 1.8310470581054688, 2.3501930236816406, 2.3904571533203125, 3.203907012939453, -2.8254241943359375, 1.176727294921875, 0.7852783203125, 4.587089538574219, 0.31507110595703125, 0.002166748046875, -0.27634429931640625, 1.47454833984375, -0.8954277038574219, 1.0108184814453125, 0.6561183929443359], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000648.npy"} +{"epoch": 0.9795918367346939, "step": 649, "batch_size": 64, "mean": 1.858307957649231, "std": 2.3925724029541016, "min": -3.754627227783203, "p10": -1.681984519958496, "median": 1.944009780883789, "p90": 4.946227645874024, "max": 7.675323486328125, "pos_frac": 0.78125, "sample": [5.0855865478515625, 2.5178260803222656, -0.042606353759765625, 2.5705642700195312, 2.2096805572509766, 4.708545684814453, 3.6954498291015625, 2.7230072021484375, 0.9506683349609375, 4.777606964111328, 1.3712100982666016, 5.5648345947265625, 1.96795654296875, -3.754627227783203, 3.253997802734375, 1.1430587768554688, 0.8118362426757812, 2.780923843383789, 2.768217086791992, 1.5475997924804688, 5.703241348266602, -0.6777801513671875, -0.5033454895019531, -2.6785812377929688, -1.902099609375, 4.435523986816406, 4.528602600097656, 0.5472126007080078, -1.9833984375, -1.7621917724609375, 3.8367462158203125, 0.9703445434570312, 3.2256202697753906, 0.8961105346679688, 3.141559600830078, 1.6600189208984375, 1.9545173645019531, 3.295543670654297, 4.3919830322265625, 1.7823657989501953, 1.9737472534179688, 5.835521697998047, 1.933502197265625, -1.1903038024902344, 0.1551666259765625, 5.152137756347656, 0.5571556091308594, 0.9897041320800781, 2.3585281372070312, 0.7689495086669922, -1.95684814453125, 5.01849365234375, -1.7007007598876953, 1.893646240234375, 7.675323486328125, -0.9642524719238281, -1.6383132934570312, 0.6642608642578125, 3.1446151733398438, 3.207752227783203, 4.646888732910156, 3.1222763061523438, -0.4514961242675781, 0.222625732421875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000649.npy"} +{"epoch": 0.981103552532124, "step": 650, "batch_size": 64, "mean": 1.5724914073944092, "std": 2.490751266479492, "min": -3.397216796875, "p10": -1.5828330993652342, "median": 1.353363037109375, "p90": 5.184601783752441, "max": 7.535299301147461, "pos_frac": 0.75, "sample": [-1.2885894775390625, 6.5296783447265625, -3.397216796875, 5.126880645751953, 0.21880340576171875, 5.652778625488281, 1.3463287353515625, 0.728912353515625, 1.3603973388671875, 0.2590904235839844, 2.3632125854492188, 4.450677871704102, -1.73773193359375, 2.2487716674804688, 0.07721138000488281, 5.835693359375, 5.609375, 1.596954345703125, 2.2816238403320312, 1.0956077575683594, 2.3881874084472656, 1.5458526611328125, -0.6450634002685547, -1.2667255401611328, 3.8161048889160156, 3.2703475952148438, 7.535299301147461, 5.196735382080078, 2.1720542907714844, -1.7548599243164062, -0.22633743286132812, -3.0751419067382812, 0.24491500854492188, 3.9497718811035156, -2.300048828125, 0.23659515380859375, 3.7977733612060547, 3.0526123046875, 0.12259292602539062, -1.6730880737304688, 2.5335693359375, 5.156290054321289, -2.0864715576171875, 4.0534515380859375, -0.48187255859375, 0.235931396484375, 5.0790557861328125, 1.8116950988769531, 0.819854736328125, 0.39803123474121094, 1.2523555755615234, 0.3509788513183594, 1.925638198852539, 0.6106224060058594, -0.3775463104248047, 3.5593433380126953, -1.3722381591796875, 2.0582427978515625, 2.1653175354003906, 5.25396728515625, -0.00885009765625, 0.3237113952636719, 1.6664237976074219, -1.034088134765625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000650.npy"} +{"epoch": 0.982615268329554, "step": 651, "batch_size": 64, "mean": 1.5707223415374756, "std": 2.259972095489502, "min": -2.720579147338867, "p10": -0.6675706863403319, "median": 1.060511589050293, "p90": 4.4741950988769545, "max": 7.500581741333008, "pos_frac": 0.78125, "sample": [0.6599884033203125, -0.17787933349609375, 1.3858184814453125, 0.483978271484375, 1.1973342895507812, -2.720579147338867, -0.4847145080566406, 1.5927104949951172, -1.4115676879882812, 3.9161815643310547, 0.7910919189453125, 0.8143310546875, -0.20479583740234375, -1.1074676513671875, 2.1207847595214844, -1.9658889770507812, -0.10812759399414062, 7.4744110107421875, 0.1088409423828125, 1.215576171875, 1.8846359252929688, 0.4202766418457031, 0.00905609130859375, 0.0498199462890625, 0.7878570556640625, 0.17970848083496094, 3.2434539794921875, 3.9723129272460938, 0.6331787109375, 6.305938720703125, 2.450906753540039, 2.7439613342285156, 2.118610382080078, 0.02689361572265625, 2.501678466796875, 0.45107269287109375, 1.9581718444824219, 3.5936737060546875, -0.4824371337890625, 1.6799774169921875, 4.0467071533203125, 1.512298583984375, 5.404232025146484, 0.8379707336425781, -0.5548992156982422, 7.500581741333008, 6.762111663818359, 6.81837272644043, -0.33545684814453125, 3.267404556274414, 0.9236888885498047, -0.7158584594726562, 0.13712692260742188, 2.4543533325195312, 1.57940673828125, 1.7891082763671875, 2.2951583862304688, 1.4026374816894531, 4.2047576904296875, -1.314361572265625, 0.36554718017578125, -0.8013114929199219, 4.589668273925781, 0.2482147216796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000651.npy"} +{"epoch": 0.9841269841269841, "step": 652, "batch_size": 64, "mean": 1.6457829475402832, "std": 2.2728028297424316, "min": -3.1839065551757812, "p10": -1.0776756286621094, "median": 1.5246505737304688, "p90": 4.5704284667968755, "max": 6.740547180175781, "pos_frac": 0.734375, "sample": [-0.4589729309082031, 3.660186767578125, 4.651065826416016, 2.0129776000976562, 2.1123275756835938, 3.9907760620117188, 5.3836517333984375, 0.8896751403808594, 5.943962097167969, 1.4772491455078125, 3.5541915893554688, -3.1839065551757812, 0.16965103149414062, 2.411771774291992, 1.77374267578125, 2.0813827514648438, 0.9230461120605469, 3.237823486328125, 2.6114273071289062, -0.417877197265625, -1.2241477966308594, 4.2003631591796875, 1.4451904296875, 0.0153045654296875, 1.5302047729492188, -0.42498016357421875, -1.826171875, -0.7477874755859375, 4.506202697753906, 2.3857574462890625, 2.1072921752929688, -0.6959724426269531, 1.890716552734375, 3.7809295654296875, 3.3451156616210938, -0.921783447265625, 6.740547180175781, 0.6013145446777344, -0.7869186401367188, -2.8311996459960938, 0.7000656127929688, 5.515460968017578, 1.5190963745117188, 1.0578346252441406, -0.75030517578125, 0.3991584777832031, 4.597953796386719, 2.3648681640625, -0.1964092254638672, 4.012214660644531, 4.0462646484375, 0.33358001708984375, 3.3934326171875, 1.7361869812011719, -1.5864753723144531, 1.337799072265625, 0.95928955078125, 0.9987087249755859, -1.0260162353515625, 5.85291862487793, 2.6099090576171875, -1.651885986328125, -1.0998153686523438, 4.292144775390625], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000652.npy"} +{"epoch": 0.9856386999244142, "step": 653, "batch_size": 64, "mean": 1.5419931411743164, "std": 2.4511232376098633, "min": -2.9544811248779297, "p10": -0.8172874450683594, "median": 1.0186767578125, "p90": 4.852294158935547, "max": 9.052520751953125, "pos_frac": 0.671875, "sample": [4.899150848388672, 5.887687683105469, -0.20224761962890625, -0.39870452880859375, 0.7301559448242188, 1.0695877075195312, 2.8573379516601562, 1.0042343139648438, 6.215301513671875, -0.4965972900390625, -0.8400955200195312, 1.0667648315429688, 4.742961883544922, 0.0103302001953125, 6.652587890625, 3.25518798828125, 2.2239723205566406, 6.014804840087891, 0.73699951171875, 4.022972106933594, 4.3858642578125, 4.186103820800781, -0.17946434020996094, -0.44142913818359375, 0.8512096405029297, -0.7558326721191406, 2.520599365234375, -0.28199005126953125, -0.4074687957763672, 0.066131591796875, 9.052520751953125, -1.870697021484375, -0.419189453125, 0.6143264770507812, 2.5691986083984375, -1.7031707763671875, 3.295032501220703, 2.3629226684570312, 1.4604949951171875, -0.4303779602050781, 0.3787841796875, -2.5466156005859375, -0.084381103515625, 1.192108154296875, 4.072929382324219, -2.9544811248779297, 3.591306686401367, -0.764068603515625, 5.832344055175781, 1.2957763671875, 1.0331192016601562, 3.2489776611328125, -1.99365234375, -0.5532150268554688, -1.1001758575439453, 0.00899505615234375, 2.846210479736328, 1.9889144897460938, 0.8468265533447266, 3.2134552001953125, 0.8500938415527344, 1.489217758178711, -0.02153778076171875, 2.4894561767578125], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000653.npy"} +{"epoch": 0.9871504157218443, "step": 654, "batch_size": 64, "mean": 1.8974156379699707, "std": 2.5741994380950928, "min": -4.085235595703125, "p10": -0.7750770568847656, "median": 1.3309545516967773, "p90": 5.377378082275393, "max": 9.4322509765625, "pos_frac": 0.828125, "sample": [1.5673904418945312, 3.66375732421875, -1.368072509765625, 1.5309333801269531, 0.37723541259765625, -2.0862350463867188, 2.641407012939453, 0.7827320098876953, 2.4572792053222656, 1.3643569946289062, 0.23737335205078125, -1.381072998046875, 6.120475769042969, 1.2834701538085938, -0.4215240478515625, 1.0598411560058594, 0.8639297485351562, -1.5687923431396484, 4.555902481079102, 0.32118988037109375, 1.1873397827148438, 1.5772972106933594, 4.340461730957031, 8.863323211669922, 0.3668975830078125, 0.7681808471679688, 0.027997970581054688, -0.8161087036132812, -0.13993072509765625, 1.8689651489257812, 0.8426589965820312, 2.56201171875, 9.4322509765625, 0.7018527984619141, 2.324249267578125, 2.843292236328125, 2.05731201171875, 0.3712577819824219, 1.1676712036132812, 0.5987396240234375, 3.757139205932617, -4.085235595703125, 2.916748046875, -1.1329917907714844, 1.5600109100341797, -0.6793365478515625, 7.036273956298828, 5.664787292480469, 0.8907108306884766, 1.8981742858886719, 0.3170013427734375, 8.250991821289062, 1.2975521087646484, 3.7183151245117188, 5.838020324707031, -0.5784149169921875, 2.9068756103515625, 1.4879188537597656, 0.0998382568359375, 3.5297012329101562, 3.9632720947265625, 1.1891632080078125, 3.9340362548828125, 4.706756591796875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000654.npy"} +{"epoch": 0.9886621315192744, "step": 655, "batch_size": 64, "mean": 1.825459599494934, "std": 2.1549086570739746, "min": -2.168804168701172, "p10": -0.5314635276794432, "median": 1.6167879104614258, "p90": 4.688555908203125, "max": 10.069854736328125, "pos_frac": 0.859375, "sample": [3.014087677001953, 0.6783714294433594, -0.36522674560546875, 1.5501155853271484, 0.810272216796875, 6.205146789550781, 2.804901123046875, 1.6834602355957031, 0.14031982421875, 1.7254714965820312, 0.09654998779296875, 1.1472930908203125, 2.3736801147460938, 0.21955108642578125, -1.1489028930664062, -0.8123626708984375, 2.019254684448242, 3.4835586547851562, 6.235832214355469, 0.0135650634765625, 4.846248626708984, -2.0587997436523438, 4.044965744018555, 2.3838958740234375, 1.0624008178710938, 0.20526885986328125, 0.90008544921875, -1.6383552551269531, 5.28399658203125, 1.1849193572998047, 2.230194091796875, 0.7335052490234375, 2.2001495361328125, 0.46895599365234375, 4.742645263671875, 0.6727981567382812, 1.169015884399414, 3.119701385498047, 2.319061279296875, 10.069854736328125, 0.39716339111328125, -1.7158660888671875, -0.3155059814453125, 4.2641448974609375, 0.8105850219726562, -2.168804168701172, 1.836263656616211, 2.5637969970703125, 4.562347412109375, 3.1261749267578125, 1.2677574157714844, 1.028961181640625, 3.0267333984375, 0.9082908630371094, 0.7868766784667969, 2.01568603515625, 4.977672576904297, 2.0648193359375, 3.3183212280273438, 1.22918701171875, -0.6027078628540039, 2.333303451538086, 2.2916412353515625, 3.0071258544921875], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000655.npy"} +{"epoch": 0.9901738473167044, "step": 656, "batch_size": 64, "mean": 2.1840014457702637, "std": 2.5649070739746094, "min": -2.9134464263916016, "p10": -0.5784217834472656, "median": 1.564620018005371, "p90": 5.663577270507814, "max": 10.062545776367188, "pos_frac": 0.84375, "sample": [5.928363800048828, 0.33907318115234375, 4.995811462402344, 2.0020294189453125, 0.7813568115234375, 0.844268798828125, 0.7238578796386719, 0.6391448974609375, 3.557098388671875, -0.254302978515625, 1.0557479858398438, 4.7778167724609375, 0.9942054748535156, -0.5784378051757812, 0.6212673187255859, 4.3755340576171875, 1.5312347412109375, -1.0826339721679688, 1.4959793090820312, 3.8326663970947266, 10.062545776367188, 2.1659393310546875, -2.9134464263916016, 4.452716827392578, 4.4745330810546875, -1.2499198913574219, 0.5498046875, 5.4123992919921875, -1.5023956298828125, 0.10733413696289062, 7.378425598144531, 0.527069091796875, 1.5980052947998047, 1.4000320434570312, 0.440887451171875, 8.146942138671875, 2.826141357421875, 0.7434196472167969, 4.5418548583984375, 3.728271484375, 2.4636688232421875, -0.7634735107421875, 0.9835891723632812, 3.740314483642578, 2.3686981201171875, 2.3249244689941406, 0.5021419525146484, 4.296257019042969, 0.2467041015625, 2.659820556640625, 5.7712249755859375, -1.6750259399414062, 1.864471435546875, 6.368019104003906, 3.5418701171875, 1.8109664916992188, 0.3885841369628906, -0.5783843994140625, 0.6116561889648438, 1.436452865600586, -0.4827728271484375, 1.92572021484375, 3.144510269165039, 7.355522155761719], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000656.npy"} +{"epoch": 0.9916855631141346, "step": 657, "batch_size": 64, "mean": 2.0198724269866943, "std": 2.6450440883636475, "min": -3.3935546875, "p10": -1.2594583511352537, "median": 1.8894729614257812, "p90": 5.457502365112306, "max": 9.216983795166016, "pos_frac": 0.765625, "sample": [4.770538330078125, 2.7521514892578125, 0.06796646118164062, 3.5542144775390625, 2.5439205169677734, 1.4648857116699219, 5.778472900390625, 6.283298492431641, 4.129547119140625, -2.6834144592285156, -0.4851264953613281, -1.1242656707763672, 2.1093902587890625, 0.10532951354980469, 4.427001953125, 1.0784072875976562, 4.1136016845703125, 7.078973770141602, -1.7840118408203125, 3.0913772583007812, -1.931732177734375, 2.699462890625, -1.5842514038085938, 1.0326557159423828, 3.018064498901367, 5.530086517333984, 1.933502197265625, 1.525390625, -0.2625732421875, 1.7350997924804688, 3.741912841796875, 2.9900455474853516, 1.8454437255859375, 5.288139343261719, 3.020204544067383, 1.502044677734375, 2.541667938232422, 9.216983795166016, 4.497367858886719, 0.6477031707763672, 2.720001220703125, -0.3760986328125, 0.08001136779785156, 3.4550857543945312, -0.8015670776367188, -1.7988128662109375, 8.893997192382812, 0.5076122283935547, -0.945587158203125, 1.3351306915283203, 1.1719207763671875, -3.3935546875, 2.8315582275390625, 0.0468597412109375, 1.1467151641845703, -0.3968353271484375, -1.3173980712890625, 2.893526077270508, 3.85992431640625, 5.903709411621094, -0.2240753173828125, 2.810344696044922, 0.534027099609375, 4.075868606567383], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000657.npy"} +{"epoch": 0.9931972789115646, "step": 658, "batch_size": 64, "mean": 1.656302809715271, "std": 2.801319122314453, "min": -3.591278076171875, "p10": -1.089685821533203, "median": 0.8039951324462891, "p90": 5.77467956542969, "max": 10.115198135375977, "pos_frac": 0.71875, "sample": [2.7540817260742188, 0.8353767395019531, 3.5032958984375, -1.493255615234375, 0.5080108642578125, 0.38994789123535156, -3.502979278564453, -0.14617919921875, 2.278566360473633, 4.2009735107421875, 0.7079925537109375, 0.9678802490234375, -3.591278076171875, 5.987865447998047, 3.097442626953125, 0.9484481811523438, -0.4757232666015625, 0.772613525390625, 7.5343017578125, -0.431243896484375, 0.05665779113769531, 0.192108154296875, -0.14295196533203125, 3.686267852783203, 5.06414794921875, 4.477729797363281, -0.9471282958984375, 2.372884750366211, 4.201179504394531, 1.0622520446777344, 0.10925102233886719, -0.38953590393066406, 1.2382087707519531, 4.2606353759765625, 0.3065376281738281, 3.9140701293945312, 5.3075714111328125, -1.1110496520996094, -0.19884109497070312, 0.556793212890625, 5.9748687744140625, 0.5580234527587891, 2.412689208984375, 4.406044006347656, -0.6375675201416016, 2.403057098388672, 0.414093017578125, 6.0461273193359375, 10.115198135375977, 1.5301094055175781, 0.061248779296875, -1.1875247955322266, -0.5514488220214844, -3.437807083129883, 1.9306640625, 2.2772979736328125, -1.0398368835449219, -2.2894248962402344, 7.112945556640625, -0.7914390563964844, 6.59332275390625, 0.44867515563964844, 4.3185577392578125, 0.4725761413574219], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000658.npy"} +{"epoch": 0.9947089947089947, "step": 659, "batch_size": 64, "mean": 1.394887089729309, "std": 2.515268564224243, "min": -4.593311309814453, "p10": -1.5231531143188477, "median": 1.1660995483398438, "p90": 4.718669128417971, "max": 7.182914733886719, "pos_frac": 0.6875, "sample": [1.1778717041015625, 3.3704471588134766, 5.9673309326171875, 1.6991539001464844, -0.28215980529785156, 4.9637603759765625, 0.9703559875488281, 3.2969608306884766, 1.5348396301269531, 6.7993316650390625, 5.06842041015625, -1.6525421142578125, 0.7163848876953125, -0.2895240783691406, 0.5543003082275391, 1.154327392578125, 2.5771865844726562, -2.6015777587890625, 2.2487010955810547, -2.8125457763671875, 4.009376525878906, -0.18080520629882812, -0.04041290283203125, 3.422607421875, 0.6178569793701172, -0.516510009765625, 1.0055694580078125, 1.7233848571777344, -0.1268749237060547, 3.5284271240234375, 0.9801177978515625, 0.43350982666015625, 3.80108642578125, 1.117706298828125, 0.060699462890625, -0.6725082397460938, 5.588005065917969, 3.2771072387695312, 7.182914733886719, 1.8131484985351562, 3.14080810546875, 0.54010009765625, -1.5325927734375, 2.5677146911621094, -0.1820964813232422, 0.21573638916015625, 3.2796268463134766, 2.6736221313476562, 1.1939544677734375, -1.1790237426757812, 1.2972412109375, 2.2156429290771484, -4.593311309814453, -2.0290088653564453, 4.14678955078125, 1.7614212036132812, -0.20195388793945312, -4.095161437988281, 7.175323486328125, 1.8021469116210938, -1.5011272430419922, 3.0317249298095703, -0.7588348388671875, -1.181396484375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000659.npy"} +{"epoch": 0.9962207105064248, "step": 660, "batch_size": 64, "mean": 1.917556881904602, "std": 2.366765022277832, "min": -3.2264404296875, "p10": -0.7596099853515624, "median": 1.8877229690551758, "p90": 5.358875274658207, "max": 8.858604431152344, "pos_frac": 0.828125, "sample": [6.181407928466797, 1.8115234375, 0.2009906768798828, 0.5942459106445312, 2.4126853942871094, -0.8743247985839844, 0.533233642578125, 2.938709259033203, -1.2724761962890625, -3.2264404296875, 6.415958404541016, 8.858604431152344, 3.118976593017578, 1.2869949340820312, 2.0244312286376953, 3.54693603515625, 3.4635238647460938, 1.9896373748779297, 1.8692302703857422, -0.8037567138671875, 1.0724258422851562, -2.489990234375, 6.142936706542969, 0.3898773193359375, 2.3235015869140625, 5.738761901855469, -1.5432281494140625, 1.0869522094726562, 2.2576332092285156, -0.6566009521484375, 0.23395347595214844, 3.10162353515625, 7.455078125, 1.7181396484375, 0.5097751617431641, 0.057342529296875, 1.5596084594726562, 2.0328025817871094, 2.781169891357422, -0.6469879150390625, 2.0322799682617188, 2.930387496948242, 3.5480575561523438, 2.060087203979492, 4.47247314453125, 3.115070343017578, -0.930450439453125, -0.3983001708984375, 2.5849552154541016, 3.6704483032226562, 1.0735969543457031, 7.538032531738281, 0.41916656494140625, 0.36940956115722656, 3.4158859252929688, 0.9785537719726562, 1.9389686584472656, 2.8369293212890625, 1.3543167114257812, 0.04243278503417969, 0.415313720703125, 1.9062156677246094, -0.2921905517578125, 3.4471282958984375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000660.npy"} +{"epoch": 0.9977324263038548, "step": 661, "batch_size": 64, "mean": 1.730008840560913, "std": 2.638085126876831, "min": -4.637947082519531, "p10": -1.5019088745117186, "median": 1.5706424713134766, "p90": 5.536384201049805, "max": 6.886390686035156, "pos_frac": 0.78125, "sample": [0.8999481201171875, -1.0488967895507812, -2.1782684326171875, -1.1848526000976562, 3.489349365234375, 1.261932373046875, 5.550067901611328, 0.414764404296875, -4.4096221923828125, 2.0312862396240234, -1.8846588134765625, 0.320892333984375, 0.22088623046875, 5.206016540527344, 3.5196380615234375, 5.50445556640625, 6.886390686035156, 1.4921760559082031, 5.845672607421875, 0.5517425537109375, -4.637947082519531, 2.10882568359375, 5.770509719848633, 0.34024620056152344, -0.7412872314453125, 1.64910888671875, 0.9523143768310547, -1.7755966186523438, 0.6083908081054688, 0.4770965576171875, 3.888622283935547, 2.246591567993164, -3.0829811096191406, 6.083030700683594, 5.95611572265625, 0.8895225524902344, 2.57745361328125, 5.219932556152344, 0.8448982238769531, 4.7821197509765625, 3.5512847900390625, 1.2398452758789062, 0.4104156494140625, 5.624683380126953, 3.93707275390625, 1.9777374267578125, 5.236335754394531, 0.8777332305908203, 0.7376613616943359, 1.2812213897705078, -1.2996826171875, 2.5203094482421875, -1.5885772705078125, -0.13239097595214844, 2.2995567321777344, 3.3813705444335938, 2.1549301147460938, 1.7088623046875, 1.81097412109375, 4.988067626953125, 2.5947189331054688, -1.115203857421875, -0.8967666625976562, 2.7745208740234375], "npy": "outputs/llama-3-8b-base-new-dpo-hh-harmless-4xh200-batch-64-q_t-0.5-s_star-1.0/margin_logs/step_0000661.npy"} diff --git a/model-00001-of-00007.safetensors b/model-00001-of-00007.safetensors new file mode 100644 index 0000000..03c1da3 --- /dev/null +++ b/model-00001-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d494715da1d6ba19155be3534fc81b7e9269cb35ddc288ca96f9cc6364b413e +size 4886466168 diff --git a/model-00002-of-00007.safetensors b/model-00002-of-00007.safetensors new file mode 100644 index 0000000..a385562 --- /dev/null +++ b/model-00002-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:934f9082dd3a245583c5f3fee9fbe0e3fe31ea43871afbebfa8f83b1748b96fb +size 4832007448 diff --git a/model-00003-of-00007.safetensors b/model-00003-of-00007.safetensors new file mode 100644 index 0000000..d751643 --- /dev/null +++ b/model-00003-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83d5ee0632d0b5921c268fbb09bcf63c64a947ee206b320d4991192bfe4105bd +size 4999813112 diff --git a/model-00004-of-00007.safetensors b/model-00004-of-00007.safetensors new file mode 100644 index 0000000..d688710 --- /dev/null +++ b/model-00004-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6839abb413e5ff7618597a4eb7574ecf5d666b4983a49d3f51f4424ff2da894 +size 4999813128 diff --git a/model-00005-of-00007.safetensors b/model-00005-of-00007.safetensors new file mode 100644 index 0000000..f6e49fe --- /dev/null +++ b/model-00005-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d0c981dbfd8e7d1fb805cc705fccca32a2898307d3b88cf40a4e64cec1db739 +size 4832007496 diff --git a/model-00006-of-00007.safetensors b/model-00006-of-00007.safetensors new file mode 100644 index 0000000..95a8eca --- /dev/null +++ b/model-00006-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:744c7e3ffa02a6d236d2b70e85da707de296563d6c050218fdda7aa505cb661b +size 4999813120 diff --git a/model-00007-of-00007.safetensors b/model-00007-of-00007.safetensors new file mode 100644 index 0000000..7050ba4 --- /dev/null +++ b/model-00007-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b47f2671a1dc3b131bddbabad3141fd3ae5845a1827e99cbd50930fb3d6a37 +size 2571158184 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..0985084 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 32121044992 + }, + "weight_map": { + "lm_head.weight": "model-00007-of-00007.safetensors", + "model.embed_tokens.weight": "model-00001-of-00007.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.20.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.26.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.30.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.norm.weight": "model-00007-of-00007.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..86a3394 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..8c6916a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..cfc9393 --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 0.999244142101285, + "total_flos": 0.0, + "train_loss": 1.1173522615757363, + "train_runtime": 1752.852, + "train_samples": 42336, + "train_samples_per_second": 24.153, + "train_steps_per_second": 0.377 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..9e19e4b --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,12653 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.999244142101285, + "eval_steps": 200, + "global_step": 661, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0015117157974300832, + "fcm_dpo/beta": 0.11051072180271149, + "fcm_dpo/delta": 0.49971169233322144, + "fcm_dpo/margin": -0.0013532638549804688, + "fcm_dpo/q_t": 0.5000448226928711, + "grad_norm": 31.18895149230957, + "learning_rate": 0.0, + "logits/chosen": 0.13337239623069763, + "logits/rejected": 0.12492949515581131, + "logps/chosen": -64.5841293334961, + "logps/ref_chosen": -64.61280822753906, + "logps/ref_rejected": -64.17195129394531, + "logps/rejected": -64.14192199707031, + "loss": 1.3866, + "margin_dpo/margin_mean": -0.0013527870178222656, + "margin_dpo/margin_std": 0.2561596930027008, + "step": 1 + }, + { + "epoch": 0.0030234315948601664, + "fcm_dpo/beta": 0.11625976860523224, + "fcm_dpo/delta": 0.49490365386009216, + "fcm_dpo/margin": 0.037450045347213745, + "fcm_dpo/q_t": 0.4989655911922455, + "grad_norm": 32.422725677490234, + "learning_rate": 7.462686567164179e-09, + "logits/chosen": 0.09414851665496826, + "logits/rejected": 0.07363267242908478, + "logps/chosen": -56.101890563964844, + "logps/ref_chosen": -56.0989990234375, + "logps/ref_rejected": -66.59971618652344, + "logps/rejected": -66.64006042480469, + "loss": 1.3819, + "margin_dpo/margin_mean": 0.03744968771934509, + "margin_dpo/margin_std": 0.27811938524246216, + "step": 2 + }, + { + "epoch": 0.0045351473922902496, + "fcm_dpo/beta": 0.12837310135364532, + "fcm_dpo/delta": 0.4961715638637543, + "fcm_dpo/margin": 0.002266407012939453, + "fcm_dpo/q_t": 0.4999309778213501, + "grad_norm": 40.0953483581543, + "learning_rate": 1.4925373134328357e-08, + "logits/chosen": 0.0993770956993103, + "logits/rejected": 0.06136491894721985, + "logps/chosen": -65.4115219116211, + "logps/ref_chosen": -65.45726013183594, + "logps/ref_rejected": -90.82853698730469, + "logps/rejected": -90.78506469726562, + "loss": 1.3859, + "margin_dpo/margin_mean": 0.0022667646408081055, + "margin_dpo/margin_std": 0.26775944232940674, + "step": 3 + }, + { + "epoch": 0.006046863189720333, + "fcm_dpo/beta": 0.14178214967250824, + "fcm_dpo/delta": 0.4972817301750183, + "fcm_dpo/margin": -0.031194627285003662, + "fcm_dpo/q_t": 0.5010493993759155, + "grad_norm": 48.728790283203125, + "learning_rate": 2.2388059701492534e-08, + "logits/chosen": 0.10073457658290863, + "logits/rejected": 0.08476720750331879, + "logps/chosen": -76.87289428710938, + "logps/ref_chosen": -76.86018371582031, + "logps/ref_rejected": -79.91523742675781, + "logps/rejected": -79.8967514038086, + "loss": 1.3908, + "margin_dpo/margin_mean": -0.031194984912872314, + "margin_dpo/margin_std": 0.3357463479042053, + "step": 4 + }, + { + "epoch": 0.007558578987150416, + "fcm_dpo/beta": 0.1644459217786789, + "fcm_dpo/delta": 0.49898889660835266, + "fcm_dpo/margin": -0.022104412317276, + "fcm_dpo/q_t": 0.5009359121322632, + "grad_norm": 48.499725341796875, + "learning_rate": 2.9850746268656714e-08, + "logits/chosen": 0.08101461827754974, + "logits/rejected": 0.04222995042800903, + "logps/chosen": -62.999996185302734, + "logps/ref_chosen": -62.97134017944336, + "logps/ref_rejected": -79.9192123413086, + "logps/rejected": -79.92576599121094, + "loss": 1.3905, + "margin_dpo/margin_mean": -0.02210336923599243, + "margin_dpo/margin_std": 0.283627986907959, + "step": 5 + }, + { + "epoch": 0.009070294784580499, + "fcm_dpo/beta": 0.1644459217786789, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.07127216458320618, + "fcm_dpo/q_t": 0.5029286742210388, + "grad_norm": 48.75336456298828, + "learning_rate": 3.731343283582089e-08, + "logits/chosen": 0.13917648792266846, + "logits/rejected": 0.09997415542602539, + "logps/chosen": -51.33795166015625, + "logps/ref_chosen": -51.30736541748047, + "logps/ref_rejected": -82.77239227294922, + "logps/rejected": -82.73170471191406, + "loss": 1.3986, + "margin_dpo/margin_mean": -0.07127270102500916, + "margin_dpo/margin_std": 0.29276320338249207, + "step": 6 + }, + { + "epoch": 0.010582010582010581, + "fcm_dpo/beta": 0.1816796362400055, + "fcm_dpo/delta": 0.4983155131340027, + "fcm_dpo/margin": -0.0058057308197021484, + "fcm_dpo/q_t": 0.500307023525238, + "grad_norm": 49.4698600769043, + "learning_rate": 4.477611940298507e-08, + "logits/chosen": 0.017587212845683098, + "logits/rejected": -0.02612943761050701, + "logps/chosen": -51.4460334777832, + "logps/ref_chosen": -51.45941162109375, + "logps/ref_rejected": -66.3828125, + "logps/rejected": -66.3636245727539, + "loss": 1.388, + "margin_dpo/margin_mean": -0.005805850028991699, + "margin_dpo/margin_std": 0.2854662537574768, + "step": 7 + }, + { + "epoch": 0.012093726379440665, + "fcm_dpo/beta": 0.1816796362400055, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.029055893421173096, + "fcm_dpo/q_t": 0.5013211965560913, + "grad_norm": 52.10679244995117, + "learning_rate": 5.223880597014925e-08, + "logits/chosen": 0.09742860496044159, + "logits/rejected": 0.07472395896911621, + "logps/chosen": -62.214691162109375, + "logps/ref_chosen": -62.197547912597656, + "logps/ref_rejected": -74.66180419921875, + "logps/rejected": -74.64989471435547, + "loss": 1.3925, + "margin_dpo/margin_mean": -0.02905610203742981, + "margin_dpo/margin_std": 0.3362266421318054, + "step": 8 + }, + { + "epoch": 0.013605442176870748, + "fcm_dpo/beta": 0.21118226647377014, + "fcm_dpo/delta": 0.9957462549209595, + "fcm_dpo/margin": 0.02298596501350403, + "fcm_dpo/q_t": 0.498937726020813, + "grad_norm": 66.31742858886719, + "learning_rate": 5.970149253731343e-08, + "logits/chosen": 0.17290404438972473, + "logits/rejected": 0.1132848858833313, + "logps/chosen": -55.65576934814453, + "logps/ref_chosen": -55.629722595214844, + "logps/ref_rejected": -86.21221923828125, + "logps/rejected": -86.2612533569336, + "loss": 1.3826, + "margin_dpo/margin_mean": 0.02298620343208313, + "margin_dpo/margin_std": 0.30049267411231995, + "step": 9 + }, + { + "epoch": 0.015117157974300832, + "fcm_dpo/beta": 0.24479255080223083, + "fcm_dpo/delta": 0.4950849413871765, + "fcm_dpo/margin": 0.01253315806388855, + "fcm_dpo/q_t": 0.4993648827075958, + "grad_norm": 72.88284301757812, + "learning_rate": 6.71641791044776e-08, + "logits/chosen": 0.11379828304052353, + "logits/rejected": 0.08385583758354187, + "logps/chosen": -62.68762969970703, + "logps/ref_chosen": -62.69060134887695, + "logps/ref_rejected": -90.610107421875, + "logps/rejected": -90.61967468261719, + "loss": 1.3853, + "margin_dpo/margin_mean": 0.01253288984298706, + "margin_dpo/margin_std": 0.3701857328414917, + "step": 10 + }, + { + "epoch": 0.016628873771730914, + "fcm_dpo/beta": 0.26968804001808167, + "fcm_dpo/delta": 0.484273761510849, + "fcm_dpo/margin": 0.04408371448516846, + "fcm_dpo/q_t": 0.4974249601364136, + "grad_norm": 79.30926513671875, + "learning_rate": 7.462686567164178e-08, + "logits/chosen": 0.10774752497673035, + "logits/rejected": 0.10090956091880798, + "logps/chosen": -65.75579071044922, + "logps/ref_chosen": -65.76712036132812, + "logps/ref_rejected": -72.4764633178711, + "logps/rejected": -72.50921630859375, + "loss": 1.376, + "margin_dpo/margin_mean": 0.04408392310142517, + "margin_dpo/margin_std": 0.2787271738052368, + "step": 11 + }, + { + "epoch": 0.018140589569160998, + "fcm_dpo/beta": 0.26968804001808167, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.013816118240356445, + "fcm_dpo/q_t": 0.5009292364120483, + "grad_norm": 76.66712951660156, + "learning_rate": 8.208955223880596e-08, + "logits/chosen": 0.03750212490558624, + "logits/rejected": 0.02106173150241375, + "logps/chosen": -60.710899353027344, + "logps/ref_chosen": -60.704891204833984, + "logps/ref_rejected": -69.41564178466797, + "logps/rejected": -69.4078369140625, + "loss": 1.3914, + "margin_dpo/margin_mean": -0.013815999031066895, + "margin_dpo/margin_std": 0.2728922367095947, + "step": 12 + }, + { + "epoch": 0.019652305366591082, + "fcm_dpo/beta": 0.26968804001808167, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.035725027322769165, + "fcm_dpo/q_t": 0.5023995637893677, + "grad_norm": 78.57530212402344, + "learning_rate": 8.955223880597014e-08, + "logits/chosen": 0.10675495862960815, + "logits/rejected": 0.044550854712724686, + "logps/chosen": -49.91514587402344, + "logps/ref_chosen": -49.90925598144531, + "logps/ref_rejected": -92.37818145751953, + "logps/rejected": -92.34834289550781, + "loss": 1.3971, + "margin_dpo/margin_mean": -0.03572601079940796, + "margin_dpo/margin_std": 0.24918314814567566, + "step": 13 + }, + { + "epoch": 0.021164021164021163, + "fcm_dpo/beta": 0.31274157762527466, + "fcm_dpo/delta": 0.9786568880081177, + "fcm_dpo/margin": 0.075018972158432, + "fcm_dpo/q_t": 0.49468034505844116, + "grad_norm": 91.86367797851562, + "learning_rate": 9.701492537313432e-08, + "logits/chosen": 0.08078277111053467, + "logits/rejected": 0.06322959065437317, + "logps/chosen": -60.599849700927734, + "logps/ref_chosen": -60.61879348754883, + "logps/ref_rejected": -71.79306030273438, + "logps/rejected": -71.84913635253906, + "loss": 1.3649, + "margin_dpo/margin_mean": 0.07501909136772156, + "margin_dpo/margin_std": 0.2812075614929199, + "step": 14 + }, + { + "epoch": 0.022675736961451247, + "fcm_dpo/beta": 0.3279946446418762, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.02524346113204956, + "fcm_dpo/q_t": 0.5020579099655151, + "grad_norm": 109.66602325439453, + "learning_rate": 1.044776119402985e-07, + "logits/chosen": 0.06694771349430084, + "logits/rejected": 0.023975659161806107, + "logps/chosen": -63.49495315551758, + "logps/ref_chosen": -63.46953582763672, + "logps/ref_rejected": -88.88951110839844, + "logps/rejected": -88.88968658447266, + "loss": 1.3976, + "margin_dpo/margin_mean": -0.025244086980819702, + "margin_dpo/margin_std": 0.3348177969455719, + "step": 15 + }, + { + "epoch": 0.02418745275888133, + "fcm_dpo/beta": 0.34503084421157837, + "fcm_dpo/delta": 0.4941604435443878, + "fcm_dpo/margin": 0.014522925019264221, + "fcm_dpo/q_t": 0.4988110661506653, + "grad_norm": 93.19413757324219, + "learning_rate": 1.1194029850746268e-07, + "logits/chosen": 0.11576000601053238, + "logits/rejected": 0.07864248752593994, + "logps/chosen": -46.55487823486328, + "logps/ref_chosen": -46.53229904174805, + "logps/ref_rejected": -74.27533721923828, + "logps/rejected": -74.31243896484375, + "loss": 1.3833, + "margin_dpo/margin_mean": 0.014522776007652283, + "margin_dpo/margin_std": 0.27633634209632874, + "step": 16 + }, + { + "epoch": 0.025699168556311415, + "fcm_dpo/beta": 0.39912450313568115, + "fcm_dpo/delta": 0.487219899892807, + "fcm_dpo/margin": -0.00017780065536499023, + "fcm_dpo/q_t": 0.5003235936164856, + "grad_norm": 134.72512817382812, + "learning_rate": 1.1940298507462686e-07, + "logits/chosen": 0.04781803488731384, + "logits/rejected": 0.02929597906768322, + "logps/chosen": -64.07421875, + "logps/ref_chosen": -64.07783508300781, + "logps/ref_rejected": -86.40876770019531, + "logps/rejected": -86.40497589111328, + "loss": 1.3906, + "margin_dpo/margin_mean": -0.00017789006233215332, + "margin_dpo/margin_std": 0.3190717101097107, + "step": 17 + }, + { + "epoch": 0.027210884353741496, + "fcm_dpo/beta": 0.4197884202003479, + "fcm_dpo/delta": 0.4926441013813019, + "fcm_dpo/margin": -0.029925107955932617, + "fcm_dpo/q_t": 0.5029613971710205, + "grad_norm": 120.34202575683594, + "learning_rate": 1.2686567164179106e-07, + "logits/chosen": 0.10955735296010971, + "logits/rejected": 0.06258425116539001, + "logps/chosen": -44.86344528198242, + "logps/ref_chosen": -44.87433624267578, + "logps/ref_rejected": -70.97604370117188, + "logps/rejected": -70.93523406982422, + "loss": 1.4019, + "margin_dpo/margin_mean": -0.029924869537353516, + "margin_dpo/margin_std": 0.30913057923316956, + "step": 18 + }, + { + "epoch": 0.02872260015117158, + "fcm_dpo/beta": 0.5113855004310608, + "fcm_dpo/delta": 0.9829479455947876, + "fcm_dpo/margin": 0.03535567224025726, + "fcm_dpo/q_t": 0.4958151578903198, + "grad_norm": 156.30233764648438, + "learning_rate": 1.343283582089552e-07, + "logits/chosen": 0.07092909514904022, + "logits/rejected": 0.05735887587070465, + "logps/chosen": -68.16175842285156, + "logps/ref_chosen": -68.1598129272461, + "logps/ref_rejected": -81.17138671875, + "logps/rejected": -81.20869445800781, + "loss": 1.3751, + "margin_dpo/margin_mean": 0.03535632789134979, + "margin_dpo/margin_std": 0.33560460805892944, + "step": 19 + }, + { + "epoch": 0.030234315948601664, + "fcm_dpo/beta": 0.621636152267456, + "fcm_dpo/delta": 0.9787266254425049, + "fcm_dpo/margin": 0.03790883719921112, + "fcm_dpo/q_t": 0.49468833208084106, + "grad_norm": 184.15403747558594, + "learning_rate": 1.4179104477611938e-07, + "logits/chosen": 0.11737000942230225, + "logits/rejected": 0.09504500031471252, + "logps/chosen": -53.65413284301758, + "logps/ref_chosen": -53.67856216430664, + "logps/ref_rejected": -74.16911315917969, + "logps/rejected": -74.18260192871094, + "loss": 1.3704, + "margin_dpo/margin_mean": 0.03790910542011261, + "margin_dpo/margin_std": 0.27916550636291504, + "step": 20 + }, + { + "epoch": 0.031746031746031744, + "fcm_dpo/beta": 0.719708263874054, + "fcm_dpo/delta": 0.49354374408721924, + "fcm_dpo/margin": 0.006038039922714233, + "fcm_dpo/q_t": 0.49931585788726807, + "grad_norm": 208.18948364257812, + "learning_rate": 1.4925373134328355e-07, + "logits/chosen": 0.09322724491357803, + "logits/rejected": 0.06828559935092926, + "logps/chosen": -64.68141174316406, + "logps/ref_chosen": -64.70155334472656, + "logps/ref_rejected": -81.02095031738281, + "logps/rejected": -81.0068359375, + "loss": 1.397, + "margin_dpo/margin_mean": 0.0060374438762664795, + "margin_dpo/margin_std": 0.34163737297058105, + "step": 21 + }, + { + "epoch": 0.03325774754346183, + "fcm_dpo/beta": 0.7948847413063049, + "fcm_dpo/delta": 0.4967557489871979, + "fcm_dpo/margin": -0.008793145418167114, + "fcm_dpo/q_t": 0.501756489276886, + "grad_norm": 235.72225952148438, + "learning_rate": 1.5671641791044775e-07, + "logits/chosen": 0.020929213613271713, + "logits/rejected": -0.0007745649782009423, + "logps/chosen": -58.05523681640625, + "logps/ref_chosen": -58.03599166870117, + "logps/ref_rejected": -80.72721862792969, + "logps/rejected": -80.7376708984375, + "loss": 1.4021, + "margin_dpo/margin_mean": -0.008793413639068604, + "margin_dpo/margin_std": 0.23543663322925568, + "step": 22 + }, + { + "epoch": 0.03476946334089191, + "fcm_dpo/beta": 0.8356242179870605, + "fcm_dpo/delta": 0.4879206120967865, + "fcm_dpo/margin": -0.007578670978546143, + "fcm_dpo/q_t": 0.5014785528182983, + "grad_norm": 285.7133483886719, + "learning_rate": 1.6417910447761193e-07, + "logits/chosen": 0.1404346227645874, + "logits/rejected": 0.11506737768650055, + "logps/chosen": -66.38188934326172, + "logps/ref_chosen": -66.35608673095703, + "logps/ref_rejected": -93.02769470214844, + "logps/rejected": -93.04591369628906, + "loss": 1.4157, + "margin_dpo/margin_mean": -0.007578998804092407, + "margin_dpo/margin_std": 0.3775022029876709, + "step": 23 + }, + { + "epoch": 0.036281179138321996, + "fcm_dpo/beta": 0.8763637542724609, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.023401737213134766, + "fcm_dpo/q_t": 0.5050784349441528, + "grad_norm": 237.49981689453125, + "learning_rate": 1.716417910447761e-07, + "logits/chosen": 0.13775691390037537, + "logits/rejected": 0.10474318265914917, + "logps/chosen": -54.4842529296875, + "logps/ref_chosen": -54.461238861083984, + "logps/ref_rejected": -68.33817291259766, + "logps/rejected": -68.33778381347656, + "loss": 1.4189, + "margin_dpo/margin_mean": -0.02340218424797058, + "margin_dpo/margin_std": 0.2496114820241928, + "step": 24 + }, + { + "epoch": 0.03779289493575208, + "fcm_dpo/beta": 1.0109740495681763, + "fcm_dpo/delta": 0.933335542678833, + "fcm_dpo/margin": 0.07055863738059998, + "fcm_dpo/q_t": 0.48359498381614685, + "grad_norm": 284.9603271484375, + "learning_rate": 1.7910447761194027e-07, + "logits/chosen": 0.09323587268590927, + "logits/rejected": 0.04254044592380524, + "logps/chosen": -60.0122184753418, + "logps/ref_chosen": -60.00420379638672, + "logps/ref_rejected": -90.47376251220703, + "logps/rejected": -90.55233764648438, + "loss": 1.3345, + "margin_dpo/margin_mean": 0.07055890560150146, + "margin_dpo/margin_std": 0.27643194794654846, + "step": 25 + }, + { + "epoch": 0.039304610733182165, + "fcm_dpo/beta": 1.2268295288085938, + "fcm_dpo/delta": 0.9872031211853027, + "fcm_dpo/margin": 0.011123806238174438, + "fcm_dpo/q_t": 0.49779731035232544, + "grad_norm": 363.8032531738281, + "learning_rate": 1.8656716417910447e-07, + "logits/chosen": 0.12300634384155273, + "logits/rejected": 0.1041463315486908, + "logps/chosen": -56.83332061767578, + "logps/ref_chosen": -56.81915283203125, + "logps/ref_rejected": -77.84333038330078, + "logps/rejected": -77.86862182617188, + "loss": 1.418, + "margin_dpo/margin_mean": 0.011123299598693848, + "margin_dpo/margin_std": 0.3511189818382263, + "step": 26 + }, + { + "epoch": 0.04081632653061224, + "fcm_dpo/beta": 1.4186409711837769, + "fcm_dpo/delta": 0.4878283143043518, + "fcm_dpo/margin": -0.018930166959762573, + "fcm_dpo/q_t": 0.5060181617736816, + "grad_norm": 426.4020690917969, + "learning_rate": 1.9402985074626865e-07, + "logits/chosen": 0.10950794070959091, + "logits/rejected": 0.08432013541460037, + "logps/chosen": -62.87256622314453, + "logps/ref_chosen": -62.87702560424805, + "logps/ref_rejected": -71.34437561035156, + "logps/rejected": -71.32098388671875, + "loss": 1.465, + "margin_dpo/margin_mean": -0.0189303457736969, + "margin_dpo/margin_std": 0.3256291151046753, + "step": 27 + }, + { + "epoch": 0.042328042328042326, + "fcm_dpo/beta": 1.5392485857009888, + "fcm_dpo/delta": 0.40797513723373413, + "fcm_dpo/margin": 0.019986987113952637, + "fcm_dpo/q_t": 0.49472981691360474, + "grad_norm": 428.8187561035156, + "learning_rate": 2.0149253731343282e-07, + "logits/chosen": 0.05481361597776413, + "logits/rejected": 0.04620601236820221, + "logps/chosen": -59.83418273925781, + "logps/ref_chosen": -59.8333740234375, + "logps/ref_rejected": -70.39804077148438, + "logps/rejected": -70.4188232421875, + "loss": 1.4194, + "margin_dpo/margin_mean": 0.019986748695373535, + "margin_dpo/margin_std": 0.31196290254592896, + "step": 28 + }, + { + "epoch": 0.04383975812547241, + "fcm_dpo/beta": 1.7656760215759277, + "fcm_dpo/delta": 0.9155327081680298, + "fcm_dpo/margin": 0.05319638550281525, + "fcm_dpo/q_t": 0.47983771562576294, + "grad_norm": 544.4055786132812, + "learning_rate": 2.08955223880597e-07, + "logits/chosen": 0.13988614082336426, + "logits/rejected": 0.12211866676807404, + "logps/chosen": -74.13853454589844, + "logps/ref_chosen": -74.12020111083984, + "logps/ref_rejected": -83.33099365234375, + "logps/rejected": -83.40251159667969, + "loss": 1.3481, + "margin_dpo/margin_mean": 0.053196460008621216, + "margin_dpo/margin_std": 0.2625643312931061, + "step": 29 + }, + { + "epoch": 0.045351473922902494, + "fcm_dpo/beta": 2.0389435291290283, + "fcm_dpo/delta": 0.4901547431945801, + "fcm_dpo/margin": -0.004268288612365723, + "fcm_dpo/q_t": 0.5039973258972168, + "grad_norm": 641.551025390625, + "learning_rate": 2.1641791044776117e-07, + "logits/chosen": 0.13575318455696106, + "logits/rejected": 0.08044232428073883, + "logps/chosen": -50.786643981933594, + "logps/ref_chosen": -50.75128936767578, + "logps/ref_rejected": -89.29063415527344, + "logps/rejected": -89.32171630859375, + "loss": 1.4942, + "margin_dpo/margin_mean": -0.004268676042556763, + "margin_dpo/margin_std": 0.32202666997909546, + "step": 30 + }, + { + "epoch": 0.04686318972033258, + "fcm_dpo/beta": 2.3525500297546387, + "fcm_dpo/delta": 0.93133544921875, + "fcm_dpo/margin": 0.0310114324092865, + "fcm_dpo/q_t": 0.4873350262641907, + "grad_norm": 821.974365234375, + "learning_rate": 2.2388059701492537e-07, + "logits/chosen": 0.10764593631029129, + "logits/rejected": 0.061387479305267334, + "logps/chosen": -65.36897277832031, + "logps/ref_chosen": -65.33675384521484, + "logps/ref_rejected": -100.76666259765625, + "logps/rejected": -100.82989501953125, + "loss": 1.4747, + "margin_dpo/margin_mean": 0.03101155161857605, + "margin_dpo/margin_std": 0.35841095447540283, + "step": 31 + }, + { + "epoch": 0.04837490551776266, + "fcm_dpo/beta": 2.5499143600463867, + "fcm_dpo/delta": 0.3668806552886963, + "fcm_dpo/margin": 0.045269906520843506, + "fcm_dpo/q_t": 0.47517523169517517, + "grad_norm": 814.4436645507812, + "learning_rate": 2.3134328358208954e-07, + "logits/chosen": 0.10604210197925568, + "logits/rejected": 0.09800020605325699, + "logps/chosen": -67.19169616699219, + "logps/ref_chosen": -67.18333435058594, + "logps/ref_rejected": -82.80763244628906, + "logps/rejected": -82.86127471923828, + "loss": 1.4012, + "margin_dpo/margin_mean": 0.045270055532455444, + "margin_dpo/margin_std": 0.2860063314437866, + "step": 32 + }, + { + "epoch": 0.049886621315192746, + "fcm_dpo/beta": 2.7437379360198975, + "fcm_dpo/delta": 0.3657742738723755, + "fcm_dpo/margin": 0.042653635144233704, + "fcm_dpo/q_t": 0.4748280644416809, + "grad_norm": 934.1159057617188, + "learning_rate": 2.388059701492537e-07, + "logits/chosen": 0.0492779016494751, + "logits/rejected": 0.022379783913493156, + "logps/chosen": -64.0654067993164, + "logps/ref_chosen": -64.03948211669922, + "logps/ref_rejected": -75.68357849121094, + "logps/rejected": -75.75216674804688, + "loss": 1.4557, + "margin_dpo/margin_mean": 0.042654380202293396, + "margin_dpo/margin_std": 0.3218376338481903, + "step": 33 + }, + { + "epoch": 0.05139833711262283, + "fcm_dpo/beta": 3.2462871074676514, + "fcm_dpo/delta": 0.868577241897583, + "fcm_dpo/margin": 0.04391145706176758, + "fcm_dpo/q_t": 0.47198355197906494, + "grad_norm": 959.0977172851562, + "learning_rate": 2.4626865671641786e-07, + "logits/chosen": 0.0971483588218689, + "logits/rejected": 0.0673779547214508, + "logps/chosen": -53.695762634277344, + "logps/ref_chosen": -53.6642951965332, + "logps/ref_rejected": -65.77989959716797, + "logps/rejected": -65.85527038574219, + "loss": 1.4312, + "margin_dpo/margin_mean": 0.04391142725944519, + "margin_dpo/margin_std": 0.2787018120288849, + "step": 34 + }, + { + "epoch": 0.05291005291005291, + "fcm_dpo/beta": 3.5613765716552734, + "fcm_dpo/delta": 0.49954742193222046, + "fcm_dpo/margin": 0.0079115629196167, + "fcm_dpo/q_t": 0.4954897165298462, + "grad_norm": 1141.895751953125, + "learning_rate": 2.537313432835821e-07, + "logits/chosen": 0.04493723437190056, + "logits/rejected": 0.022909432649612427, + "logps/chosen": -61.07041549682617, + "logps/ref_chosen": -61.01686096191406, + "logps/ref_rejected": -72.78598022460938, + "logps/rejected": -72.84745025634766, + "loss": 1.6389, + "margin_dpo/margin_mean": 0.0079115629196167, + "margin_dpo/margin_std": 0.32450151443481445, + "step": 35 + }, + { + "epoch": 0.05442176870748299, + "fcm_dpo/beta": 3.89394474029541, + "fcm_dpo/delta": 0.39777103066444397, + "fcm_dpo/margin": 0.013609737157821655, + "fcm_dpo/q_t": 0.5010133981704712, + "grad_norm": 1230.4146728515625, + "learning_rate": 2.611940298507462e-07, + "logits/chosen": 0.1099657341837883, + "logits/rejected": 0.056441109627485275, + "logps/chosen": -50.61913299560547, + "logps/ref_chosen": -50.53736114501953, + "logps/ref_rejected": -78.11678314208984, + "logps/rejected": -78.212158203125, + "loss": 1.6768, + "margin_dpo/margin_mean": 0.013609647750854492, + "margin_dpo/margin_std": 0.32924020290374756, + "step": 36 + }, + { + "epoch": 0.055933484504913075, + "fcm_dpo/beta": 4.411220550537109, + "fcm_dpo/delta": 0.5480049252510071, + "fcm_dpo/margin": 0.10784178972244263, + "fcm_dpo/q_t": 0.4541955292224884, + "grad_norm": 1664.73095703125, + "learning_rate": 2.686567164179104e-07, + "logits/chosen": 0.08195307105779648, + "logits/rejected": 0.004701277241110802, + "logps/chosen": -59.608680725097656, + "logps/ref_chosen": -59.55394744873047, + "logps/ref_rejected": -108.27702331542969, + "logps/rejected": -108.43960571289062, + "loss": 1.4659, + "margin_dpo/margin_mean": 0.10784146189689636, + "margin_dpo/margin_std": 0.4072269797325134, + "step": 37 + }, + { + "epoch": 0.05744520030234316, + "fcm_dpo/beta": 4.725587844848633, + "fcm_dpo/delta": 0.22491098940372467, + "fcm_dpo/margin": 0.04019525647163391, + "fcm_dpo/q_t": 0.4804103374481201, + "grad_norm": 1523.71044921875, + "learning_rate": 2.761194029850746e-07, + "logits/chosen": 0.056998323649168015, + "logits/rejected": 0.04311235621571541, + "logps/chosen": -65.85867309570312, + "logps/ref_chosen": -65.78836059570312, + "logps/ref_rejected": -76.1619873046875, + "logps/rejected": -76.27249908447266, + "loss": 1.7981, + "margin_dpo/margin_mean": 0.040194928646087646, + "margin_dpo/margin_std": 0.36567050218582153, + "step": 38 + }, + { + "epoch": 0.05895691609977324, + "fcm_dpo/beta": 5.4121809005737305, + "fcm_dpo/delta": 0.8187992572784424, + "fcm_dpo/margin": 0.03373938798904419, + "fcm_dpo/q_t": 0.4426850378513336, + "grad_norm": 1836.44189453125, + "learning_rate": 2.8358208955223876e-07, + "logits/chosen": 0.14140446484088898, + "logits/rejected": 0.11523914337158203, + "logps/chosen": -57.26402282714844, + "logps/ref_chosen": -57.17681121826172, + "logps/ref_rejected": -79.486328125, + "logps/rejected": -79.60729217529297, + "loss": 1.9268, + "margin_dpo/margin_mean": 0.033740073442459106, + "margin_dpo/margin_std": 0.3646671772003174, + "step": 39 + }, + { + "epoch": 0.06046863189720333, + "fcm_dpo/beta": 5.907527923583984, + "fcm_dpo/delta": 0.297378271818161, + "fcm_dpo/margin": 0.006361484527587891, + "fcm_dpo/q_t": 0.4902653098106384, + "grad_norm": 2190.686767578125, + "learning_rate": 2.9104477611940296e-07, + "logits/chosen": 0.10774768888950348, + "logits/rejected": 0.058216311037540436, + "logps/chosen": -61.42626953125, + "logps/ref_chosen": -61.33416748046875, + "logps/ref_rejected": -79.10697174072266, + "logps/rejected": -79.20543670654297, + "loss": 2.1253, + "margin_dpo/margin_mean": 0.00636136531829834, + "margin_dpo/margin_std": 0.3456147611141205, + "step": 40 + }, + { + "epoch": 0.06198034769463341, + "fcm_dpo/beta": 6.096949100494385, + "fcm_dpo/delta": 0.3107817769050598, + "fcm_dpo/margin": 0.03788435459136963, + "fcm_dpo/q_t": 0.4918346107006073, + "grad_norm": 2313.122314453125, + "learning_rate": 2.985074626865671e-07, + "logits/chosen": 0.06469070911407471, + "logits/rejected": 0.04410509765148163, + "logps/chosen": -67.65518188476562, + "logps/ref_chosen": -67.5467300415039, + "logps/ref_rejected": -83.87788391113281, + "logps/rejected": -84.02423095703125, + "loss": 2.1107, + "margin_dpo/margin_mean": 0.03788486123085022, + "margin_dpo/margin_std": 0.3886951506137848, + "step": 41 + }, + { + "epoch": 0.06349206349206349, + "fcm_dpo/beta": 6.579242706298828, + "fcm_dpo/delta": 0.22767893970012665, + "fcm_dpo/margin": 0.00848454236984253, + "fcm_dpo/q_t": 0.4764997959136963, + "grad_norm": 2254.65283203125, + "learning_rate": 3.059701492537313e-07, + "logits/chosen": 0.04841721057891846, + "logits/rejected": 0.026929516345262527, + "logps/chosen": -61.369564056396484, + "logps/ref_chosen": -61.26485824584961, + "logps/ref_rejected": -76.3629150390625, + "logps/rejected": -76.47610473632812, + "loss": 2.2164, + "margin_dpo/margin_mean": 0.008484512567520142, + "margin_dpo/margin_std": 0.33282727003097534, + "step": 42 + }, + { + "epoch": 0.06500377928949358, + "fcm_dpo/beta": 7.226245880126953, + "fcm_dpo/delta": 0.5711226463317871, + "fcm_dpo/margin": 0.06204667687416077, + "fcm_dpo/q_t": 0.44025903940200806, + "grad_norm": 2534.08984375, + "learning_rate": 3.134328358208955e-07, + "logits/chosen": 0.062497012317180634, + "logits/rejected": 0.051983729004859924, + "logps/chosen": -71.88862609863281, + "logps/ref_chosen": -71.80902862548828, + "logps/ref_rejected": -81.12464141845703, + "logps/rejected": -81.26628875732422, + "loss": 1.9347, + "margin_dpo/margin_mean": 0.0620463490486145, + "margin_dpo/margin_std": 0.32832396030426025, + "step": 43 + }, + { + "epoch": 0.06651549508692366, + "fcm_dpo/beta": 7.3753557205200195, + "fcm_dpo/delta": 0.0, + "fcm_dpo/margin": -0.003143906593322754, + "fcm_dpo/q_t": 0.5123412609100342, + "grad_norm": 3110.311279296875, + "learning_rate": 3.2089552238805965e-07, + "logits/chosen": 0.058967474848032, + "logits/rejected": 0.02790246158838272, + "logps/chosen": -66.68571472167969, + "logps/ref_chosen": -66.55043029785156, + "logps/ref_rejected": -85.06198120117188, + "logps/rejected": -85.19412994384766, + "loss": 2.7173, + "margin_dpo/margin_mean": -0.00314408540725708, + "margin_dpo/margin_std": 0.39732399582862854, + "step": 44 + }, + { + "epoch": 0.06802721088435375, + "fcm_dpo/beta": 7.334336280822754, + "fcm_dpo/delta": -0.0020287036895751953, + "fcm_dpo/margin": 0.13652004301548004, + "fcm_dpo/q_t": 0.35875630378723145, + "grad_norm": 1952.19775390625, + "learning_rate": 3.2835820895522385e-07, + "logits/chosen": 0.10706457495689392, + "logits/rejected": 0.05497971177101135, + "logps/chosen": -62.34738540649414, + "logps/ref_chosen": -62.24385452270508, + "logps/ref_rejected": -92.96665954589844, + "logps/rejected": -93.20671081542969, + "loss": 1.5806, + "margin_dpo/margin_mean": 0.1365204155445099, + "margin_dpo/margin_std": 0.33906716108322144, + "step": 45 + }, + { + "epoch": 0.06953892668178382, + "fcm_dpo/beta": 7.5740742683410645, + "fcm_dpo/delta": 0.09216267615556717, + "fcm_dpo/margin": 0.12037345767021179, + "fcm_dpo/q_t": 0.4014553129673004, + "grad_norm": 2483.462646484375, + "learning_rate": 3.3582089552238805e-07, + "logits/chosen": 0.10916443914175034, + "logits/rejected": 0.06387359648942947, + "logps/chosen": -61.58867263793945, + "logps/ref_chosen": -61.498905181884766, + "logps/ref_rejected": -78.91172790527344, + "logps/rejected": -79.12187194824219, + "loss": 1.8488, + "margin_dpo/margin_mean": 0.12037333846092224, + "margin_dpo/margin_std": 0.37311580777168274, + "step": 46 + }, + { + "epoch": 0.0710506424792139, + "fcm_dpo/beta": 7.277153968811035, + "fcm_dpo/delta": -0.1604071408510208, + "fcm_dpo/margin": 0.15663331747055054, + "fcm_dpo/q_t": 0.3513562083244324, + "grad_norm": 1870.4586181640625, + "learning_rate": 3.432835820895522e-07, + "logits/chosen": 0.04342661425471306, + "logits/rejected": -0.00011028675362467766, + "logps/chosen": -51.65697479248047, + "logps/ref_chosen": -51.578346252441406, + "logps/ref_rejected": -68.2215576171875, + "logps/rejected": -68.45681762695312, + "loss": 1.3277, + "margin_dpo/margin_mean": 0.1566331386566162, + "margin_dpo/margin_std": 0.3138054609298706, + "step": 47 + }, + { + "epoch": 0.07256235827664399, + "fcm_dpo/beta": 8.184296607971191, + "fcm_dpo/delta": 0.7369337677955627, + "fcm_dpo/margin": 0.03382202982902527, + "fcm_dpo/q_t": 0.485114187002182, + "grad_norm": 2785.64453125, + "learning_rate": 3.507462686567164e-07, + "logits/chosen": 0.16853903234004974, + "logits/rejected": 0.1380309760570526, + "logps/chosen": -51.94841003417969, + "logps/ref_chosen": -51.79365158081055, + "logps/ref_rejected": -64.22503662109375, + "logps/rejected": -64.41361999511719, + "loss": 2.617, + "margin_dpo/margin_mean": 0.03382223844528198, + "margin_dpo/margin_std": 0.38840365409851074, + "step": 48 + }, + { + "epoch": 0.07407407407407407, + "fcm_dpo/beta": 9.334107398986816, + "fcm_dpo/delta": 0.5954843163490295, + "fcm_dpo/margin": 0.044813498854637146, + "fcm_dpo/q_t": 0.4577757716178894, + "grad_norm": 2963.8173828125, + "learning_rate": 3.5820895522388055e-07, + "logits/chosen": 0.038059771060943604, + "logits/rejected": 0.01622236706316471, + "logps/chosen": -58.26384353637695, + "logps/ref_chosen": -58.13460159301758, + "logps/ref_rejected": -64.63206481933594, + "logps/rejected": -64.80612182617188, + "loss": 2.5832, + "margin_dpo/margin_mean": 0.04481416940689087, + "margin_dpo/margin_std": 0.3690647482872009, + "step": 49 + }, + { + "epoch": 0.07558578987150416, + "fcm_dpo/beta": 9.334890365600586, + "fcm_dpo/delta": -0.06085062772035599, + "fcm_dpo/margin": 0.11278587579727173, + "fcm_dpo/q_t": 0.4165031909942627, + "grad_norm": 2980.169921875, + "learning_rate": 3.6567164179104475e-07, + "logits/chosen": 0.11696229875087738, + "logits/rejected": 0.08666031062602997, + "logps/chosen": -52.99673080444336, + "logps/ref_chosen": -52.85643768310547, + "logps/ref_rejected": -72.17460632324219, + "logps/rejected": -72.42768859863281, + "loss": 2.2159, + "margin_dpo/margin_mean": 0.11278638243675232, + "margin_dpo/margin_std": 0.39860716462135315, + "step": 50 + }, + { + "epoch": 0.07709750566893424, + "fcm_dpo/beta": 8.363540649414062, + "fcm_dpo/delta": -0.5726056098937988, + "fcm_dpo/margin": 0.15886437892913818, + "fcm_dpo/q_t": 0.4000622630119324, + "grad_norm": 2550.51953125, + "learning_rate": 3.7313432835820895e-07, + "logits/chosen": 0.08093120157718658, + "logits/rejected": 0.05302312225103378, + "logps/chosen": -63.80950927734375, + "logps/ref_chosen": -63.65644073486328, + "logps/ref_rejected": -86.13229370117188, + "logps/rejected": -86.44422912597656, + "loss": 1.8986, + "margin_dpo/margin_mean": 0.15886464715003967, + "margin_dpo/margin_std": 0.420296311378479, + "step": 51 + }, + { + "epoch": 0.07860922146636433, + "fcm_dpo/beta": 8.45156478881836, + "fcm_dpo/delta": 0.09679965674877167, + "fcm_dpo/margin": 0.10791899263858795, + "fcm_dpo/q_t": 0.4053017497062683, + "grad_norm": 3315.59912109375, + "learning_rate": 3.805970149253731e-07, + "logits/chosen": 0.07245868444442749, + "logits/rejected": 0.023480474948883057, + "logps/chosen": -68.0364990234375, + "logps/ref_chosen": -67.8402099609375, + "logps/ref_rejected": -96.97090911865234, + "logps/rejected": -97.27511596679688, + "loss": 2.2996, + "margin_dpo/margin_mean": 0.10791890323162079, + "margin_dpo/margin_std": 0.4656970500946045, + "step": 52 + }, + { + "epoch": 0.0801209372637944, + "fcm_dpo/beta": 8.50004768371582, + "fcm_dpo/delta": 0.018193505704402924, + "fcm_dpo/margin": 0.11565050482749939, + "fcm_dpo/q_t": 0.371160626411438, + "grad_norm": 2222.445556640625, + "learning_rate": 3.880597014925373e-07, + "logits/chosen": 0.07711566984653473, + "logits/rejected": 0.06644274294376373, + "logps/chosen": -57.06134796142578, + "logps/ref_chosen": -56.87813949584961, + "logps/ref_rejected": -60.75569152832031, + "logps/rejected": -61.05455017089844, + "loss": 1.7873, + "margin_dpo/margin_mean": 0.11565083265304565, + "margin_dpo/margin_std": 0.32278263568878174, + "step": 53 + }, + { + "epoch": 0.08163265306122448, + "fcm_dpo/beta": 8.553638458251953, + "fcm_dpo/delta": 0.20764021575450897, + "fcm_dpo/margin": 0.09295859932899475, + "fcm_dpo/q_t": 0.40855568647384644, + "grad_norm": 2532.174072265625, + "learning_rate": 3.9552238805970144e-07, + "logits/chosen": 0.03949524462223053, + "logits/rejected": 0.024343054741621017, + "logps/chosen": -47.497901916503906, + "logps/ref_chosen": -47.26692199707031, + "logps/ref_rejected": -62.19426727294922, + "logps/rejected": -62.51820373535156, + "loss": 2.2098, + "margin_dpo/margin_mean": 0.09295853972434998, + "margin_dpo/margin_std": 0.3836787939071655, + "step": 54 + }, + { + "epoch": 0.08314436885865457, + "fcm_dpo/beta": 7.794929027557373, + "fcm_dpo/delta": -0.9819808006286621, + "fcm_dpo/margin": 0.23201557993888855, + "fcm_dpo/q_t": 0.3249555230140686, + "grad_norm": 2186.456298828125, + "learning_rate": 4.0298507462686564e-07, + "logits/chosen": 0.029716331511735916, + "logits/rejected": -0.04705657809972763, + "logps/chosen": -50.511959075927734, + "logps/ref_chosen": -50.32619094848633, + "logps/ref_rejected": -92.44389343261719, + "logps/rejected": -92.8616714477539, + "loss": 1.4207, + "margin_dpo/margin_mean": 0.2320151925086975, + "margin_dpo/margin_std": 0.42721158266067505, + "step": 55 + }, + { + "epoch": 0.08465608465608465, + "fcm_dpo/beta": 7.265107154846191, + "fcm_dpo/delta": -0.08848509937524796, + "fcm_dpo/margin": 0.1485108733177185, + "fcm_dpo/q_t": 0.32371559739112854, + "grad_norm": 1776.4510498046875, + "learning_rate": 4.1044776119402984e-07, + "logits/chosen": 0.12203441560268402, + "logits/rejected": 0.09960527718067169, + "logps/chosen": -56.979713439941406, + "logps/ref_chosen": -56.766971588134766, + "logps/ref_rejected": -66.30504608154297, + "logps/rejected": -66.66629791259766, + "loss": 1.5337, + "margin_dpo/margin_mean": 0.1485109031200409, + "margin_dpo/margin_std": 0.36086180806159973, + "step": 56 + }, + { + "epoch": 0.08616780045351474, + "fcm_dpo/beta": 7.139953136444092, + "fcm_dpo/delta": -0.0027963966131210327, + "fcm_dpo/margin": 0.14022627472877502, + "fcm_dpo/q_t": 0.3702242970466614, + "grad_norm": 2208.6005859375, + "learning_rate": 4.17910447761194e-07, + "logits/chosen": 0.09269669651985168, + "logits/rejected": 0.02815322019159794, + "logps/chosen": -58.0312614440918, + "logps/ref_chosen": -57.76774597167969, + "logps/ref_rejected": -82.75698852539062, + "logps/rejected": -83.16073608398438, + "loss": 1.9812, + "margin_dpo/margin_mean": 0.14022645354270935, + "margin_dpo/margin_std": 0.4531812369823456, + "step": 57 + }, + { + "epoch": 0.08767951625094482, + "fcm_dpo/beta": 6.859474182128906, + "fcm_dpo/delta": -0.0021596550941467285, + "fcm_dpo/margin": 0.13939353823661804, + "fcm_dpo/q_t": 0.4166187345981598, + "grad_norm": 2155.36083984375, + "learning_rate": 4.253731343283582e-07, + "logits/chosen": 0.04018617421388626, + "logits/rejected": 0.024990694597363472, + "logps/chosen": -72.99790954589844, + "logps/ref_chosen": -72.76408386230469, + "logps/ref_rejected": -84.49275207519531, + "logps/rejected": -84.865966796875, + "loss": 2.1325, + "margin_dpo/margin_mean": 0.13939306139945984, + "margin_dpo/margin_std": 0.4941212832927704, + "step": 58 + }, + { + "epoch": 0.08919123204837491, + "fcm_dpo/beta": 7.301891326904297, + "fcm_dpo/delta": -0.22430884838104248, + "fcm_dpo/margin": 0.15986737608909607, + "fcm_dpo/q_t": 0.36295658349990845, + "grad_norm": 1929.1551513671875, + "learning_rate": 4.3283582089552234e-07, + "logits/chosen": 0.10927902162075043, + "logits/rejected": 0.04400138556957245, + "logps/chosen": -50.06452178955078, + "logps/ref_chosen": -49.820777893066406, + "logps/ref_rejected": -77.14368438720703, + "logps/rejected": -77.54730224609375, + "loss": 1.5376, + "margin_dpo/margin_mean": 0.15986764430999756, + "margin_dpo/margin_std": 0.36004385352134705, + "step": 59 + }, + { + "epoch": 0.09070294784580499, + "fcm_dpo/beta": 7.388426780700684, + "fcm_dpo/delta": 0.45878517627716064, + "fcm_dpo/margin": 0.07655715942382812, + "fcm_dpo/q_t": 0.44889310002326965, + "grad_norm": 2771.480712890625, + "learning_rate": 4.4029850746268654e-07, + "logits/chosen": 0.09602642804384232, + "logits/rejected": 0.09458990395069122, + "logps/chosen": -63.469207763671875, + "logps/ref_chosen": -63.22477340698242, + "logps/ref_rejected": -61.360477447509766, + "logps/rejected": -61.68146896362305, + "loss": 2.1317, + "margin_dpo/margin_mean": 0.07655695080757141, + "margin_dpo/margin_std": 0.3793250024318695, + "step": 60 + }, + { + "epoch": 0.09221466364323508, + "fcm_dpo/beta": 7.838181495666504, + "fcm_dpo/delta": 0.23324143886566162, + "fcm_dpo/margin": 0.09999506175518036, + "fcm_dpo/q_t": 0.40534743666648865, + "grad_norm": 2478.5703125, + "learning_rate": 4.4776119402985074e-07, + "logits/chosen": 0.12898309528827667, + "logits/rejected": 0.0962858498096466, + "logps/chosen": -49.336029052734375, + "logps/ref_chosen": -49.01679992675781, + "logps/ref_rejected": -74.90817260742188, + "logps/rejected": -75.327392578125, + "loss": 2.1133, + "margin_dpo/margin_mean": 0.09999510645866394, + "margin_dpo/margin_std": 0.3832091987133026, + "step": 61 + }, + { + "epoch": 0.09372637944066516, + "fcm_dpo/beta": 7.966916084289551, + "fcm_dpo/delta": -0.13704264163970947, + "fcm_dpo/margin": 0.13938570022583008, + "fcm_dpo/q_t": 0.36962825059890747, + "grad_norm": 2647.16162109375, + "learning_rate": 4.552238805970149e-07, + "logits/chosen": 0.11322169005870819, + "logits/rejected": 0.07340162247419357, + "logps/chosen": -63.07339859008789, + "logps/ref_chosen": -62.751869201660156, + "logps/ref_rejected": -78.93360900878906, + "logps/rejected": -79.39452362060547, + "loss": 2.0748, + "margin_dpo/margin_mean": 0.13938573002815247, + "margin_dpo/margin_std": 0.46554332971572876, + "step": 62 + }, + { + "epoch": 0.09523809523809523, + "fcm_dpo/beta": 7.405724048614502, + "fcm_dpo/delta": -0.43855780363082886, + "fcm_dpo/margin": 0.1857583224773407, + "fcm_dpo/q_t": 0.3208463490009308, + "grad_norm": 2495.121826171875, + "learning_rate": 4.626865671641791e-07, + "logits/chosen": 0.17758890986442566, + "logits/rejected": 0.15252208709716797, + "logps/chosen": -60.78285598754883, + "logps/ref_chosen": -60.51525115966797, + "logps/ref_rejected": -85.11021423339844, + "logps/rejected": -85.56358337402344, + "loss": 1.4936, + "margin_dpo/margin_mean": 0.1857585310935974, + "margin_dpo/margin_std": 0.3698121905326843, + "step": 63 + }, + { + "epoch": 0.09674981103552532, + "fcm_dpo/beta": 7.346306324005127, + "fcm_dpo/delta": 0.42287638783454895, + "fcm_dpo/margin": 0.08198145031929016, + "fcm_dpo/q_t": 0.44880834221839905, + "grad_norm": 2452.107421875, + "learning_rate": 4.701492537313433e-07, + "logits/chosen": 0.07220865786075592, + "logits/rejected": 0.047520771622657776, + "logps/chosen": -51.5291748046875, + "logps/ref_chosen": -51.20684814453125, + "logps/ref_rejected": -66.93081665039062, + "logps/rejected": -67.33512878417969, + "loss": 2.1847, + "margin_dpo/margin_mean": 0.08198148012161255, + "margin_dpo/margin_std": 0.39635199308395386, + "step": 64 + }, + { + "epoch": 0.0982615268329554, + "fcm_dpo/beta": 6.500675201416016, + "fcm_dpo/delta": -1.11879301071167, + "fcm_dpo/margin": 0.2926085889339447, + "fcm_dpo/q_t": 0.2865545451641083, + "grad_norm": 2000.08251953125, + "learning_rate": 4.776119402985074e-07, + "logits/chosen": 0.1627321094274521, + "logits/rejected": 0.13373470306396484, + "logps/chosen": -67.60956573486328, + "logps/ref_chosen": -67.2886962890625, + "logps/ref_rejected": -74.44281005859375, + "logps/rejected": -75.05628967285156, + "loss": 1.3075, + "margin_dpo/margin_mean": 0.29260820150375366, + "margin_dpo/margin_std": 0.4580235481262207, + "step": 65 + }, + { + "epoch": 0.09977324263038549, + "fcm_dpo/beta": 6.545133590698242, + "fcm_dpo/delta": 0.5023878216743469, + "fcm_dpo/margin": 0.08048596978187561, + "fcm_dpo/q_t": 0.435330331325531, + "grad_norm": 2383.31005859375, + "learning_rate": 4.850746268656717e-07, + "logits/chosen": 0.08348944783210754, + "logits/rejected": 0.05969405546784401, + "logps/chosen": -71.08229064941406, + "logps/ref_chosen": -70.743408203125, + "logps/ref_rejected": -77.26499938964844, + "logps/rejected": -77.68437194824219, + "loss": 1.9852, + "margin_dpo/margin_mean": 0.08048596978187561, + "margin_dpo/margin_std": 0.4329206943511963, + "step": 66 + }, + { + "epoch": 0.10128495842781557, + "fcm_dpo/beta": 6.768010139465332, + "fcm_dpo/delta": 0.0033745458349585533, + "fcm_dpo/margin": 0.1473006308078766, + "fcm_dpo/q_t": 0.39285334944725037, + "grad_norm": 2144.995849609375, + "learning_rate": 4.925373134328357e-07, + "logits/chosen": 0.06257347017526627, + "logits/rejected": 0.007243716157972813, + "logps/chosen": -60.883270263671875, + "logps/ref_chosen": -60.60260009765625, + "logps/ref_rejected": -75.22235870361328, + "logps/rejected": -75.65032958984375, + "loss": 1.6865, + "margin_dpo/margin_mean": 0.1473003625869751, + "margin_dpo/margin_std": 0.44022125005722046, + "step": 67 + }, + { + "epoch": 0.10279667422524566, + "fcm_dpo/beta": 6.251596450805664, + "fcm_dpo/delta": -0.4169546067714691, + "fcm_dpo/margin": 0.21536040306091309, + "fcm_dpo/q_t": 0.33271753787994385, + "grad_norm": 1849.2083740234375, + "learning_rate": 5e-07, + "logits/chosen": 0.04055653512477875, + "logits/rejected": 0.01153562217950821, + "logps/chosen": -77.89004516601562, + "logps/ref_chosen": -77.52836608886719, + "logps/ref_rejected": -93.17778015136719, + "logps/rejected": -93.75480651855469, + "loss": 1.5844, + "margin_dpo/margin_mean": 0.21536031365394592, + "margin_dpo/margin_std": 0.4642139673233032, + "step": 68 + }, + { + "epoch": 0.10430839002267574, + "fcm_dpo/beta": 6.393548488616943, + "fcm_dpo/delta": -0.062498897314071655, + "fcm_dpo/margin": 0.16213825345039368, + "fcm_dpo/q_t": 0.3610564172267914, + "grad_norm": 1912.0982666015625, + "learning_rate": 4.999965034812934e-07, + "logits/chosen": 0.06711920350790024, + "logits/rejected": 0.02523168735206127, + "logps/chosen": -66.31640625, + "logps/ref_chosen": -65.94305419921875, + "logps/ref_rejected": -89.7735595703125, + "logps/rejected": -90.30905151367188, + "loss": 1.8845, + "margin_dpo/margin_mean": 0.16213801503181458, + "margin_dpo/margin_std": 0.45384150743484497, + "step": 69 + }, + { + "epoch": 0.10582010582010581, + "fcm_dpo/beta": 5.698063850402832, + "fcm_dpo/delta": -0.3884986937046051, + "fcm_dpo/margin": 0.13598540425300598, + "fcm_dpo/q_t": 0.3947943449020386, + "grad_norm": 1745.823486328125, + "learning_rate": 4.999860140229787e-07, + "logits/chosen": 0.11856390535831451, + "logits/rejected": 0.09516175091266632, + "logps/chosen": -62.308258056640625, + "logps/ref_chosen": -61.95791244506836, + "logps/ref_rejected": -75.80945587158203, + "logps/rejected": -76.29579162597656, + "loss": 1.7483, + "margin_dpo/margin_mean": 0.1359853446483612, + "margin_dpo/margin_std": 0.397053986787796, + "step": 70 + }, + { + "epoch": 0.1073318216175359, + "fcm_dpo/beta": 6.111842632293701, + "fcm_dpo/delta": 0.5105581283569336, + "fcm_dpo/margin": 0.08478209376335144, + "fcm_dpo/q_t": 0.4329409897327423, + "grad_norm": 2060.398193359375, + "learning_rate": 4.999685319184688e-07, + "logits/chosen": 0.0789206326007843, + "logits/rejected": 0.06302116811275482, + "logps/chosen": -63.77684020996094, + "logps/ref_chosen": -63.34757995605469, + "logps/ref_rejected": -67.49658203125, + "logps/rejected": -68.0106201171875, + "loss": 2.1644, + "margin_dpo/margin_mean": 0.08478212356567383, + "margin_dpo/margin_std": 0.4536302983760834, + "step": 71 + }, + { + "epoch": 0.10884353741496598, + "fcm_dpo/beta": 5.628780364990234, + "fcm_dpo/delta": -0.7731190919876099, + "fcm_dpo/margin": 0.29211413860321045, + "fcm_dpo/q_t": 0.3161046504974365, + "grad_norm": 1577.560302734375, + "learning_rate": 4.999440576567755e-07, + "logits/chosen": 0.0920741856098175, + "logits/rejected": 0.029652319848537445, + "logps/chosen": -56.157230377197266, + "logps/ref_chosen": -55.85929870605469, + "logps/ref_rejected": -68.45423889160156, + "logps/rejected": -69.04428100585938, + "loss": 1.1926, + "margin_dpo/margin_mean": 0.29211464524269104, + "margin_dpo/margin_std": 0.4848480224609375, + "step": 72 + }, + { + "epoch": 0.11035525321239607, + "fcm_dpo/beta": 5.599390029907227, + "fcm_dpo/delta": 0.03130987286567688, + "fcm_dpo/margin": 0.17031516134738922, + "fcm_dpo/q_t": 0.40101659297943115, + "grad_norm": 1934.7177734375, + "learning_rate": 4.999125919224965e-07, + "logits/chosen": 0.06832102686166763, + "logits/rejected": 0.05396275222301483, + "logps/chosen": -69.59184265136719, + "logps/ref_chosen": -69.13880920410156, + "logps/ref_rejected": -79.04586791992188, + "logps/rejected": -79.66921997070312, + "loss": 1.8098, + "margin_dpo/margin_mean": 0.17031550407409668, + "margin_dpo/margin_std": 0.48382318019866943, + "step": 73 + }, + { + "epoch": 0.11186696900982615, + "fcm_dpo/beta": 5.374726295471191, + "fcm_dpo/delta": -0.2641603350639343, + "fcm_dpo/margin": 0.22797469794750214, + "fcm_dpo/q_t": 0.36758503317832947, + "grad_norm": 1349.6485595703125, + "learning_rate": 4.998741355957963e-07, + "logits/chosen": 0.09487976133823395, + "logits/rejected": 0.04454671964049339, + "logps/chosen": -50.26209259033203, + "logps/ref_chosen": -49.923736572265625, + "logps/ref_rejected": -81.73213958740234, + "logps/rejected": -82.2984619140625, + "loss": 1.4011, + "margin_dpo/margin_mean": 0.22797417640686035, + "margin_dpo/margin_std": 0.46810248494148254, + "step": 74 + }, + { + "epoch": 0.11337868480725624, + "fcm_dpo/beta": 4.875063896179199, + "fcm_dpo/delta": -0.2485802173614502, + "fcm_dpo/margin": 0.24743963778018951, + "fcm_dpo/q_t": 0.34337544441223145, + "grad_norm": 1073.2872314453125, + "learning_rate": 4.998286897523808e-07, + "logits/chosen": 0.07122410833835602, + "logits/rejected": 0.04160505533218384, + "logps/chosen": -46.41517639160156, + "logps/ref_chosen": -46.06875228881836, + "logps/ref_rejected": -66.1181411743164, + "logps/rejected": -66.71200561523438, + "loss": 1.2434, + "margin_dpo/margin_mean": 0.24743930995464325, + "margin_dpo/margin_std": 0.4698425531387329, + "step": 75 + }, + { + "epoch": 0.11489040060468632, + "fcm_dpo/beta": 5.057272911071777, + "fcm_dpo/delta": 0.1727055013179779, + "fcm_dpo/margin": 0.16627338528633118, + "fcm_dpo/q_t": 0.3987014889717102, + "grad_norm": 1406.1661376953125, + "learning_rate": 4.997762556634679e-07, + "logits/chosen": 0.10183432698249817, + "logits/rejected": 0.058754947036504745, + "logps/chosen": -54.42407989501953, + "logps/ref_chosen": -54.06275177001953, + "logps/ref_rejected": -74.87464141845703, + "logps/rejected": -75.4022445678711, + "loss": 1.5553, + "margin_dpo/margin_mean": 0.16627269983291626, + "margin_dpo/margin_std": 0.4644964933395386, + "step": 76 + }, + { + "epoch": 0.1164021164021164, + "fcm_dpo/beta": 5.224140167236328, + "fcm_dpo/delta": 0.3271293044090271, + "fcm_dpo/margin": 0.1326499581336975, + "fcm_dpo/q_t": 0.41528427600860596, + "grad_norm": 1620.2890625, + "learning_rate": 4.99716834795752e-07, + "logits/chosen": 0.10804985463619232, + "logits/rejected": 0.06888192892074585, + "logps/chosen": -53.52935028076172, + "logps/ref_chosen": -53.07609176635742, + "logps/ref_rejected": -74.45601654052734, + "logps/rejected": -75.04192352294922, + "loss": 1.6511, + "margin_dpo/margin_mean": 0.1326504349708557, + "margin_dpo/margin_std": 0.44195854663848877, + "step": 77 + }, + { + "epoch": 0.11791383219954649, + "fcm_dpo/beta": 5.861191749572754, + "fcm_dpo/delta": 0.4837532937526703, + "fcm_dpo/margin": 0.09206165373325348, + "fcm_dpo/q_t": 0.40167397260665894, + "grad_norm": 2161.646240234375, + "learning_rate": 4.996504288113623e-07, + "logits/chosen": 0.039431821554899216, + "logits/rejected": 0.01997038722038269, + "logps/chosen": -68.12590026855469, + "logps/ref_chosen": -67.72541809082031, + "logps/ref_rejected": -79.03926849365234, + "logps/rejected": -79.53181457519531, + "loss": 2.4878, + "margin_dpo/margin_mean": 0.09206125140190125, + "margin_dpo/margin_std": 0.583328366279602, + "step": 78 + }, + { + "epoch": 0.11942554799697656, + "fcm_dpo/beta": 6.234781265258789, + "fcm_dpo/delta": 0.15417495369911194, + "fcm_dpo/margin": 0.13618767261505127, + "fcm_dpo/q_t": 0.389265775680542, + "grad_norm": 1901.2857666015625, + "learning_rate": 4.995770395678171e-07, + "logits/chosen": 0.150520920753479, + "logits/rejected": 0.09114135801792145, + "logps/chosen": -52.598140716552734, + "logps/ref_chosen": -52.16064453125, + "logps/ref_rejected": -83.31062316894531, + "logps/rejected": -83.8843002319336, + "loss": 1.8135, + "margin_dpo/margin_mean": 0.13618725538253784, + "margin_dpo/margin_std": 0.4358598589897156, + "step": 79 + }, + { + "epoch": 0.12093726379440665, + "fcm_dpo/beta": 6.20821475982666, + "fcm_dpo/delta": -0.13912838697433472, + "fcm_dpo/margin": 0.17933352291584015, + "fcm_dpo/q_t": 0.37054312229156494, + "grad_norm": 1953.0830078125, + "learning_rate": 4.994966691179711e-07, + "logits/chosen": 0.09082512557506561, + "logits/rejected": 0.035024385899305344, + "logps/chosen": -61.85608673095703, + "logps/ref_chosen": -61.410560607910156, + "logps/ref_rejected": -78.66004943847656, + "logps/rejected": -79.284912109375, + "loss": 1.7668, + "margin_dpo/margin_mean": 0.1793329417705536, + "margin_dpo/margin_std": 0.4744713306427002, + "step": 80 + }, + { + "epoch": 0.12244897959183673, + "fcm_dpo/beta": 5.641842842102051, + "fcm_dpo/delta": -0.34320950508117676, + "fcm_dpo/margin": 0.2292810082435608, + "fcm_dpo/q_t": 0.34941136837005615, + "grad_norm": 1636.6348876953125, + "learning_rate": 4.994093197099587e-07, + "logits/chosen": 0.07053244113922119, + "logits/rejected": 0.03839043155312538, + "logps/chosen": -64.20402526855469, + "logps/ref_chosen": -63.80437088012695, + "logps/ref_rejected": -79.3484115600586, + "logps/rejected": -79.97734069824219, + "loss": 1.4575, + "margin_dpo/margin_mean": 0.22928106784820557, + "margin_dpo/margin_std": 0.4831709563732147, + "step": 81 + }, + { + "epoch": 0.12396069538926682, + "fcm_dpo/beta": 5.125918388366699, + "fcm_dpo/delta": -0.6294834017753601, + "fcm_dpo/margin": 0.29933592677116394, + "fcm_dpo/q_t": 0.2602464258670807, + "grad_norm": 1189.437255859375, + "learning_rate": 4.993149937871306e-07, + "logits/chosen": 0.07535186409950256, + "logits/rejected": 0.013600241392850876, + "logps/chosen": -49.17131805419922, + "logps/ref_chosen": -48.817893981933594, + "logps/ref_rejected": -70.31497955322266, + "logps/rejected": -70.96774291992188, + "loss": 1.0653, + "margin_dpo/margin_mean": 0.2993359863758087, + "margin_dpo/margin_std": 0.41738319396972656, + "step": 82 + }, + { + "epoch": 0.1254724111866969, + "fcm_dpo/beta": 4.8786211013793945, + "fcm_dpo/delta": -0.19270329177379608, + "fcm_dpo/margin": 0.23940634727478027, + "fcm_dpo/q_t": 0.3403007388114929, + "grad_norm": 1305.392333984375, + "learning_rate": 4.992136939879856e-07, + "logits/chosen": 0.1480909138917923, + "logits/rejected": 0.09921949356794357, + "logps/chosen": -57.55104064941406, + "logps/ref_chosen": -57.15077209472656, + "logps/ref_rejected": -75.1710205078125, + "logps/rejected": -75.81069946289062, + "loss": 1.3796, + "margin_dpo/margin_mean": 0.23940622806549072, + "margin_dpo/margin_std": 0.48909199237823486, + "step": 83 + }, + { + "epoch": 0.12698412698412698, + "fcm_dpo/beta": 5.00314998626709, + "fcm_dpo/delta": 0.3583996295928955, + "fcm_dpo/margin": 0.13308590650558472, + "fcm_dpo/q_t": 0.4098473787307739, + "grad_norm": 1782.189208984375, + "learning_rate": 4.991054231460969e-07, + "logits/chosen": 0.12158288061618805, + "logits/rejected": 0.08126094937324524, + "logps/chosen": -65.28231048583984, + "logps/ref_chosen": -64.77729797363281, + "logps/ref_rejected": -84.71949768066406, + "logps/rejected": -85.35758972167969, + "loss": 2.005, + "margin_dpo/margin_mean": 0.1330859661102295, + "margin_dpo/margin_std": 0.5464334487915039, + "step": 84 + }, + { + "epoch": 0.12849584278155707, + "fcm_dpo/beta": 4.760544300079346, + "fcm_dpo/delta": -0.5031009912490845, + "fcm_dpo/margin": 0.3008922040462494, + "fcm_dpo/q_t": 0.3266737163066864, + "grad_norm": 1381.7164306640625, + "learning_rate": 4.989901842900325e-07, + "logits/chosen": 0.10575494170188904, + "logits/rejected": 0.06379462033510208, + "logps/chosen": -50.625328063964844, + "logps/ref_chosen": -50.25169372558594, + "logps/ref_rejected": -66.55439758300781, + "logps/rejected": -67.22891235351562, + "loss": 1.3231, + "margin_dpo/margin_mean": 0.30089178681373596, + "margin_dpo/margin_std": 0.5666717290878296, + "step": 85 + }, + { + "epoch": 0.13000755857898716, + "fcm_dpo/beta": 4.623910427093506, + "fcm_dpo/delta": -0.011702943593263626, + "fcm_dpo/margin": 0.21848827600479126, + "fcm_dpo/q_t": 0.3801780939102173, + "grad_norm": 1446.2276611328125, + "learning_rate": 4.988679806432711e-07, + "logits/chosen": 0.11094961315393448, + "logits/rejected": 0.09372542053461075, + "logps/chosen": -61.207275390625, + "logps/ref_chosen": -60.72917938232422, + "logps/ref_rejected": -72.30961608886719, + "logps/rejected": -73.0062026977539, + "loss": 1.5695, + "margin_dpo/margin_mean": 0.2184884250164032, + "margin_dpo/margin_std": 0.5658543109893799, + "step": 86 + }, + { + "epoch": 0.13151927437641722, + "fcm_dpo/beta": 4.357776641845703, + "fcm_dpo/delta": -0.31460562348365784, + "fcm_dpo/margin": 0.2916297912597656, + "fcm_dpo/q_t": 0.3396541476249695, + "grad_norm": 1350.0787353515625, + "learning_rate": 4.987388156241114e-07, + "logits/chosen": 0.08588653057813644, + "logits/rejected": 0.03219534084200859, + "logps/chosen": -66.21820831298828, + "logps/ref_chosen": -65.75796508789062, + "logps/ref_rejected": -84.81159973144531, + "logps/rejected": -85.56346130371094, + "loss": 1.3305, + "margin_dpo/margin_mean": 0.29163050651550293, + "margin_dpo/margin_std": 0.5484292507171631, + "step": 87 + }, + { + "epoch": 0.1330309901738473, + "fcm_dpo/beta": 4.051568984985352, + "fcm_dpo/delta": -0.3438121974468231, + "fcm_dpo/margin": 0.3187229037284851, + "fcm_dpo/q_t": 0.3379303812980652, + "grad_norm": 1034.83447265625, + "learning_rate": 4.986026928455767e-07, + "logits/chosen": 0.1511228084564209, + "logits/rejected": 0.12557803094387054, + "logps/chosen": -63.23377227783203, + "logps/ref_chosen": -62.82402801513672, + "logps/ref_rejected": -74.9607162475586, + "logps/rejected": -75.68917846679688, + "loss": 1.2325, + "margin_dpo/margin_mean": 0.31872305274009705, + "margin_dpo/margin_std": 0.5725570917129517, + "step": 88 + }, + { + "epoch": 0.1345427059712774, + "fcm_dpo/beta": 4.111684799194336, + "fcm_dpo/delta": -0.005461782217025757, + "fcm_dpo/margin": 0.2426835298538208, + "fcm_dpo/q_t": 0.36708956956863403, + "grad_norm": 1158.6134033203125, + "learning_rate": 4.984596161153135e-07, + "logits/chosen": 0.18977168202400208, + "logits/rejected": 0.11028344929218292, + "logps/chosen": -41.57792663574219, + "logps/ref_chosen": -41.191436767578125, + "logps/ref_rejected": -85.44769287109375, + "logps/rejected": -86.07687377929688, + "loss": 1.5568, + "margin_dpo/margin_mean": 0.24268493056297302, + "margin_dpo/margin_std": 0.5871646404266357, + "step": 89 + }, + { + "epoch": 0.1360544217687075, + "fcm_dpo/beta": 3.9873642921447754, + "fcm_dpo/delta": -0.07796984910964966, + "fcm_dpo/margin": 0.26826444268226624, + "fcm_dpo/q_t": 0.3403598964214325, + "grad_norm": 1097.566650390625, + "learning_rate": 4.983095894354857e-07, + "logits/chosen": 0.08516630530357361, + "logits/rejected": 0.03322757035493851, + "logps/chosen": -56.98159408569336, + "logps/ref_chosen": -56.58390808105469, + "logps/ref_rejected": -86.86978149414062, + "logps/rejected": -87.53573608398438, + "loss": 1.3846, + "margin_dpo/margin_mean": 0.26826387643814087, + "margin_dpo/margin_std": 0.6303993463516235, + "step": 90 + }, + { + "epoch": 0.13756613756613756, + "fcm_dpo/beta": 3.869006633758545, + "fcm_dpo/delta": -0.14137369394302368, + "fcm_dpo/margin": 0.2906471788883209, + "fcm_dpo/q_t": 0.34270262718200684, + "grad_norm": 1045.7557373046875, + "learning_rate": 4.98152617002662e-07, + "logits/chosen": 0.10475227236747742, + "logits/rejected": 0.06226480007171631, + "logps/chosen": -52.82066345214844, + "logps/ref_chosen": -52.38234329223633, + "logps/ref_rejected": -72.17642211914062, + "logps/rejected": -72.90538787841797, + "loss": 1.3782, + "margin_dpo/margin_mean": 0.29064705967903137, + "margin_dpo/margin_std": 0.6195999383926392, + "step": 91 + }, + { + "epoch": 0.13907785336356765, + "fcm_dpo/beta": 4.068203926086426, + "fcm_dpo/delta": 0.24122354388237, + "fcm_dpo/margin": 0.1886722892522812, + "fcm_dpo/q_t": 0.40757930278778076, + "grad_norm": 1103.524658203125, + "learning_rate": 4.979887032076988e-07, + "logits/chosen": 0.13855835795402527, + "logits/rejected": 0.10122767090797424, + "logps/chosen": -53.529884338378906, + "logps/ref_chosen": -53.00870132446289, + "logps/ref_rejected": -79.77812957763672, + "logps/rejected": -80.48798370361328, + "loss": 1.5458, + "margin_dpo/margin_mean": 0.18867191672325134, + "margin_dpo/margin_std": 0.5168828368186951, + "step": 92 + }, + { + "epoch": 0.14058956916099774, + "fcm_dpo/beta": 4.217402458190918, + "fcm_dpo/delta": 0.23529532551765442, + "fcm_dpo/margin": 0.1851963996887207, + "fcm_dpo/q_t": 0.3896501064300537, + "grad_norm": 1094.4332275390625, + "learning_rate": 4.978178526356172e-07, + "logits/chosen": 0.10360229760408401, + "logits/rejected": 0.07716604322195053, + "logps/chosen": -45.443016052246094, + "logps/ref_chosen": -44.90705108642578, + "logps/ref_rejected": -58.7879524230957, + "logps/rejected": -59.50910949707031, + "loss": 1.5698, + "margin_dpo/margin_mean": 0.185196191072464, + "margin_dpo/margin_std": 0.5235726237297058, + "step": 93 + }, + { + "epoch": 0.1421012849584278, + "fcm_dpo/beta": 3.851269006729126, + "fcm_dpo/delta": -0.4641948938369751, + "fcm_dpo/margin": 0.3565562069416046, + "fcm_dpo/q_t": 0.2976396679878235, + "grad_norm": 884.6483764648438, + "learning_rate": 4.976400700654751e-07, + "logits/chosen": 0.16318684816360474, + "logits/rejected": 0.12688644230365753, + "logps/chosen": -60.265010833740234, + "logps/ref_chosen": -59.93777084350586, + "logps/ref_rejected": -79.3138427734375, + "logps/rejected": -79.99763488769531, + "loss": 1.2145, + "margin_dpo/margin_mean": 0.3565560281276703, + "margin_dpo/margin_std": 0.6176527142524719, + "step": 94 + }, + { + "epoch": 0.1436130007558579, + "fcm_dpo/beta": 3.51151704788208, + "fcm_dpo/delta": -0.556129515171051, + "fcm_dpo/margin": 0.41667065024375916, + "fcm_dpo/q_t": 0.27452635765075684, + "grad_norm": 725.6941528320312, + "learning_rate": 4.974553604702332e-07, + "logits/chosen": 0.04962404444813728, + "logits/rejected": -0.009035153314471245, + "logps/chosen": -60.609527587890625, + "logps/ref_chosen": -60.168487548828125, + "logps/ref_rejected": -90.73665618896484, + "logps/rejected": -91.59436798095703, + "loss": 0.8553, + "margin_dpo/margin_mean": 0.4166697561740875, + "margin_dpo/margin_std": 0.5119737386703491, + "step": 95 + }, + { + "epoch": 0.14512471655328799, + "fcm_dpo/beta": 3.3476004600524902, + "fcm_dpo/delta": -0.3292371928691864, + "fcm_dpo/margin": 0.38447052240371704, + "fcm_dpo/q_t": 0.3057492971420288, + "grad_norm": 765.777587890625, + "learning_rate": 4.972637290166157e-07, + "logits/chosen": 0.09734475612640381, + "logits/rejected": 0.05707583576440811, + "logps/chosen": -61.152687072753906, + "logps/ref_chosen": -60.66877746582031, + "logps/ref_rejected": -88.30673217773438, + "logps/rejected": -89.17510986328125, + "loss": 1.0612, + "margin_dpo/margin_mean": 0.3844701647758484, + "margin_dpo/margin_std": 0.554520845413208, + "step": 96 + }, + { + "epoch": 0.14663643235071808, + "fcm_dpo/beta": 3.2319469451904297, + "fcm_dpo/delta": 0.1403380036354065, + "fcm_dpo/margin": 0.2673853933811188, + "fcm_dpo/q_t": 0.3827268183231354, + "grad_norm": 1068.170654296875, + "learning_rate": 4.970651810649666e-07, + "logits/chosen": 0.04420812800526619, + "logits/rejected": 0.002325967885553837, + "logps/chosen": -65.61512756347656, + "logps/ref_chosen": -65.04412078857422, + "logps/ref_rejected": -78.42092895507812, + "logps/rejected": -79.25931549072266, + "loss": 1.3514, + "margin_dpo/margin_mean": 0.2673855423927307, + "margin_dpo/margin_std": 0.6004109382629395, + "step": 97 + }, + { + "epoch": 0.14814814814814814, + "fcm_dpo/beta": 3.42587947845459, + "fcm_dpo/delta": 0.1983877718448639, + "fcm_dpo/margin": 0.23879370093345642, + "fcm_dpo/q_t": 0.3823162317276001, + "grad_norm": 922.5922241210938, + "learning_rate": 4.968597221690985e-07, + "logits/chosen": 0.13439376652240753, + "logits/rejected": 0.10842472314834595, + "logps/chosen": -55.90753936767578, + "logps/ref_chosen": -55.503231048583984, + "logps/ref_rejected": -72.81553649902344, + "logps/rejected": -73.45864868164062, + "loss": 1.2262, + "margin_dpo/margin_mean": 0.23879370093345642, + "margin_dpo/margin_std": 0.4960702657699585, + "step": 98 + }, + { + "epoch": 0.14965986394557823, + "fcm_dpo/beta": 3.3395771980285645, + "fcm_dpo/delta": -0.2737107276916504, + "fcm_dpo/margin": 0.3713855743408203, + "fcm_dpo/q_t": 0.31995880603790283, + "grad_norm": 838.7493286132812, + "learning_rate": 4.966473580761389e-07, + "logits/chosen": 0.14741893112659454, + "logits/rejected": 0.11150997132062912, + "logps/chosen": -58.98797607421875, + "logps/ref_chosen": -58.57563781738281, + "logps/ref_rejected": -78.693603515625, + "logps/rejected": -79.47733306884766, + "loss": 1.0331, + "margin_dpo/margin_mean": 0.37138599157333374, + "margin_dpo/margin_std": 0.5738701820373535, + "step": 99 + }, + { + "epoch": 0.15117157974300832, + "fcm_dpo/beta": 3.413600206375122, + "fcm_dpo/delta": 0.0910910964012146, + "fcm_dpo/margin": 0.2643635869026184, + "fcm_dpo/q_t": 0.3801841139793396, + "grad_norm": 1048.221923828125, + "learning_rate": 4.964280947263676e-07, + "logits/chosen": 0.12782281637191772, + "logits/rejected": 0.12069296091794968, + "logps/chosen": -80.02874755859375, + "logps/ref_chosen": -79.58343505859375, + "logps/ref_rejected": -92.152587890625, + "logps/rejected": -92.86225891113281, + "loss": 1.5245, + "margin_dpo/margin_mean": 0.264363557100296, + "margin_dpo/margin_std": 0.6884479522705078, + "step": 100 + }, + { + "epoch": 0.15268329554043839, + "fcm_dpo/beta": 3.2258787155151367, + "fcm_dpo/delta": -0.34300971031188965, + "fcm_dpo/margin": 0.40265652537345886, + "fcm_dpo/q_t": 0.29698413610458374, + "grad_norm": 702.3856811523438, + "learning_rate": 4.96201938253052e-07, + "logits/chosen": 0.1229773610830307, + "logits/rejected": 0.08786555379629135, + "logps/chosen": -52.74365234375, + "logps/ref_chosen": -52.332786560058594, + "logps/ref_rejected": -69.55589294433594, + "logps/rejected": -70.36941528320312, + "loss": 0.9957, + "margin_dpo/margin_mean": 0.40265610814094543, + "margin_dpo/margin_std": 0.5584487318992615, + "step": 101 + }, + { + "epoch": 0.15419501133786848, + "fcm_dpo/beta": 3.221522808074951, + "fcm_dpo/delta": 0.1677694320678711, + "fcm_dpo/margin": 0.26210707426071167, + "fcm_dpo/q_t": 0.3745608925819397, + "grad_norm": 933.2718505859375, + "learning_rate": 4.959688949822748e-07, + "logits/chosen": 0.05017256736755371, + "logits/rejected": 0.012172428891062737, + "logps/chosen": -65.21527099609375, + "logps/ref_chosen": -64.74348449707031, + "logps/ref_rejected": -69.06132507324219, + "logps/rejected": -69.79522705078125, + "loss": 1.3826, + "margin_dpo/margin_mean": 0.26210689544677734, + "margin_dpo/margin_std": 0.6122475862503052, + "step": 102 + }, + { + "epoch": 0.15570672713529857, + "fcm_dpo/beta": 3.343921661376953, + "fcm_dpo/delta": 0.14063423871994019, + "fcm_dpo/margin": 0.25882646441459656, + "fcm_dpo/q_t": 0.3664790987968445, + "grad_norm": 876.5048828125, + "learning_rate": 4.957289714327572e-07, + "logits/chosen": 0.14401525259017944, + "logits/rejected": 0.11314252763986588, + "logps/chosen": -64.34033203125, + "logps/ref_chosen": -63.83664321899414, + "logps/ref_rejected": -79.32362365722656, + "logps/rejected": -80.08615112304688, + "loss": 1.2541, + "margin_dpo/margin_mean": 0.2588259279727936, + "margin_dpo/margin_std": 0.537617564201355, + "step": 103 + }, + { + "epoch": 0.15721844293272866, + "fcm_dpo/beta": 3.3820950984954834, + "fcm_dpo/delta": 0.11869892477989197, + "fcm_dpo/margin": 0.2636609375476837, + "fcm_dpo/q_t": 0.37176138162612915, + "grad_norm": 1155.879150390625, + "learning_rate": 4.954821743156767e-07, + "logits/chosen": 0.10751787573099136, + "logits/rejected": 0.03126327693462372, + "logps/chosen": -61.48141098022461, + "logps/ref_chosen": -60.99920654296875, + "logps/ref_rejected": -98.84645080566406, + "logps/rejected": -99.59231567382812, + "loss": 1.4641, + "margin_dpo/margin_mean": 0.26366138458251953, + "margin_dpo/margin_std": 0.6468815803527832, + "step": 104 + }, + { + "epoch": 0.15873015873015872, + "fcm_dpo/beta": 3.3653788566589355, + "fcm_dpo/delta": -0.06612719595432281, + "fcm_dpo/margin": 0.31470757722854614, + "fcm_dpo/q_t": 0.3323326110839844, + "grad_norm": 1119.451171875, + "learning_rate": 4.952285105344791e-07, + "logits/chosen": 0.09568033367395401, + "logits/rejected": 0.044659968465566635, + "logps/chosen": -71.36830139160156, + "logps/ref_chosen": -70.95027160644531, + "logps/ref_rejected": -87.88340759277344, + "logps/rejected": -88.61614227294922, + "loss": 1.3043, + "margin_dpo/margin_mean": 0.3147069811820984, + "margin_dpo/margin_std": 0.6669665575027466, + "step": 105 + }, + { + "epoch": 0.1602418745275888, + "fcm_dpo/beta": 3.4193921089172363, + "fcm_dpo/delta": 0.15833157300949097, + "fcm_dpo/margin": 0.25018543004989624, + "fcm_dpo/q_t": 0.3685312867164612, + "grad_norm": 1039.7274169921875, + "learning_rate": 4.949679871846857e-07, + "logits/chosen": 0.1333768665790558, + "logits/rejected": 0.1204344779253006, + "logps/chosen": -62.8930549621582, + "logps/ref_chosen": -62.45933151245117, + "logps/ref_rejected": -67.00595092773438, + "logps/rejected": -67.68985748291016, + "loss": 1.3112, + "margin_dpo/margin_mean": 0.2501852214336395, + "margin_dpo/margin_std": 0.5423716306686401, + "step": 106 + }, + { + "epoch": 0.1617535903250189, + "fcm_dpo/beta": 3.652026653289795, + "fcm_dpo/delta": 0.4439771771430969, + "fcm_dpo/margin": 0.15993273258209229, + "fcm_dpo/q_t": 0.43477770686149597, + "grad_norm": 1451.044677734375, + "learning_rate": 4.947006115536947e-07, + "logits/chosen": 0.06545466929674149, + "logits/rejected": 0.046112120151519775, + "logps/chosen": -76.338623046875, + "logps/ref_chosen": -75.83796691894531, + "logps/ref_rejected": -87.74038696289062, + "logps/rejected": -88.40097045898438, + "loss": 1.8206, + "margin_dpo/margin_mean": 0.15993207693099976, + "margin_dpo/margin_std": 0.673768162727356, + "step": 107 + }, + { + "epoch": 0.16326530612244897, + "fcm_dpo/beta": 3.626925468444824, + "fcm_dpo/delta": -0.3162718117237091, + "fcm_dpo/margin": 0.3520002067089081, + "fcm_dpo/q_t": 0.33733585476875305, + "grad_norm": 953.0491943359375, + "learning_rate": 4.944263911205772e-07, + "logits/chosen": 0.07331952452659607, + "logits/rejected": 0.047261983156204224, + "logps/chosen": -68.7996826171875, + "logps/ref_chosen": -68.39323425292969, + "logps/ref_rejected": -83.24267578125, + "logps/rejected": -84.00111389160156, + "loss": 1.2468, + "margin_dpo/margin_mean": 0.35200008749961853, + "margin_dpo/margin_std": 0.6720170974731445, + "step": 108 + }, + { + "epoch": 0.16477702191987906, + "fcm_dpo/beta": 3.5566816329956055, + "fcm_dpo/delta": 0.07203048467636108, + "fcm_dpo/margin": 0.2625024616718292, + "fcm_dpo/q_t": 0.38299477100372314, + "grad_norm": 1039.4810791015625, + "learning_rate": 4.941453335558681e-07, + "logits/chosen": 0.06151915714144707, + "logits/rejected": 0.016085410490632057, + "logps/chosen": -55.96140670776367, + "logps/ref_chosen": -55.52748107910156, + "logps/ref_rejected": -83.55218505859375, + "logps/rejected": -84.24861907958984, + "loss": 1.3522, + "margin_dpo/margin_mean": 0.26250216364860535, + "margin_dpo/margin_std": 0.6019136309623718, + "step": 109 + }, + { + "epoch": 0.16628873771730915, + "fcm_dpo/beta": 3.8665976524353027, + "fcm_dpo/delta": 0.39112499356269836, + "fcm_dpo/margin": 0.16184496879577637, + "fcm_dpo/q_t": 0.41690564155578613, + "grad_norm": 1377.6337890625, + "learning_rate": 4.938574467213517e-07, + "logits/chosen": 0.033917784690856934, + "logits/rejected": 0.04065680876374245, + "logps/chosen": -81.66863250732422, + "logps/ref_chosen": -81.15874481201172, + "logps/ref_rejected": -72.56021118164062, + "logps/rejected": -73.23194885253906, + "loss": 1.6791, + "margin_dpo/margin_mean": 0.16184476017951965, + "margin_dpo/margin_std": 0.5766524076461792, + "step": 110 + }, + { + "epoch": 0.16780045351473924, + "fcm_dpo/beta": 3.9139037132263184, + "fcm_dpo/delta": -0.052766673266887665, + "fcm_dpo/margin": 0.2670096457004547, + "fcm_dpo/q_t": 0.36203914880752563, + "grad_norm": 1263.3140869140625, + "learning_rate": 4.935627386698418e-07, + "logits/chosen": 0.13472726941108704, + "logits/rejected": 0.10240040719509125, + "logps/chosen": -52.87812042236328, + "logps/ref_chosen": -52.358985900878906, + "logps/ref_rejected": -77.06150817871094, + "logps/rejected": -77.84764862060547, + "loss": 1.6275, + "margin_dpo/margin_mean": 0.26701000332832336, + "margin_dpo/margin_std": 0.6607059836387634, + "step": 111 + }, + { + "epoch": 0.1693121693121693, + "fcm_dpo/beta": 3.622741937637329, + "fcm_dpo/delta": -0.296929270029068, + "fcm_dpo/margin": 0.34460121393203735, + "fcm_dpo/q_t": 0.33765172958374023, + "grad_norm": 1077.2196044921875, + "learning_rate": 4.932612176449559e-07, + "logits/chosen": 0.05540486425161362, + "logits/rejected": 0.0011176479747518897, + "logps/chosen": -63.441078186035156, + "logps/ref_chosen": -63.02006530761719, + "logps/ref_rejected": -111.36941528320312, + "logps/rejected": -112.13502502441406, + "loss": 1.3207, + "margin_dpo/margin_mean": 0.34459996223449707, + "margin_dpo/margin_std": 0.6546406745910645, + "step": 112 + }, + { + "epoch": 0.1708238851095994, + "fcm_dpo/beta": 3.7972545623779297, + "fcm_dpo/delta": 0.12773901224136353, + "fcm_dpo/margin": 0.22868876159191132, + "fcm_dpo/q_t": 0.37673860788345337, + "grad_norm": 1496.3253173828125, + "learning_rate": 4.929528920808854e-07, + "logits/chosen": 0.09782901406288147, + "logits/rejected": 0.06331203132867813, + "logps/chosen": -56.33560562133789, + "logps/ref_chosen": -55.80766296386719, + "logps/ref_rejected": -69.84014129638672, + "logps/rejected": -70.59677124023438, + "loss": 1.5844, + "margin_dpo/margin_mean": 0.22868850827217102, + "margin_dpo/margin_std": 0.5964616537094116, + "step": 113 + }, + { + "epoch": 0.17233560090702948, + "fcm_dpo/beta": 3.3383381366729736, + "fcm_dpo/delta": -0.5678998231887817, + "fcm_dpo/margin": 0.4367901682853699, + "fcm_dpo/q_t": 0.29958969354629517, + "grad_norm": 712.6404418945312, + "learning_rate": 4.92637770602159e-07, + "logits/chosen": 0.13647404313087463, + "logits/rejected": 0.08034436404705048, + "logps/chosen": -66.70956420898438, + "logps/ref_chosen": -66.33277130126953, + "logps/ref_rejected": -71.61489868164062, + "logps/rejected": -72.42848205566406, + "loss": 1.0132, + "margin_dpo/margin_mean": 0.43679025769233704, + "margin_dpo/margin_std": 0.6181018352508545, + "step": 114 + }, + { + "epoch": 0.17384731670445955, + "fcm_dpo/beta": 3.3931922912597656, + "fcm_dpo/delta": 0.006447508931159973, + "fcm_dpo/margin": 0.291039377450943, + "fcm_dpo/q_t": 0.3765663206577301, + "grad_norm": 1062.108154296875, + "learning_rate": 4.923158620234019e-07, + "logits/chosen": 0.10150371491909027, + "logits/rejected": 0.047993022948503494, + "logps/chosen": -56.26490783691406, + "logps/ref_chosen": -55.74903869628906, + "logps/ref_rejected": -79.59849548339844, + "logps/rejected": -80.40541076660156, + "loss": 1.3013, + "margin_dpo/margin_mean": 0.29103943705558777, + "margin_dpo/margin_std": 0.6191039085388184, + "step": 115 + }, + { + "epoch": 0.17535903250188964, + "fcm_dpo/beta": 3.4025328159332275, + "fcm_dpo/delta": -0.009448423981666565, + "fcm_dpo/margin": 0.29369187355041504, + "fcm_dpo/q_t": 0.3570387065410614, + "grad_norm": 851.5135498046875, + "learning_rate": 4.91987175349089e-07, + "logits/chosen": 0.1060943752527237, + "logits/rejected": 0.04715292900800705, + "logps/chosen": -49.830238342285156, + "logps/ref_chosen": -49.36516571044922, + "logps/ref_rejected": -72.84671020507812, + "logps/rejected": -73.60546875, + "loss": 1.224, + "margin_dpo/margin_mean": 0.29369184374809265, + "margin_dpo/margin_std": 0.5422225594520569, + "step": 116 + }, + { + "epoch": 0.17687074829931973, + "fcm_dpo/beta": 3.344947338104248, + "fcm_dpo/delta": 0.09896770864725113, + "fcm_dpo/margin": 0.27185767889022827, + "fcm_dpo/q_t": 0.3576691150665283, + "grad_norm": 870.2111206054688, + "learning_rate": 4.916517197732933e-07, + "logits/chosen": 0.129032701253891, + "logits/rejected": 0.09620364010334015, + "logps/chosen": -58.112640380859375, + "logps/ref_chosen": -57.710899353027344, + "logps/ref_rejected": -69.77253723144531, + "logps/rejected": -70.4461441040039, + "loss": 1.3806, + "margin_dpo/margin_mean": 0.27185723185539246, + "margin_dpo/margin_std": 0.6084860563278198, + "step": 117 + }, + { + "epoch": 0.17838246409674982, + "fcm_dpo/beta": 3.271368980407715, + "fcm_dpo/delta": -0.09690429270267487, + "fcm_dpo/margin": 0.32874226570129395, + "fcm_dpo/q_t": 0.34997400641441345, + "grad_norm": 930.0485229492188, + "learning_rate": 4.913095046794281e-07, + "logits/chosen": 0.13153867423534393, + "logits/rejected": 0.09707070142030716, + "logps/chosen": -52.88352966308594, + "logps/ref_chosen": -52.479896545410156, + "logps/ref_rejected": -81.359130859375, + "logps/rejected": -82.09149169921875, + "loss": 1.2167, + "margin_dpo/margin_mean": 0.32874205708503723, + "margin_dpo/margin_std": 0.582424521446228, + "step": 118 + }, + { + "epoch": 0.17989417989417988, + "fcm_dpo/beta": 3.331033229827881, + "fcm_dpo/delta": 0.008004724979400635, + "fcm_dpo/margin": 0.2980421185493469, + "fcm_dpo/q_t": 0.35352998971939087, + "grad_norm": 886.5343017578125, + "learning_rate": 4.909605396399855e-07, + "logits/chosen": 0.08484401553869247, + "logits/rejected": 0.05185426026582718, + "logps/chosen": -61.905765533447266, + "logps/ref_chosen": -61.35767364501953, + "logps/ref_rejected": -75.71510314941406, + "logps/rejected": -76.56123352050781, + "loss": 1.2981, + "margin_dpo/margin_mean": 0.298042356967926, + "margin_dpo/margin_std": 0.5981870293617249, + "step": 119 + }, + { + "epoch": 0.18140589569160998, + "fcm_dpo/beta": 3.182232618331909, + "fcm_dpo/delta": -0.24365851283073425, + "fcm_dpo/margin": 0.3802499771118164, + "fcm_dpo/q_t": 0.3237895369529724, + "grad_norm": 770.2077026367188, + "learning_rate": 4.906048344162676e-07, + "logits/chosen": 0.07990750670433044, + "logits/rejected": 0.02834871970117092, + "logps/chosen": -60.297908782958984, + "logps/ref_chosen": -59.907569885253906, + "logps/ref_rejected": -79.6910629272461, + "logps/rejected": -80.46165466308594, + "loss": 1.0967, + "margin_dpo/margin_mean": 0.3802502751350403, + "margin_dpo/margin_std": 0.6159936189651489, + "step": 120 + }, + { + "epoch": 0.18291761148904007, + "fcm_dpo/beta": 3.1936514377593994, + "fcm_dpo/delta": 0.05449778214097023, + "fcm_dpo/margin": 0.29768818616867065, + "fcm_dpo/q_t": 0.3674450218677521, + "grad_norm": 816.0641479492188, + "learning_rate": 4.902423989581143e-07, + "logits/chosen": 0.17014677822589874, + "logits/rejected": 0.09852974861860275, + "logps/chosen": -56.18149948120117, + "logps/ref_chosen": -55.66604232788086, + "logps/ref_rejected": -101.56233978271484, + "logps/rejected": -102.37548828125, + "loss": 1.2968, + "margin_dpo/margin_mean": 0.2976876497268677, + "margin_dpo/margin_std": 0.6306780576705933, + "step": 121 + }, + { + "epoch": 0.18442932728647016, + "fcm_dpo/beta": 3.019984722137451, + "fcm_dpo/delta": -0.44598841667175293, + "fcm_dpo/margin": 0.4587884843349457, + "fcm_dpo/q_t": 0.29104509949684143, + "grad_norm": 804.1510009765625, + "learning_rate": 4.898732434036243e-07, + "logits/chosen": 0.09989838302135468, + "logits/rejected": 0.06974966824054718, + "logps/chosen": -63.821380615234375, + "logps/ref_chosen": -63.334373474121094, + "logps/ref_rejected": -73.67523193359375, + "logps/rejected": -74.62103271484375, + "loss": 0.969, + "margin_dpo/margin_mean": 0.45878836512565613, + "margin_dpo/margin_std": 0.6608290672302246, + "step": 122 + }, + { + "epoch": 0.18594104308390022, + "fcm_dpo/beta": 2.9597277641296387, + "fcm_dpo/delta": -0.11771807074546814, + "fcm_dpo/margin": 0.37006598711013794, + "fcm_dpo/q_t": 0.31938499212265015, + "grad_norm": 801.2340698242188, + "learning_rate": 4.894973780788722e-07, + "logits/chosen": 0.13346600532531738, + "logits/rejected": 0.09592346101999283, + "logps/chosen": -57.292945861816406, + "logps/ref_chosen": -56.89874267578125, + "logps/ref_rejected": -78.97028350830078, + "logps/rejected": -79.73455810546875, + "loss": 1.2025, + "margin_dpo/margin_mean": 0.37006592750549316, + "margin_dpo/margin_std": 0.640432596206665, + "step": 123 + }, + { + "epoch": 0.1874527588813303, + "fcm_dpo/beta": 2.767918109893799, + "fcm_dpo/delta": -0.2092204988002777, + "fcm_dpo/margin": 0.4275299310684204, + "fcm_dpo/q_t": 0.31604132056236267, + "grad_norm": 655.9889526367188, + "learning_rate": 4.89114813497619e-07, + "logits/chosen": 0.1320020854473114, + "logits/rejected": 0.08239568769931793, + "logps/chosen": -57.52501678466797, + "logps/ref_chosen": -57.116085052490234, + "logps/ref_rejected": -87.93074035644531, + "logps/rejected": -88.76720428466797, + "loss": 0.9876, + "margin_dpo/margin_mean": 0.4275299310684204, + "margin_dpo/margin_std": 0.6369043588638306, + "step": 124 + }, + { + "epoch": 0.1889644746787604, + "fcm_dpo/beta": 2.691709518432617, + "fcm_dpo/delta": -0.04524332284927368, + "fcm_dpo/margin": 0.3849112093448639, + "fcm_dpo/q_t": 0.32585692405700684, + "grad_norm": 650.68212890625, + "learning_rate": 4.887255603610184e-07, + "logits/chosen": 0.18828628957271576, + "logits/rejected": 0.1363983303308487, + "logps/chosen": -66.21900939941406, + "logps/ref_chosen": -65.7061767578125, + "logps/ref_rejected": -91.72711944580078, + "logps/rejected": -92.62486267089844, + "loss": 1.0744, + "margin_dpo/margin_mean": 0.3849112391471863, + "margin_dpo/margin_std": 0.5876812934875488, + "step": 125 + }, + { + "epoch": 0.19047619047619047, + "fcm_dpo/beta": 2.652081251144409, + "fcm_dpo/delta": -0.10823916643857956, + "fcm_dpo/margin": 0.41218724846839905, + "fcm_dpo/q_t": 0.3507199287414551, + "grad_norm": 565.577880859375, + "learning_rate": 4.883296295573176e-07, + "logits/chosen": -0.01404772698879242, + "logits/rejected": -0.02040482684969902, + "logps/chosen": -68.63446807861328, + "logps/ref_chosen": -68.17608642578125, + "logps/ref_rejected": -65.1175537109375, + "logps/rejected": -65.98812103271484, + "loss": 1.143, + "margin_dpo/margin_mean": 0.41218748688697815, + "margin_dpo/margin_std": 0.8156576156616211, + "step": 126 + }, + { + "epoch": 0.19198790627362056, + "fcm_dpo/beta": 2.6257121562957764, + "fcm_dpo/delta": -0.039457425475120544, + "fcm_dpo/margin": 0.39357566833496094, + "fcm_dpo/q_t": 0.32776835560798645, + "grad_norm": 675.0282592773438, + "learning_rate": 4.87927032161552e-07, + "logits/chosen": 0.06537148356437683, + "logits/rejected": 0.037958111613988876, + "logps/chosen": -62.41735076904297, + "logps/ref_chosen": -61.88023376464844, + "logps/ref_rejected": -68.46012878417969, + "logps/rejected": -69.39082336425781, + "loss": 1.1334, + "margin_dpo/margin_mean": 0.3935753107070923, + "margin_dpo/margin_std": 0.6591010093688965, + "step": 127 + }, + { + "epoch": 0.19349962207105065, + "fcm_dpo/beta": 2.6458208560943604, + "fcm_dpo/delta": 0.08855466544628143, + "fcm_dpo/margin": 0.3467669188976288, + "fcm_dpo/q_t": 0.36068370938301086, + "grad_norm": 708.3317260742188, + "learning_rate": 4.875177794352363e-07, + "logits/chosen": 0.09111534804105759, + "logits/rejected": 0.045390479266643524, + "logps/chosen": -67.32196044921875, + "logps/ref_chosen": -66.708984375, + "logps/ref_rejected": -94.97969055175781, + "logps/rejected": -95.9394302368164, + "loss": 1.2229, + "margin_dpo/margin_mean": 0.34676679968833923, + "margin_dpo/margin_std": 0.687119722366333, + "step": 128 + }, + { + "epoch": 0.19501133786848074, + "fcm_dpo/beta": 2.789608955383301, + "fcm_dpo/delta": 0.23963144421577454, + "fcm_dpo/margin": 0.2792533040046692, + "fcm_dpo/q_t": 0.38132244348526, + "grad_norm": 765.4723510742188, + "learning_rate": 4.871018828260491e-07, + "logits/chosen": 0.1117800921201706, + "logits/rejected": 0.10394299030303955, + "logps/chosen": -65.94437408447266, + "logps/ref_chosen": -65.33882904052734, + "logps/ref_rejected": -68.06109619140625, + "logps/rejected": -68.94589233398438, + "loss": 1.2034, + "margin_dpo/margin_mean": 0.279253751039505, + "margin_dpo/margin_std": 0.5657248497009277, + "step": 129 + }, + { + "epoch": 0.1965230536659108, + "fcm_dpo/beta": 2.7729220390319824, + "fcm_dpo/delta": 0.050821587443351746, + "fcm_dpo/margin": 0.33975258469581604, + "fcm_dpo/q_t": 0.35864949226379395, + "grad_norm": 768.4454956054688, + "learning_rate": 4.866793539675126e-07, + "logits/chosen": 0.09036006778478622, + "logits/rejected": 0.04790624603629112, + "logps/chosen": -59.219017028808594, + "logps/ref_chosen": -58.660743713378906, + "logps/ref_rejected": -79.24510192871094, + "logps/rejected": -80.14312744140625, + "loss": 1.1136, + "margin_dpo/margin_mean": 0.3397524058818817, + "margin_dpo/margin_std": 0.5575762987136841, + "step": 130 + }, + { + "epoch": 0.1980347694633409, + "fcm_dpo/beta": 2.782139301300049, + "fcm_dpo/delta": -0.2223011553287506, + "fcm_dpo/margin": 0.4299342930316925, + "fcm_dpo/q_t": 0.3284626305103302, + "grad_norm": 713.0172119140625, + "learning_rate": 4.86250204678667e-07, + "logits/chosen": 0.10117419809103012, + "logits/rejected": 0.048455823212862015, + "logps/chosen": -52.9766845703125, + "logps/ref_chosen": -52.51453399658203, + "logps/ref_rejected": -85.18299865722656, + "logps/rejected": -86.07508087158203, + "loss": 1.152, + "margin_dpo/margin_mean": 0.42993444204330444, + "margin_dpo/margin_std": 0.7103478908538818, + "step": 131 + }, + { + "epoch": 0.19954648526077098, + "fcm_dpo/beta": 2.728858470916748, + "fcm_dpo/delta": 0.014488308690488338, + "fcm_dpo/margin": 0.3616468608379364, + "fcm_dpo/q_t": 0.3465641736984253, + "grad_norm": 803.3519897460938, + "learning_rate": 4.858144469637408e-07, + "logits/chosen": 0.16957558691501617, + "logits/rejected": 0.1406971514225006, + "logps/chosen": -66.2645263671875, + "logps/ref_chosen": -65.68513488769531, + "logps/ref_rejected": -69.54120635986328, + "logps/rejected": -70.48225402832031, + "loss": 1.3716, + "margin_dpo/margin_mean": 0.36164700984954834, + "margin_dpo/margin_std": 0.8076159358024597, + "step": 132 + }, + { + "epoch": 0.20105820105820105, + "fcm_dpo/beta": 2.8398377895355225, + "fcm_dpo/delta": 0.20342613756656647, + "fcm_dpo/margin": 0.2855343222618103, + "fcm_dpo/q_t": 0.3706758916378021, + "grad_norm": 806.1981811523438, + "learning_rate": 4.853720930118138e-07, + "logits/chosen": 0.07018555700778961, + "logits/rejected": 0.06098049134016037, + "logps/chosen": -64.16683959960938, + "logps/ref_chosen": -63.598114013671875, + "logps/ref_rejected": -73.72798156738281, + "logps/rejected": -74.58223724365234, + "loss": 1.3296, + "margin_dpo/margin_mean": 0.2855341136455536, + "margin_dpo/margin_std": 0.6436434984207153, + "step": 133 + }, + { + "epoch": 0.20256991685563114, + "fcm_dpo/beta": 2.7438831329345703, + "fcm_dpo/delta": -0.14177896082401276, + "fcm_dpo/margin": 0.40749433636665344, + "fcm_dpo/q_t": 0.32042205333709717, + "grad_norm": 637.2561645507812, + "learning_rate": 4.849231551964771e-07, + "logits/chosen": 0.17743632197380066, + "logits/rejected": 0.129373237490654, + "logps/chosen": -54.34275817871094, + "logps/ref_chosen": -53.79457092285156, + "logps/ref_rejected": -74.16741943359375, + "logps/rejected": -75.12309265136719, + "loss": 1.0797, + "margin_dpo/margin_mean": 0.40749499201774597, + "margin_dpo/margin_std": 0.6834430694580078, + "step": 134 + }, + { + "epoch": 0.20408163265306123, + "fcm_dpo/beta": 2.8520781993865967, + "fcm_dpo/delta": 0.2403937578201294, + "fcm_dpo/margin": 0.2731159031391144, + "fcm_dpo/q_t": 0.3797072768211365, + "grad_norm": 688.9449462890625, + "learning_rate": 4.844676460754862e-07, + "logits/chosen": 0.09586119651794434, + "logits/rejected": 0.06667510420084, + "logps/chosen": -49.97583770751953, + "logps/ref_chosen": -49.441078186035156, + "logps/ref_rejected": -65.96878051757812, + "logps/rejected": -66.77665710449219, + "loss": 1.3673, + "margin_dpo/margin_mean": 0.27311572432518005, + "margin_dpo/margin_std": 0.6469433307647705, + "step": 135 + }, + { + "epoch": 0.20559334845049132, + "fcm_dpo/beta": 2.90311861038208, + "fcm_dpo/delta": 0.004882900044322014, + "fcm_dpo/margin": 0.3429165482521057, + "fcm_dpo/q_t": 0.36461225152015686, + "grad_norm": 1105.5750732421875, + "learning_rate": 4.840055783904106e-07, + "logits/chosen": 0.10260805487632751, + "logits/rejected": 0.04400411248207092, + "logps/chosen": -67.37596893310547, + "logps/ref_chosen": -66.75926208496094, + "logps/ref_rejected": -94.61787414550781, + "logps/rejected": -95.57749938964844, + "loss": 1.5811, + "margin_dpo/margin_mean": 0.34291741251945496, + "margin_dpo/margin_std": 0.9085370302200317, + "step": 136 + }, + { + "epoch": 0.20710506424792138, + "fcm_dpo/beta": 2.891000747680664, + "fcm_dpo/delta": -0.11807064712047577, + "fcm_dpo/margin": 0.38187700510025024, + "fcm_dpo/q_t": 0.3429448902606964, + "grad_norm": 694.5245971679688, + "learning_rate": 4.835369650662767e-07, + "logits/chosen": 0.10072774440050125, + "logits/rejected": 0.07752367854118347, + "logps/chosen": -57.34507751464844, + "logps/ref_chosen": -56.78379821777344, + "logps/ref_rejected": -69.89952087402344, + "logps/rejected": -70.8426742553711, + "loss": 1.2, + "margin_dpo/margin_mean": 0.3818773031234741, + "margin_dpo/margin_std": 0.6857679486274719, + "step": 137 + }, + { + "epoch": 0.20861678004535147, + "fcm_dpo/beta": 2.905198574066162, + "fcm_dpo/delta": 0.12694688141345978, + "fcm_dpo/margin": 0.3041801452636719, + "fcm_dpo/q_t": 0.3584628403186798, + "grad_norm": 783.094482421875, + "learning_rate": 4.830618192112065e-07, + "logits/chosen": 0.0714266374707222, + "logits/rejected": 0.04139017313718796, + "logps/chosen": -59.46550750732422, + "logps/ref_chosen": -58.766014099121094, + "logps/ref_rejected": -68.12371826171875, + "logps/rejected": -69.12739562988281, + "loss": 1.3424, + "margin_dpo/margin_mean": 0.3041801452636719, + "margin_dpo/margin_std": 0.6721060276031494, + "step": 138 + }, + { + "epoch": 0.21012849584278157, + "fcm_dpo/beta": 2.842034101486206, + "fcm_dpo/delta": -0.29515254497528076, + "fcm_dpo/margin": 0.4422586262226105, + "fcm_dpo/q_t": 0.32127201557159424, + "grad_norm": 698.6984252929688, + "learning_rate": 4.825801541160509e-07, + "logits/chosen": 0.058577846735715866, + "logits/rejected": 0.034485623240470886, + "logps/chosen": -71.8461685180664, + "logps/ref_chosen": -71.2255859375, + "logps/ref_rejected": -82.1834716796875, + "logps/rejected": -83.2463150024414, + "loss": 1.0673, + "margin_dpo/margin_mean": 0.44225820899009705, + "margin_dpo/margin_std": 0.6527875661849976, + "step": 139 + }, + { + "epoch": 0.21164021164021163, + "fcm_dpo/beta": 2.5584716796875, + "fcm_dpo/delta": -0.5142702460289001, + "fcm_dpo/margin": 0.5633730888366699, + "fcm_dpo/q_t": 0.28819897770881653, + "grad_norm": 720.5505981445312, + "learning_rate": 4.820919832540181e-07, + "logits/chosen": 0.06424537301063538, + "logits/rejected": 0.027711138129234314, + "logps/chosen": -63.82072830200195, + "logps/ref_chosen": -63.27766418457031, + "logps/ref_rejected": -83.30647277832031, + "logps/rejected": -84.41291809082031, + "loss": 1.1217, + "margin_dpo/margin_mean": 0.5633726119995117, + "margin_dpo/margin_std": 0.8722689151763916, + "step": 140 + }, + { + "epoch": 0.21315192743764172, + "fcm_dpo/beta": 2.474776029586792, + "fcm_dpo/delta": 0.05119156837463379, + "fcm_dpo/margin": 0.3848419487476349, + "fcm_dpo/q_t": 0.3656679391860962, + "grad_norm": 698.813720703125, + "learning_rate": 4.815973202802966e-07, + "logits/chosen": 0.09540177881717682, + "logits/rejected": 0.06088024377822876, + "logps/chosen": -62.38557052612305, + "logps/ref_chosen": -61.76676940917969, + "logps/ref_rejected": -88.60601806640625, + "logps/rejected": -89.60966491699219, + "loss": 1.2344, + "margin_dpo/margin_mean": 0.38484299182891846, + "margin_dpo/margin_std": 0.7531988620758057, + "step": 141 + }, + { + "epoch": 0.2146636432350718, + "fcm_dpo/beta": 2.6312007904052734, + "fcm_dpo/delta": 0.25451576709747314, + "fcm_dpo/margin": 0.2886815369129181, + "fcm_dpo/q_t": 0.39209288358688354, + "grad_norm": 677.07763671875, + "learning_rate": 4.810961790316729e-07, + "logits/chosen": 0.08383051306009293, + "logits/rejected": 0.06255074590444565, + "logps/chosen": -65.8593521118164, + "logps/ref_chosen": -65.2747802734375, + "logps/ref_rejected": -81.1378173828125, + "logps/rejected": -82.01107788085938, + "loss": 1.3684, + "margin_dpo/margin_mean": 0.2886812686920166, + "margin_dpo/margin_std": 0.7156628370285034, + "step": 142 + }, + { + "epoch": 0.2161753590325019, + "fcm_dpo/beta": 2.587679386138916, + "fcm_dpo/delta": -0.12541311979293823, + "fcm_dpo/margin": 0.42952513694763184, + "fcm_dpo/q_t": 0.3180665075778961, + "grad_norm": 654.9125366210938, + "learning_rate": 4.805885735261454e-07, + "logits/chosen": 0.12669947743415833, + "logits/rejected": 0.11181487888097763, + "logps/chosen": -63.13290023803711, + "logps/ref_chosen": -62.617828369140625, + "logps/ref_rejected": -70.39239501953125, + "logps/rejected": -71.33699035644531, + "loss": 1.0608, + "margin_dpo/margin_mean": 0.4295256435871124, + "margin_dpo/margin_std": 0.6682602167129517, + "step": 143 + }, + { + "epoch": 0.21768707482993196, + "fcm_dpo/beta": 2.556525707244873, + "fcm_dpo/delta": -0.07277373969554901, + "fcm_dpo/margin": 0.4165218472480774, + "fcm_dpo/q_t": 0.34666940569877625, + "grad_norm": 743.940185546875, + "learning_rate": 4.800745179625307e-07, + "logits/chosen": 0.11285848915576935, + "logits/rejected": 0.0883728414773941, + "logps/chosen": -61.44839096069336, + "logps/ref_chosen": -60.80268859863281, + "logps/ref_rejected": -79.07284545898438, + "logps/rejected": -80.13507080078125, + "loss": 1.1906, + "margin_dpo/margin_mean": 0.4165222942829132, + "margin_dpo/margin_std": 0.7579972743988037, + "step": 144 + }, + { + "epoch": 0.21919879062736206, + "fcm_dpo/beta": 2.5957999229431152, + "fcm_dpo/delta": 0.1555352658033371, + "fcm_dpo/margin": 0.33037135004997253, + "fcm_dpo/q_t": 0.3795892000198364, + "grad_norm": 965.2445678710938, + "learning_rate": 4.795540267200686e-07, + "logits/chosen": 0.07255662977695465, + "logits/rejected": 0.08949023485183716, + "logps/chosen": -75.23291015625, + "logps/ref_chosen": -74.61146545410156, + "logps/ref_rejected": -83.24461364746094, + "logps/rejected": -84.19642639160156, + "loss": 1.456, + "margin_dpo/margin_mean": 0.3303707540035248, + "margin_dpo/margin_std": 0.8115462064743042, + "step": 145 + }, + { + "epoch": 0.22071050642479215, + "fcm_dpo/beta": 2.5885000228881836, + "fcm_dpo/delta": -0.06581529229879379, + "fcm_dpo/margin": 0.40906020998954773, + "fcm_dpo/q_t": 0.33817818760871887, + "grad_norm": 619.466064453125, + "learning_rate": 4.790271143580173e-07, + "logits/chosen": 0.05093229562044144, + "logits/rejected": 0.03536106273531914, + "logps/chosen": -58.38066101074219, + "logps/ref_chosen": -57.84098434448242, + "logps/ref_rejected": -67.47422790527344, + "logps/rejected": -68.42295837402344, + "loss": 1.1586, + "margin_dpo/margin_mean": 0.40906035900115967, + "margin_dpo/margin_std": 0.7477720975875854, + "step": 146 + }, + { + "epoch": 0.2222222222222222, + "fcm_dpo/beta": 2.669675588607788, + "fcm_dpo/delta": 0.24072621762752533, + "fcm_dpo/margin": 0.29151690006256104, + "fcm_dpo/q_t": 0.38209617137908936, + "grad_norm": 970.9570922851562, + "learning_rate": 4.784937956152489e-07, + "logits/chosen": 0.05634861811995506, + "logits/rejected": 0.02099587954580784, + "logps/chosen": -67.4126968383789, + "logps/ref_chosen": -66.81346893310547, + "logps/ref_rejected": -81.1796875, + "logps/rejected": -82.0704345703125, + "loss": 1.4796, + "margin_dpo/margin_mean": 0.29151687026023865, + "margin_dpo/margin_std": 0.7656582593917847, + "step": 147 + }, + { + "epoch": 0.2237339380196523, + "fcm_dpo/beta": 2.6029810905456543, + "fcm_dpo/delta": -0.31015288829803467, + "fcm_dpo/margin": 0.48854613304138184, + "fcm_dpo/q_t": 0.3191307783126831, + "grad_norm": 515.7787475585938, + "learning_rate": 4.779540854098347e-07, + "logits/chosen": 0.19165629148483276, + "logits/rejected": 0.12959185242652893, + "logps/chosen": -49.28028106689453, + "logps/ref_chosen": -48.6877555847168, + "logps/ref_rejected": -67.50503540039062, + "logps/rejected": -68.58610534667969, + "loss": 1.0437, + "margin_dpo/margin_mean": 0.488546222448349, + "margin_dpo/margin_std": 0.7724089622497559, + "step": 148 + }, + { + "epoch": 0.2252456538170824, + "fcm_dpo/beta": 2.533602476119995, + "fcm_dpo/delta": -0.09391121566295624, + "fcm_dpo/margin": 0.42736145853996277, + "fcm_dpo/q_t": 0.34449946880340576, + "grad_norm": 640.553466796875, + "learning_rate": 4.774079988386296e-07, + "logits/chosen": 0.05926530063152313, + "logits/rejected": 0.01661105640232563, + "logps/chosen": -55.83122253417969, + "logps/ref_chosen": -55.143775939941406, + "logps/ref_rejected": -64.79888916015625, + "logps/rejected": -65.9136962890625, + "loss": 1.1159, + "margin_dpo/margin_mean": 0.4273618459701538, + "margin_dpo/margin_std": 0.7076586484909058, + "step": 149 + }, + { + "epoch": 0.22675736961451248, + "fcm_dpo/beta": 2.3987488746643066, + "fcm_dpo/delta": -0.2589126229286194, + "fcm_dpo/margin": 0.5113659501075745, + "fcm_dpo/q_t": 0.2972312569618225, + "grad_norm": 609.1364135742188, + "learning_rate": 4.768555511768486e-07, + "logits/chosen": 0.10247902572154999, + "logits/rejected": 0.06602032482624054, + "logps/chosen": -67.88574981689453, + "logps/ref_chosen": -67.47074890136719, + "logps/ref_rejected": -89.21170806884766, + "logps/rejected": -90.13807678222656, + "loss": 0.9804, + "margin_dpo/margin_mean": 0.5113657712936401, + "margin_dpo/margin_std": 0.6983498334884644, + "step": 150 + }, + { + "epoch": 0.22826908541194255, + "fcm_dpo/beta": 2.3255553245544434, + "fcm_dpo/delta": -0.19754400849342346, + "fcm_dpo/margin": 0.5049396753311157, + "fcm_dpo/q_t": 0.3148772716522217, + "grad_norm": 509.62213134765625, + "learning_rate": 4.762967578776406e-07, + "logits/chosen": 0.06744161248207092, + "logits/rejected": 0.025958776473999023, + "logps/chosen": -52.93949890136719, + "logps/ref_chosen": -52.45954132080078, + "logps/ref_rejected": -79.0630111694336, + "logps/rejected": -80.04790496826172, + "loss": 0.9958, + "margin_dpo/margin_mean": 0.5049391388893127, + "margin_dpo/margin_std": 0.7245649099349976, + "step": 151 + }, + { + "epoch": 0.22978080120937264, + "fcm_dpo/beta": 2.2168989181518555, + "fcm_dpo/delta": -0.2083718478679657, + "fcm_dpo/margin": 0.5341185927391052, + "fcm_dpo/q_t": 0.30489107966423035, + "grad_norm": 501.7793884277344, + "learning_rate": 4.757316345716553e-07, + "logits/chosen": 0.1456744223833084, + "logits/rejected": 0.10498102009296417, + "logps/chosen": -57.103904724121094, + "logps/ref_chosen": -56.5538330078125, + "logps/ref_rejected": -76.55074310302734, + "logps/rejected": -77.63492584228516, + "loss": 0.9032, + "margin_dpo/margin_mean": 0.5341184139251709, + "margin_dpo/margin_std": 0.6747971773147583, + "step": 152 + }, + { + "epoch": 0.23129251700680273, + "fcm_dpo/beta": 2.214939832687378, + "fcm_dpo/delta": 0.11927812546491623, + "fcm_dpo/margin": 0.40247973799705505, + "fcm_dpo/q_t": 0.3638674020767212, + "grad_norm": 600.3435668945312, + "learning_rate": 4.751601970666064e-07, + "logits/chosen": 0.05303303897380829, + "logits/rejected": 0.01978529989719391, + "logps/chosen": -68.58604431152344, + "logps/ref_chosen": -68.00689697265625, + "logps/ref_rejected": -74.83482360839844, + "logps/rejected": -75.81645202636719, + "loss": 1.2482, + "margin_dpo/margin_mean": 0.4024793207645416, + "margin_dpo/margin_std": 0.7963600158691406, + "step": 153 + }, + { + "epoch": 0.2328042328042328, + "fcm_dpo/beta": 2.284209728240967, + "fcm_dpo/delta": 0.13816551864147186, + "fcm_dpo/margin": 0.38259416818618774, + "fcm_dpo/q_t": 0.3632936179637909, + "grad_norm": 581.8963623046875, + "learning_rate": 4.745824613468292e-07, + "logits/chosen": 0.13186918199062347, + "logits/rejected": 0.12810632586479187, + "logps/chosen": -59.913002014160156, + "logps/ref_chosen": -59.222537994384766, + "logps/ref_rejected": -64.19131469726562, + "logps/rejected": -65.2643814086914, + "loss": 1.2855, + "margin_dpo/margin_mean": 0.3825940191745758, + "margin_dpo/margin_std": 0.8229261636734009, + "step": 154 + }, + { + "epoch": 0.23431594860166288, + "fcm_dpo/beta": 2.359236717224121, + "fcm_dpo/delta": 0.17262253165245056, + "fcm_dpo/margin": 0.3568933308124542, + "fcm_dpo/q_t": 0.3739526867866516, + "grad_norm": 687.4069213867188, + "learning_rate": 4.7399844357283393e-07, + "logits/chosen": 0.14357620477676392, + "logits/rejected": 0.1261100172996521, + "logps/chosen": -69.06387329101562, + "logps/ref_chosen": -68.45469665527344, + "logps/ref_rejected": -77.91763305664062, + "logps/rejected": -78.88371276855469, + "loss": 1.4692, + "margin_dpo/margin_mean": 0.35689258575439453, + "margin_dpo/margin_std": 0.879474401473999, + "step": 155 + }, + { + "epoch": 0.23582766439909297, + "fcm_dpo/beta": 2.3249125480651855, + "fcm_dpo/delta": -0.2846854329109192, + "fcm_dpo/margin": 0.5367815494537354, + "fcm_dpo/q_t": 0.33714932203292847, + "grad_norm": 715.7948608398438, + "learning_rate": 4.7340816008085305e-07, + "logits/chosen": 0.09127533435821533, + "logits/rejected": 0.05366864800453186, + "logps/chosen": -67.89020538330078, + "logps/ref_chosen": -67.26959991455078, + "logps/ref_rejected": -86.95914459228516, + "logps/rejected": -88.11653137207031, + "loss": 1.1585, + "margin_dpo/margin_mean": 0.5367816686630249, + "margin_dpo/margin_std": 0.9481757879257202, + "step": 156 + }, + { + "epoch": 0.23733938019652306, + "fcm_dpo/beta": 2.2261717319488525, + "fcm_dpo/delta": -0.006286881864070892, + "fcm_dpo/margin": 0.45090270042419434, + "fcm_dpo/q_t": 0.3336307406425476, + "grad_norm": 524.041259765625, + "learning_rate": 4.728116273823847e-07, + "logits/chosen": 0.08175022900104523, + "logits/rejected": 0.062264494597911835, + "logps/chosen": -55.3340950012207, + "logps/ref_chosen": -54.77287292480469, + "logps/ref_rejected": -63.87866973876953, + "logps/rejected": -64.89079284667969, + "loss": 1.0499, + "margin_dpo/margin_mean": 0.45090246200561523, + "margin_dpo/margin_std": 0.7311956286430359, + "step": 157 + }, + { + "epoch": 0.23885109599395313, + "fcm_dpo/beta": 2.172646999359131, + "fcm_dpo/delta": -0.2092001736164093, + "fcm_dpo/margin": 0.5448204278945923, + "fcm_dpo/q_t": 0.3070847988128662, + "grad_norm": 502.65093994140625, + "learning_rate": 4.7220886216373085e-07, + "logits/chosen": 0.1196097731590271, + "logits/rejected": 0.0907188355922699, + "logps/chosen": -65.47095489501953, + "logps/ref_chosen": -64.92271423339844, + "logps/ref_rejected": -82.23789978027344, + "logps/rejected": -83.3309555053711, + "loss": 0.9403, + "margin_dpo/margin_mean": 0.5448204278945923, + "margin_dpo/margin_std": 0.7522009015083313, + "step": 158 + }, + { + "epoch": 0.24036281179138322, + "fcm_dpo/beta": 2.2735085487365723, + "fcm_dpo/delta": 0.19475619494915009, + "fcm_dpo/margin": 0.3540440499782562, + "fcm_dpo/q_t": 0.35417577624320984, + "grad_norm": 675.0926513671875, + "learning_rate": 4.715998812855304e-07, + "logits/chosen": 0.13686862587928772, + "logits/rejected": 0.10717260837554932, + "logps/chosen": -57.696720123291016, + "logps/ref_chosen": -57.046993255615234, + "logps/ref_rejected": -73.32441711425781, + "logps/rejected": -74.32818603515625, + "loss": 1.2425, + "margin_dpo/margin_mean": 0.3540443778038025, + "margin_dpo/margin_std": 0.6996503472328186, + "step": 159 + }, + { + "epoch": 0.2418745275888133, + "fcm_dpo/beta": 2.3114399909973145, + "fcm_dpo/delta": 0.17240478098392487, + "fcm_dpo/margin": 0.3636714816093445, + "fcm_dpo/q_t": 0.3722858130931854, + "grad_norm": 635.1336669921875, + "learning_rate": 4.7098470178228755e-07, + "logits/chosen": -0.020404599606990814, + "logits/rejected": -0.04949381574988365, + "logps/chosen": -50.52197265625, + "logps/ref_chosen": -49.806915283203125, + "logps/ref_rejected": -68.3370132446289, + "logps/rejected": -69.41574096679688, + "loss": 1.2751, + "margin_dpo/margin_mean": 0.3636714518070221, + "margin_dpo/margin_std": 0.8036404848098755, + "step": 160 + }, + { + "epoch": 0.24338624338624337, + "fcm_dpo/beta": 2.285512924194336, + "fcm_dpo/delta": -0.16357703506946564, + "fcm_dpo/margin": 0.5008035898208618, + "fcm_dpo/q_t": 0.35015690326690674, + "grad_norm": 537.9451293945312, + "learning_rate": 4.703633408618955e-07, + "logits/chosen": 0.10273732244968414, + "logits/rejected": 0.07215458899736404, + "logps/chosen": -53.11811065673828, + "logps/ref_chosen": -52.50048828125, + "logps/ref_rejected": -66.04540252685547, + "logps/rejected": -67.16382598876953, + "loss": 1.1267, + "margin_dpo/margin_mean": 0.5008042454719543, + "margin_dpo/margin_std": 0.8614367246627808, + "step": 161 + }, + { + "epoch": 0.24489795918367346, + "fcm_dpo/beta": 2.08780574798584, + "fcm_dpo/delta": -0.3560563623905182, + "fcm_dpo/margin": 0.6207355260848999, + "fcm_dpo/q_t": 0.2931872010231018, + "grad_norm": 511.5837097167969, + "learning_rate": 4.697358159051549e-07, + "logits/chosen": 0.16846126317977905, + "logits/rejected": 0.12896160781383514, + "logps/chosen": -70.21339416503906, + "logps/ref_chosen": -69.46919250488281, + "logps/ref_rejected": -92.00952911376953, + "logps/rejected": -93.37446594238281, + "loss": 0.9736, + "margin_dpo/margin_mean": 0.6207360029220581, + "margin_dpo/margin_std": 0.8480439186096191, + "step": 162 + }, + { + "epoch": 0.24640967498110355, + "fcm_dpo/beta": 2.0367884635925293, + "fcm_dpo/delta": -0.2981437146663666, + "fcm_dpo/margin": 0.618397057056427, + "fcm_dpo/q_t": 0.29892927408218384, + "grad_norm": 515.961181640625, + "learning_rate": 4.691021444652876e-07, + "logits/chosen": 0.10065104067325592, + "logits/rejected": 0.06360255181789398, + "logps/chosen": -51.2899055480957, + "logps/ref_chosen": -50.613834381103516, + "logps/ref_rejected": -74.62033081054688, + "logps/rejected": -75.914794921875, + "loss": 0.9615, + "margin_dpo/margin_mean": 0.6183971762657166, + "margin_dpo/margin_std": 0.8240780830383301, + "step": 163 + }, + { + "epoch": 0.24792139077853365, + "fcm_dpo/beta": 1.9806370735168457, + "fcm_dpo/delta": -0.047169312834739685, + "fcm_dpo/margin": 0.5249905586242676, + "fcm_dpo/q_t": 0.32894620299339294, + "grad_norm": 464.32489013671875, + "learning_rate": 4.6846234426744624e-07, + "logits/chosen": 0.08540582656860352, + "logits/rejected": 0.03592706099152565, + "logps/chosen": -55.629249572753906, + "logps/ref_chosen": -54.848114013671875, + "logps/ref_rejected": -79.0630111694336, + "logps/rejected": -80.369140625, + "loss": 1.0554, + "margin_dpo/margin_mean": 0.524990439414978, + "margin_dpo/margin_std": 0.7865326404571533, + "step": 164 + }, + { + "epoch": 0.2494331065759637, + "fcm_dpo/beta": 1.966191291809082, + "fcm_dpo/delta": -0.0075155869126319885, + "fcm_dpo/margin": 0.5113043785095215, + "fcm_dpo/q_t": 0.3119150400161743, + "grad_norm": 391.06072998046875, + "learning_rate": 4.678164332082175e-07, + "logits/chosen": 0.1477801650762558, + "logits/rejected": 0.10125482082366943, + "logps/chosen": -51.86711883544922, + "logps/ref_chosen": -51.089210510253906, + "logps/ref_rejected": -71.23370361328125, + "logps/rejected": -72.52291870117188, + "loss": 0.9392, + "margin_dpo/margin_mean": 0.5113040804862976, + "margin_dpo/margin_std": 0.6569217443466187, + "step": 165 + }, + { + "epoch": 0.2509448223733938, + "fcm_dpo/beta": 2.0378761291503906, + "fcm_dpo/delta": 0.31097179651260376, + "fcm_dpo/margin": 0.34910979866981506, + "fcm_dpo/q_t": 0.38307300209999084, + "grad_norm": 556.363037109375, + "learning_rate": 4.6716442935512214e-07, + "logits/chosen": 0.10038108378648758, + "logits/rejected": 0.036018554121255875, + "logps/chosen": -63.89585876464844, + "logps/ref_chosen": -63.19081115722656, + "logps/ref_rejected": -93.8402099609375, + "logps/rejected": -94.89436340332031, + "loss": 1.2454, + "margin_dpo/margin_mean": 0.3491097092628479, + "margin_dpo/margin_std": 0.7657175064086914, + "step": 166 + }, + { + "epoch": 0.25245653817082386, + "fcm_dpo/beta": 2.0208253860473633, + "fcm_dpo/delta": -0.17621225118637085, + "fcm_dpo/margin": 0.5719941854476929, + "fcm_dpo/q_t": 0.2979215979576111, + "grad_norm": 404.3883056640625, + "learning_rate": 4.6650635094610966e-07, + "logits/chosen": 0.06507319211959839, + "logits/rejected": 0.03547991067171097, + "logps/chosen": -59.553977966308594, + "logps/ref_chosen": -58.92427062988281, + "logps/ref_rejected": -72.97377014160156, + "logps/rejected": -74.17547607421875, + "loss": 0.8796, + "margin_dpo/margin_mean": 0.5719939470291138, + "margin_dpo/margin_std": 0.6960855722427368, + "step": 167 + }, + { + "epoch": 0.25396825396825395, + "fcm_dpo/beta": 2.060161828994751, + "fcm_dpo/delta": 0.1698514223098755, + "fcm_dpo/margin": 0.4094662666320801, + "fcm_dpo/q_t": 0.3545387387275696, + "grad_norm": 583.7806396484375, + "learning_rate": 4.6584221638904767e-07, + "logits/chosen": 0.07120160013437271, + "logits/rejected": 0.04992123693227768, + "logps/chosen": -66.4698486328125, + "logps/ref_chosen": -65.65138244628906, + "logps/ref_rejected": -79.71418762207031, + "logps/rejected": -80.94212341308594, + "loss": 1.1174, + "margin_dpo/margin_mean": 0.40946611762046814, + "margin_dpo/margin_std": 0.7269895076751709, + "step": 168 + }, + { + "epoch": 0.25547996976568405, + "fcm_dpo/beta": 2.0494308471679688, + "fcm_dpo/delta": -0.08093604445457458, + "fcm_dpo/margin": 0.5232309699058533, + "fcm_dpo/q_t": 0.34376293420791626, + "grad_norm": 552.8328857421875, + "learning_rate": 4.651720442612075e-07, + "logits/chosen": 0.15064923465251923, + "logits/rejected": 0.12327395379543304, + "logps/chosen": -62.13090515136719, + "logps/ref_chosen": -61.425865173339844, + "logps/ref_rejected": -76.09590148925781, + "logps/rejected": -77.32416534423828, + "loss": 1.0984, + "margin_dpo/margin_mean": 0.523231029510498, + "margin_dpo/margin_std": 0.913813591003418, + "step": 169 + }, + { + "epoch": 0.25699168556311414, + "fcm_dpo/beta": 2.1044416427612305, + "fcm_dpo/delta": 0.22101661562919617, + "fcm_dpo/margin": 0.3785492181777954, + "fcm_dpo/q_t": 0.3640963137149811, + "grad_norm": 517.9861450195312, + "learning_rate": 4.6449585330874425e-07, + "logits/chosen": 0.05246744677424431, + "logits/rejected": 0.051534149795770645, + "logps/chosen": -57.42051696777344, + "logps/ref_chosen": -56.65319061279297, + "logps/ref_rejected": -63.45965576171875, + "logps/rejected": -64.60552215576172, + "loss": 1.2398, + "margin_dpo/margin_mean": 0.3785494565963745, + "margin_dpo/margin_std": 0.7789652943611145, + "step": 170 + }, + { + "epoch": 0.2585034013605442, + "fcm_dpo/beta": 2.0837273597717285, + "fcm_dpo/delta": -0.041708558797836304, + "fcm_dpo/margin": 0.49539345502853394, + "fcm_dpo/q_t": 0.31717920303344727, + "grad_norm": 528.3797607421875, + "learning_rate": 4.6381366244617224e-07, + "logits/chosen": 0.12783187627792358, + "logits/rejected": 0.08980339765548706, + "logps/chosen": -64.4964599609375, + "logps/ref_chosen": -63.73476028442383, + "logps/ref_rejected": -78.50328063964844, + "logps/rejected": -79.76036834716797, + "loss": 1.0854, + "margin_dpo/margin_mean": 0.49539363384246826, + "margin_dpo/margin_std": 0.778314471244812, + "step": 171 + }, + { + "epoch": 0.2600151171579743, + "fcm_dpo/beta": 2.0559816360473633, + "fcm_dpo/delta": -0.2410603016614914, + "fcm_dpo/margin": 0.589635968208313, + "fcm_dpo/q_t": 0.2946144938468933, + "grad_norm": 428.92987060546875, + "learning_rate": 4.631254907558365e-07, + "logits/chosen": 0.17183159291744232, + "logits/rejected": 0.12959660589694977, + "logps/chosen": -52.974788665771484, + "logps/ref_chosen": -52.201759338378906, + "logps/ref_rejected": -82.85285949707031, + "logps/rejected": -84.21553039550781, + "loss": 0.949, + "margin_dpo/margin_mean": 0.5896360874176025, + "margin_dpo/margin_std": 0.7388289570808411, + "step": 172 + }, + { + "epoch": 0.2615268329554044, + "fcm_dpo/beta": 1.8781511783599854, + "fcm_dpo/delta": -0.25002074241638184, + "fcm_dpo/margin": 0.6354281902313232, + "fcm_dpo/q_t": 0.32253411412239075, + "grad_norm": 381.1091613769531, + "learning_rate": 4.624313574873786e-07, + "logits/chosen": 0.14371845126152039, + "logits/rejected": 0.07756569981575012, + "logps/chosen": -56.25102233886719, + "logps/ref_chosen": -55.434722900390625, + "logps/ref_rejected": -77.81967163085938, + "logps/rejected": -79.27140045166016, + "loss": 0.9918, + "margin_dpo/margin_mean": 0.6354283094406128, + "margin_dpo/margin_std": 0.9134526252746582, + "step": 173 + }, + { + "epoch": 0.26303854875283444, + "fcm_dpo/beta": 1.8751147985458374, + "fcm_dpo/delta": -0.21836894750595093, + "fcm_dpo/margin": 0.6357536315917969, + "fcm_dpo/q_t": 0.3092048168182373, + "grad_norm": 498.1719665527344, + "learning_rate": 4.61731282057198e-07, + "logits/chosen": 0.12083408981561661, + "logits/rejected": 0.07103556394577026, + "logps/chosen": -57.99762725830078, + "logps/ref_chosen": -57.17195129394531, + "logps/ref_rejected": -85.47578430175781, + "logps/rejected": -86.93720245361328, + "loss": 1.0066, + "margin_dpo/margin_mean": 0.6357530355453491, + "margin_dpo/margin_std": 0.8986474275588989, + "step": 174 + }, + { + "epoch": 0.26455026455026454, + "fcm_dpo/beta": 1.770094394683838, + "fcm_dpo/delta": -0.23501265048980713, + "fcm_dpo/margin": 0.6816864013671875, + "fcm_dpo/q_t": 0.30375754833221436, + "grad_norm": 463.9217529296875, + "learning_rate": 4.6102528404790965e-07, + "logits/chosen": 0.18367326259613037, + "logits/rejected": 0.15908128023147583, + "logps/chosen": -68.52366638183594, + "logps/ref_chosen": -67.6656265258789, + "logps/ref_rejected": -84.36766815185547, + "logps/rejected": -85.90739440917969, + "loss": 0.9656, + "margin_dpo/margin_mean": 0.6816866397857666, + "margin_dpo/margin_std": 0.915657639503479, + "step": 175 + }, + { + "epoch": 0.2660619803476946, + "fcm_dpo/beta": 1.7365822792053223, + "fcm_dpo/delta": 0.01569700986146927, + "fcm_dpo/margin": 0.5668948888778687, + "fcm_dpo/q_t": 0.37061506509780884, + "grad_norm": 517.9691162109375, + "learning_rate": 4.603133832077953e-07, + "logits/chosen": 0.08695434033870697, + "logits/rejected": 0.06711474061012268, + "logps/chosen": -78.7764892578125, + "logps/ref_chosen": -77.8587646484375, + "logps/ref_rejected": -81.08732604980469, + "logps/rejected": -82.57195281982422, + "loss": 1.2356, + "margin_dpo/margin_mean": 0.5668948292732239, + "margin_dpo/margin_std": 1.1584200859069824, + "step": 176 + }, + { + "epoch": 0.2675736961451247, + "fcm_dpo/beta": 1.6431140899658203, + "fcm_dpo/delta": -0.37270694971084595, + "fcm_dpo/margin": 0.8022236227989197, + "fcm_dpo/q_t": 0.2722761631011963, + "grad_norm": 448.8039245605469, + "learning_rate": 4.5959559945025183e-07, + "logits/chosen": 0.24163630604743958, + "logits/rejected": 0.1636749804019928, + "logps/chosen": -56.08782958984375, + "logps/ref_chosen": -55.22039794921875, + "logps/ref_rejected": -92.54973602294922, + "logps/rejected": -94.21939086914062, + "loss": 0.8902, + "margin_dpo/margin_mean": 0.8022229671478271, + "margin_dpo/margin_std": 1.0025627613067627, + "step": 177 + }, + { + "epoch": 0.2690854119425548, + "fcm_dpo/beta": 1.6737594604492188, + "fcm_dpo/delta": 0.21938863396644592, + "fcm_dpo/margin": 0.47716161608695984, + "fcm_dpo/q_t": 0.3556697368621826, + "grad_norm": 458.8934020996094, + "learning_rate": 4.588719528532341e-07, + "logits/chosen": 0.08861234784126282, + "logits/rejected": 0.05083230137825012, + "logps/chosen": -61.76763916015625, + "logps/ref_chosen": -60.81049346923828, + "logps/ref_rejected": -81.12973022460938, + "logps/rejected": -82.56403350830078, + "loss": 1.0621, + "margin_dpo/margin_mean": 0.47716209292411804, + "margin_dpo/margin_std": 0.7696354985237122, + "step": 178 + }, + { + "epoch": 0.2705971277399849, + "fcm_dpo/beta": 1.7257657051086426, + "fcm_dpo/delta": 0.10213658213615417, + "fcm_dpo/margin": 0.5256574153900146, + "fcm_dpo/q_t": 0.35149049758911133, + "grad_norm": 419.9453125, + "learning_rate": 4.581424636586928e-07, + "logits/chosen": 0.16226297616958618, + "logits/rejected": 0.1488857865333557, + "logps/chosen": -66.7244644165039, + "logps/ref_chosen": -65.67171478271484, + "logps/ref_rejected": -75.32586669921875, + "logps/rejected": -76.9042739868164, + "loss": 1.0632, + "margin_dpo/margin_mean": 0.5256578922271729, + "margin_dpo/margin_std": 0.8762655258178711, + "step": 179 + }, + { + "epoch": 0.272108843537415, + "fcm_dpo/beta": 1.8206181526184082, + "fcm_dpo/delta": 0.2291109263896942, + "fcm_dpo/margin": 0.42961639165878296, + "fcm_dpo/q_t": 0.36887508630752563, + "grad_norm": 520.1433715820312, + "learning_rate": 4.5740715227200897e-07, + "logits/chosen": -0.0028491299599409103, + "logits/rejected": -0.017982792109251022, + "logps/chosen": -57.53323745727539, + "logps/ref_chosen": -56.68280792236328, + "logps/ref_rejected": -64.94414520263672, + "logps/rejected": -66.22418975830078, + "loss": 1.2665, + "margin_dpo/margin_mean": 0.4296168386936188, + "margin_dpo/margin_std": 0.9101868867874146, + "step": 180 + }, + { + "epoch": 0.273620559334845, + "fcm_dpo/beta": 1.7677171230316162, + "fcm_dpo/delta": -0.24141666293144226, + "fcm_dpo/margin": 0.6860055923461914, + "fcm_dpo/q_t": 0.2977555990219116, + "grad_norm": 448.171630859375, + "learning_rate": 4.566660392614228e-07, + "logits/chosen": 0.13813161849975586, + "logits/rejected": 0.10955438017845154, + "logps/chosen": -61.58479309082031, + "logps/ref_chosen": -60.77604675292969, + "logps/ref_rejected": -83.98361206054688, + "logps/rejected": -85.47836303710938, + "loss": 0.9069, + "margin_dpo/margin_mean": 0.686005711555481, + "margin_dpo/margin_std": 0.8478412628173828, + "step": 181 + }, + { + "epoch": 0.2751322751322751, + "fcm_dpo/beta": 1.698185920715332, + "fcm_dpo/delta": -0.05800933390855789, + "fcm_dpo/margin": 0.6177934408187866, + "fcm_dpo/q_t": 0.3166268467903137, + "grad_norm": 430.59722900390625, + "learning_rate": 4.5591914535745817e-07, + "logits/chosen": 0.1499433070421219, + "logits/rejected": 0.09003090858459473, + "logps/chosen": -61.18524932861328, + "logps/ref_chosen": -60.2537841796875, + "logps/ref_rejected": -89.7706298828125, + "logps/rejected": -91.31988525390625, + "loss": 1.0536, + "margin_dpo/margin_mean": 0.6177935004234314, + "margin_dpo/margin_std": 0.9228367209434509, + "step": 182 + }, + { + "epoch": 0.2766439909297052, + "fcm_dpo/beta": 1.8208626508712769, + "fcm_dpo/delta": 0.4041329324245453, + "fcm_dpo/margin": 0.34058958292007446, + "fcm_dpo/q_t": 0.3961232900619507, + "grad_norm": 510.5823059082031, + "learning_rate": 4.551664914523433e-07, + "logits/chosen": 0.1353759616613388, + "logits/rejected": 0.12078934907913208, + "logps/chosen": -62.945030212402344, + "logps/ref_chosen": -61.76142120361328, + "logps/ref_rejected": -72.54627990722656, + "logps/rejected": -74.07048034667969, + "loss": 1.3364, + "margin_dpo/margin_mean": 0.3405901789665222, + "margin_dpo/margin_std": 0.8557813763618469, + "step": 183 + }, + { + "epoch": 0.2781557067271353, + "fcm_dpo/beta": 1.8339712619781494, + "fcm_dpo/delta": 0.05328105390071869, + "fcm_dpo/margin": 0.5154128074645996, + "fcm_dpo/q_t": 0.34025606513023376, + "grad_norm": 346.7197265625, + "learning_rate": 4.544080985994258e-07, + "logits/chosen": 0.21997570991516113, + "logits/rejected": 0.1718028038740158, + "logps/chosen": -47.82099151611328, + "logps/ref_chosen": -46.840721130371094, + "logps/ref_rejected": -69.3609390258789, + "logps/rejected": -70.85662841796875, + "loss": 0.989, + "margin_dpo/margin_mean": 0.5154126882553101, + "margin_dpo/margin_std": 0.7232804298400879, + "step": 184 + }, + { + "epoch": 0.2796674225245654, + "fcm_dpo/beta": 1.8281052112579346, + "fcm_dpo/delta": -0.15045057237148285, + "fcm_dpo/margin": 0.6196208000183105, + "fcm_dpo/q_t": 0.321952223777771, + "grad_norm": 413.64068603515625, + "learning_rate": 4.5364398801258394e-07, + "logits/chosen": 0.14673639833927155, + "logits/rejected": 0.10907743126153946, + "logps/chosen": -53.348968505859375, + "logps/ref_chosen": -52.32114028930664, + "logps/ref_rejected": -68.3885726928711, + "logps/rejected": -70.03601837158203, + "loss": 1.0869, + "margin_dpo/margin_mean": 0.6196208000183105, + "margin_dpo/margin_std": 1.0106725692749023, + "step": 185 + }, + { + "epoch": 0.2811791383219955, + "fcm_dpo/beta": 1.8034803867340088, + "fcm_dpo/delta": -0.005721554160118103, + "fcm_dpo/margin": 0.5566083788871765, + "fcm_dpo/q_t": 0.3407011032104492, + "grad_norm": 465.7325744628906, + "learning_rate": 4.5287418106563354e-07, + "logits/chosen": 0.07695234566926956, + "logits/rejected": 0.04599303752183914, + "logps/chosen": -68.40414428710938, + "logps/ref_chosen": -67.42012786865234, + "logps/ref_rejected": -82.50968933105469, + "logps/rejected": -84.0503158569336, + "loss": 1.0608, + "margin_dpo/margin_mean": 0.5566080808639526, + "margin_dpo/margin_std": 0.8762015104293823, + "step": 186 + }, + { + "epoch": 0.28269085411942557, + "fcm_dpo/beta": 1.7329106330871582, + "fcm_dpo/delta": -0.18373380601406097, + "fcm_dpo/margin": 0.6642186641693115, + "fcm_dpo/q_t": 0.32172733545303345, + "grad_norm": 530.1708374023438, + "learning_rate": 4.520986992917297e-07, + "logits/chosen": 0.14111362397670746, + "logits/rejected": 0.09435050934553146, + "logps/chosen": -76.567138671875, + "logps/ref_chosen": -75.52549743652344, + "logps/ref_rejected": -94.76289367675781, + "logps/rejected": -96.46875, + "loss": 1.1758, + "margin_dpo/margin_mean": 0.6642183661460876, + "margin_dpo/margin_std": 1.1894935369491577, + "step": 187 + }, + { + "epoch": 0.2842025699168556, + "fcm_dpo/beta": 1.744195818901062, + "fcm_dpo/delta": 0.005925014615058899, + "fcm_dpo/margin": 0.5699671506881714, + "fcm_dpo/q_t": 0.3193795382976532, + "grad_norm": 516.5187377929688, + "learning_rate": 4.5131756438276466e-07, + "logits/chosen": 0.151597797870636, + "logits/rejected": 0.11677326261997223, + "logps/chosen": -72.44251251220703, + "logps/ref_chosen": -71.52333068847656, + "logps/ref_rejected": -78.29949951171875, + "logps/rejected": -79.78865814208984, + "loss": 1.1632, + "margin_dpo/margin_mean": 0.56996750831604, + "margin_dpo/margin_std": 1.0281386375427246, + "step": 188 + }, + { + "epoch": 0.2857142857142857, + "fcm_dpo/beta": 1.6932382583618164, + "fcm_dpo/delta": -0.15139150619506836, + "fcm_dpo/margin": 0.6669385433197021, + "fcm_dpo/q_t": 0.30612969398498535, + "grad_norm": 391.1927795410156, + "learning_rate": 4.5053079818876096e-07, + "logits/chosen": 0.12143361568450928, + "logits/rejected": 0.12991394102573395, + "logps/chosen": -73.05665588378906, + "logps/ref_chosen": -72.17626953125, + "logps/ref_rejected": -75.26313781738281, + "logps/rejected": -76.81045532226562, + "loss": 0.8928, + "margin_dpo/margin_mean": 0.6669397950172424, + "margin_dpo/margin_std": 0.832381010055542, + "step": 189 + }, + { + "epoch": 0.2872260015117158, + "fcm_dpo/beta": 1.6875749826431274, + "fcm_dpo/delta": -0.09505629539489746, + "fcm_dpo/margin": 0.642721951007843, + "fcm_dpo/q_t": 0.32302048802375793, + "grad_norm": 424.7405090332031, + "learning_rate": 4.4973842271726024e-07, + "logits/chosen": 0.19074919819831848, + "logits/rejected": 0.081771120429039, + "logps/chosen": -55.586639404296875, + "logps/ref_chosen": -54.624271392822266, + "logps/ref_rejected": -101.47068786621094, + "logps/rejected": -103.0757827758789, + "loss": 1.0135, + "margin_dpo/margin_mean": 0.6427220106124878, + "margin_dpo/margin_std": 0.9737996459007263, + "step": 190 + }, + { + "epoch": 0.2887377173091459, + "fcm_dpo/beta": 1.6515135765075684, + "fcm_dpo/delta": -0.04676612466573715, + "fcm_dpo/margin": 0.6305712461471558, + "fcm_dpo/q_t": 0.31939807534217834, + "grad_norm": 493.140625, + "learning_rate": 4.48940460132708e-07, + "logits/chosen": 0.1993117779493332, + "logits/rejected": 0.1801232248544693, + "logps/chosen": -74.03681945800781, + "logps/ref_chosen": -72.93251037597656, + "logps/ref_rejected": -89.95103454589844, + "logps/rejected": -91.68590545654297, + "loss": 1.047, + "margin_dpo/margin_mean": 0.6305709481239319, + "margin_dpo/margin_std": 0.9657796621322632, + "step": 191 + }, + { + "epoch": 0.29024943310657597, + "fcm_dpo/beta": 1.7161282300949097, + "fcm_dpo/delta": 0.19982855021953583, + "fcm_dpo/margin": 0.47440922260284424, + "fcm_dpo/q_t": 0.3627406060695648, + "grad_norm": 372.83734130859375, + "learning_rate": 4.481369327558329e-07, + "logits/chosen": 0.16536489129066467, + "logits/rejected": 0.1462571918964386, + "logps/chosen": -55.00927734375, + "logps/ref_chosen": -54.001121520996094, + "logps/ref_rejected": -63.531551361083984, + "logps/rejected": -65.01411437988281, + "loss": 1.1335, + "margin_dpo/margin_mean": 0.4744090139865875, + "margin_dpo/margin_std": 0.8528145551681519, + "step": 192 + }, + { + "epoch": 0.29176114890400606, + "fcm_dpo/beta": 1.6729130744934082, + "fcm_dpo/delta": -0.11725394427776337, + "fcm_dpo/margin": 0.6579139828681946, + "fcm_dpo/q_t": 0.3185346722602844, + "grad_norm": 344.4507751464844, + "learning_rate": 4.47327863063023e-07, + "logits/chosen": 0.09357620775699615, + "logits/rejected": 0.07541916519403458, + "logps/chosen": -57.7445182800293, + "logps/ref_chosen": -56.74927520751953, + "logps/ref_rejected": -58.80629348754883, + "logps/rejected": -60.459449768066406, + "loss": 0.9244, + "margin_dpo/margin_mean": 0.6579139232635498, + "margin_dpo/margin_std": 0.9124239087104797, + "step": 193 + }, + { + "epoch": 0.29327286470143615, + "fcm_dpo/beta": 1.6954293251037598, + "fcm_dpo/delta": 0.1348000019788742, + "fcm_dpo/margin": 0.5165129899978638, + "fcm_dpo/q_t": 0.36234208941459656, + "grad_norm": 409.556396484375, + "learning_rate": 4.4651327368569684e-07, + "logits/chosen": 0.1781534105539322, + "logits/rejected": 0.15505832433700562, + "logps/chosen": -57.62710189819336, + "logps/ref_chosen": -56.64944076538086, + "logps/ref_rejected": -69.98954772949219, + "logps/rejected": -71.48371887207031, + "loss": 1.1709, + "margin_dpo/margin_mean": 0.5165130496025085, + "margin_dpo/margin_std": 0.9614365100860596, + "step": 194 + }, + { + "epoch": 0.2947845804988662, + "fcm_dpo/beta": 1.7598028182983398, + "fcm_dpo/delta": 0.22711437940597534, + "fcm_dpo/margin": 0.4485671818256378, + "fcm_dpo/q_t": 0.3598003685474396, + "grad_norm": 496.6513977050781, + "learning_rate": 4.4569318740967043e-07, + "logits/chosen": 0.09457789361476898, + "logits/rejected": 0.09252005815505981, + "logps/chosen": -71.65326690673828, + "logps/ref_chosen": -70.40977478027344, + "logps/ref_rejected": -74.39448547363281, + "logps/rejected": -76.08654022216797, + "loss": 1.2491, + "margin_dpo/margin_mean": 0.44856685400009155, + "margin_dpo/margin_std": 0.9060893058776855, + "step": 195 + }, + { + "epoch": 0.2962962962962963, + "fcm_dpo/beta": 1.8433669805526733, + "fcm_dpo/delta": 0.033393874764442444, + "fcm_dpo/margin": 0.5237653255462646, + "fcm_dpo/q_t": 0.34850770235061646, + "grad_norm": 410.0362854003906, + "learning_rate": 4.448676271745197e-07, + "logits/chosen": 0.1736685037612915, + "logits/rejected": 0.14138346910476685, + "logps/chosen": -60.27099609375, + "logps/ref_chosen": -59.227577209472656, + "logps/ref_rejected": -83.54757690429688, + "logps/rejected": -85.11476135253906, + "loss": 1.193, + "margin_dpo/margin_mean": 0.5237653255462646, + "margin_dpo/margin_std": 0.9456428289413452, + "step": 196 + }, + { + "epoch": 0.29780801209372637, + "fcm_dpo/beta": 1.8353400230407715, + "fcm_dpo/delta": -0.15863925218582153, + "fcm_dpo/margin": 0.6141480803489685, + "fcm_dpo/q_t": 0.33761459589004517, + "grad_norm": 520.2391967773438, + "learning_rate": 4.440366160729392e-07, + "logits/chosen": 0.23479902744293213, + "logits/rejected": 0.1985134482383728, + "logps/chosen": -52.59620666503906, + "logps/ref_chosen": -51.52912902832031, + "logps/ref_rejected": -73.70631408691406, + "logps/rejected": -75.38753509521484, + "loss": 1.2601, + "margin_dpo/margin_mean": 0.6141484975814819, + "margin_dpo/margin_std": 1.1263779401779175, + "step": 197 + }, + { + "epoch": 0.29931972789115646, + "fcm_dpo/beta": 1.6845048666000366, + "fcm_dpo/delta": -0.392536461353302, + "fcm_dpo/margin": 0.796295702457428, + "fcm_dpo/q_t": 0.2878139615058899, + "grad_norm": 392.1756591796875, + "learning_rate": 4.432001773500957e-07, + "logits/chosen": 0.19247442483901978, + "logits/rejected": 0.16175703704357147, + "logps/chosen": -60.77484893798828, + "logps/ref_chosen": -59.78268051147461, + "logps/ref_rejected": -72.24533081054688, + "logps/rejected": -74.03379821777344, + "loss": 0.8946, + "margin_dpo/margin_mean": 0.796296238899231, + "margin_dpo/margin_std": 1.0002985000610352, + "step": 198 + }, + { + "epoch": 0.30083144368858655, + "fcm_dpo/beta": 1.6466844081878662, + "fcm_dpo/delta": 0.005560420453548431, + "fcm_dpo/margin": 0.6032355427742004, + "fcm_dpo/q_t": 0.3430374562740326, + "grad_norm": 385.1544494628906, + "learning_rate": 4.4235833440297856e-07, + "logits/chosen": 0.13208839297294617, + "logits/rejected": 0.0626702532172203, + "logps/chosen": -57.460811614990234, + "logps/ref_chosen": -56.38677215576172, + "logps/ref_rejected": -74.56779479980469, + "logps/rejected": -76.24507141113281, + "loss": 1.0833, + "margin_dpo/margin_mean": 0.6032348275184631, + "margin_dpo/margin_std": 0.970983624458313, + "step": 199 + }, + { + "epoch": 0.30234315948601664, + "fcm_dpo/beta": 1.5712354183197021, + "fcm_dpo/delta": -0.1618885099887848, + "fcm_dpo/margin": 0.7240477800369263, + "fcm_dpo/q_t": 0.327957421541214, + "grad_norm": 412.8072204589844, + "learning_rate": 4.415111107797445e-07, + "logits/chosen": 0.1844407021999359, + "logits/rejected": 0.12967121601104736, + "logps/chosen": -58.72947692871094, + "logps/ref_chosen": -57.82432556152344, + "logps/ref_rejected": -89.28246307373047, + "logps/rejected": -90.91166687011719, + "loss": 1.0309, + "margin_dpo/margin_mean": 0.7240477800369263, + "margin_dpo/margin_std": 1.1023731231689453, + "step": 200 + }, + { + "epoch": 0.30234315948601664, + "eval_fcm_dpo/beta": 1.5800285339355469, + "eval_logits/chosen": 0.16845357418060303, + "eval_logits/rejected": 0.13373498618602753, + "eval_logps/chosen": -75.8633041381836, + "eval_logps/ref_chosen": -74.85946655273438, + "eval_logps/ref_rejected": -79.54898834228516, + "eval_logps/rejected": -81.05730438232422, + "eval_loss": 0.6114334464073181, + "eval_margin_dpo/margin_mean": 0.5044752359390259, + "eval_margin_dpo/margin_std": 1.028841495513916, + "eval_runtime": 38.0264, + "eval_samples_per_second": 60.563, + "eval_steps_per_second": 1.893, + "step": 200 + }, + { + "epoch": 0.30385487528344673, + "fcm_dpo/beta": 1.5763221979141235, + "fcm_dpo/delta": 0.00995655357837677, + "fcm_dpo/margin": 0.6285428404808044, + "fcm_dpo/q_t": 0.33469393849372864, + "grad_norm": 368.6453552246094, + "learning_rate": 4.4065853017905953e-07, + "logits/chosen": 0.20987267792224884, + "logits/rejected": 0.17573854327201843, + "logps/chosen": -60.036251068115234, + "logps/ref_chosen": -58.999759674072266, + "logps/ref_rejected": -84.67575073242188, + "logps/rejected": -86.34077453613281, + "loss": 0.9778, + "margin_dpo/margin_mean": 0.6285424828529358, + "margin_dpo/margin_std": 0.8790519833564758, + "step": 201 + }, + { + "epoch": 0.30536659108087677, + "fcm_dpo/beta": 1.5742418766021729, + "fcm_dpo/delta": -0.20220552384853363, + "fcm_dpo/margin": 0.7447031736373901, + "fcm_dpo/q_t": 0.306610643863678, + "grad_norm": 324.744384765625, + "learning_rate": 4.3980061644943575e-07, + "logits/chosen": 0.08034056425094604, + "logits/rejected": 0.031506434082984924, + "logps/chosen": -48.6182861328125, + "logps/ref_chosen": -47.660648345947266, + "logps/ref_rejected": -73.63249969482422, + "logps/rejected": -75.3348388671875, + "loss": 0.918, + "margin_dpo/margin_mean": 0.7447031736373901, + "margin_dpo/margin_std": 0.9296808242797852, + "step": 202 + }, + { + "epoch": 0.30687830687830686, + "fcm_dpo/beta": 1.5995709896087646, + "fcm_dpo/delta": 0.29017671942710876, + "fcm_dpo/margin": 0.45493584871292114, + "fcm_dpo/q_t": 0.3810551166534424, + "grad_norm": 469.91229248046875, + "learning_rate": 4.3893739358856455e-07, + "logits/chosen": 0.19242677092552185, + "logits/rejected": 0.14212460815906525, + "logps/chosen": -63.31700134277344, + "logps/ref_chosen": -62.32553482055664, + "logps/ref_rejected": -99.37226104736328, + "logps/rejected": -100.81866455078125, + "loss": 1.2402, + "margin_dpo/margin_mean": 0.454935759305954, + "margin_dpo/margin_std": 0.9751724004745483, + "step": 203 + }, + { + "epoch": 0.30839002267573695, + "fcm_dpo/beta": 1.5674870014190674, + "fcm_dpo/delta": -0.029434487223625183, + "fcm_dpo/margin": 0.6477770209312439, + "fcm_dpo/q_t": 0.3433707654476166, + "grad_norm": 368.2581787109375, + "learning_rate": 4.380688857426449e-07, + "logits/chosen": 0.0801263153553009, + "logits/rejected": 0.030903467908501625, + "logps/chosen": -51.68293762207031, + "logps/ref_chosen": -50.62931823730469, + "logps/ref_rejected": -66.60475158691406, + "logps/rejected": -68.30615234375, + "loss": 1.1195, + "margin_dpo/margin_mean": 0.6477770805358887, + "margin_dpo/margin_std": 1.0862679481506348, + "step": 204 + }, + { + "epoch": 0.30990173847316704, + "fcm_dpo/beta": 1.653544306755066, + "fcm_dpo/delta": 0.27683955430984497, + "fcm_dpo/margin": 0.450222909450531, + "fcm_dpo/q_t": 0.3771224319934845, + "grad_norm": 536.0993041992188, + "learning_rate": 4.3719511720570814e-07, + "logits/chosen": 0.16879329085350037, + "logits/rejected": 0.12696264684200287, + "logps/chosen": -71.54264831542969, + "logps/ref_chosen": -70.3561782836914, + "logps/ref_rejected": -93.39848327636719, + "logps/rejected": -95.03517150878906, + "loss": 1.3927, + "margin_dpo/margin_mean": 0.4502222239971161, + "margin_dpo/margin_std": 1.1024572849273682, + "step": 205 + }, + { + "epoch": 0.31141345427059713, + "fcm_dpo/beta": 1.7623982429504395, + "fcm_dpo/delta": 0.12155643105506897, + "fcm_dpo/margin": 0.49780306220054626, + "fcm_dpo/q_t": 0.3561771512031555, + "grad_norm": 486.98162841796875, + "learning_rate": 4.363161124189387e-07, + "logits/chosen": 0.18706491589546204, + "logits/rejected": 0.17341138422489166, + "logps/chosen": -68.82878112792969, + "logps/ref_chosen": -67.64547729492188, + "logps/ref_rejected": -79.89584350585938, + "logps/rejected": -81.57694244384766, + "loss": 1.2778, + "margin_dpo/margin_mean": 0.497803270816803, + "margin_dpo/margin_std": 0.9891307353973389, + "step": 206 + }, + { + "epoch": 0.3129251700680272, + "fcm_dpo/beta": 1.7180607318878174, + "fcm_dpo/delta": 0.0706307590007782, + "fcm_dpo/margin": 0.5409280061721802, + "fcm_dpo/q_t": 0.35449251532554626, + "grad_norm": 428.377685546875, + "learning_rate": 4.3543189596998986e-07, + "logits/chosen": 0.10714876651763916, + "logits/rejected": 0.05734197795391083, + "logps/chosen": -68.83715057373047, + "logps/ref_chosen": -67.66419219970703, + "logps/ref_rejected": -85.10249328613281, + "logps/rejected": -86.81637573242188, + "loss": 1.1493, + "margin_dpo/margin_mean": 0.5409282445907593, + "margin_dpo/margin_std": 1.005875825881958, + "step": 207 + }, + { + "epoch": 0.3144368858654573, + "fcm_dpo/beta": 1.7887952327728271, + "fcm_dpo/delta": 0.09666138887405396, + "fcm_dpo/margin": 0.509753942489624, + "fcm_dpo/q_t": 0.3555990755558014, + "grad_norm": 445.3408508300781, + "learning_rate": 4.3454249259229664e-07, + "logits/chosen": 0.13969993591308594, + "logits/rejected": 0.11949601769447327, + "logps/chosen": -58.69970703125, + "logps/ref_chosen": -57.731712341308594, + "logps/ref_rejected": -74.19276428222656, + "logps/rejected": -75.6705093383789, + "loss": 1.2387, + "margin_dpo/margin_mean": 0.509753406047821, + "margin_dpo/margin_std": 0.9776418209075928, + "step": 208 + }, + { + "epoch": 0.31594860166288735, + "fcm_dpo/beta": 1.7703063488006592, + "fcm_dpo/delta": -0.3099019527435303, + "fcm_dpo/margin": 0.7123583555221558, + "fcm_dpo/q_t": 0.3118298351764679, + "grad_norm": 475.9559020996094, + "learning_rate": 4.336479271643833e-07, + "logits/chosen": 0.08468753099441528, + "logits/rejected": 0.04272126033902168, + "logps/chosen": -69.55844116210938, + "logps/ref_chosen": -68.55007934570312, + "logps/ref_rejected": -87.90541076660156, + "logps/rejected": -89.62612915039062, + "loss": 1.059, + "margin_dpo/margin_mean": 0.7123589515686035, + "margin_dpo/margin_std": 1.0521396398544312, + "step": 209 + }, + { + "epoch": 0.31746031746031744, + "fcm_dpo/beta": 1.604590892791748, + "fcm_dpo/delta": -0.3447558283805847, + "fcm_dpo/margin": 0.8102937340736389, + "fcm_dpo/q_t": 0.31004101037979126, + "grad_norm": 367.3415832519531, + "learning_rate": 4.327482247091679e-07, + "logits/chosen": 0.17499999701976776, + "logits/rejected": 0.10814614593982697, + "logps/chosen": -58.340545654296875, + "logps/ref_chosen": -57.268272399902344, + "logps/ref_rejected": -85.72807312011719, + "logps/rejected": -87.61064147949219, + "loss": 0.9243, + "margin_dpo/margin_mean": 0.8102930784225464, + "margin_dpo/margin_std": 1.078457236289978, + "step": 210 + }, + { + "epoch": 0.31897203325774753, + "fcm_dpo/beta": 1.5751144886016846, + "fcm_dpo/delta": -0.008975658565759659, + "fcm_dpo/margin": 0.6399465799331665, + "fcm_dpo/q_t": 0.33030977845191956, + "grad_norm": 443.0260009765625, + "learning_rate": 4.3184341039326217e-07, + "logits/chosen": 0.1592363566160202, + "logits/rejected": 0.09597043693065643, + "logps/chosen": -54.56357192993164, + "logps/ref_chosen": -53.640708923339844, + "logps/ref_rejected": -93.0387954711914, + "logps/rejected": -94.60160827636719, + "loss": 0.9671, + "margin_dpo/margin_mean": 0.6399465799331665, + "margin_dpo/margin_std": 0.9168812036514282, + "step": 211 + }, + { + "epoch": 0.3204837490551776, + "fcm_dpo/beta": 1.564136028289795, + "fcm_dpo/delta": -0.06847534328699112, + "fcm_dpo/margin": 0.678282618522644, + "fcm_dpo/q_t": 0.3206981420516968, + "grad_norm": 370.3670349121094, + "learning_rate": 4.309335095262675e-07, + "logits/chosen": 0.17273937165737152, + "logits/rejected": 0.11869757622480392, + "logps/chosen": -58.41630554199219, + "logps/ref_chosen": -57.36674499511719, + "logps/ref_rejected": -79.89643096923828, + "logps/rejected": -81.62427520751953, + "loss": 1.027, + "margin_dpo/margin_mean": 0.678282618522644, + "margin_dpo/margin_std": 1.0015318393707275, + "step": 212 + }, + { + "epoch": 0.3219954648526077, + "fcm_dpo/beta": 1.527937889099121, + "fcm_dpo/delta": -0.042714398354291916, + "fcm_dpo/margin": 0.6786133050918579, + "fcm_dpo/q_t": 0.3305957615375519, + "grad_norm": 391.22064208984375, + "learning_rate": 4.3001854756006724e-07, + "logits/chosen": 0.15932638943195343, + "logits/rejected": 0.138889878988266, + "logps/chosen": -66.10162353515625, + "logps/ref_chosen": -65.22111511230469, + "logps/ref_rejected": -80.1810302734375, + "logps/rejected": -81.74015045166016, + "loss": 1.0605, + "margin_dpo/margin_mean": 0.6786131858825684, + "margin_dpo/margin_std": 1.066502332687378, + "step": 213 + }, + { + "epoch": 0.3235071806500378, + "fcm_dpo/beta": 1.542710781097412, + "fcm_dpo/delta": -0.057289645075798035, + "fcm_dpo/margin": 0.6799356937408447, + "fcm_dpo/q_t": 0.33246564865112305, + "grad_norm": 433.1566467285156, + "learning_rate": 4.290985500881143e-07, + "logits/chosen": 0.06008949503302574, + "logits/rejected": 0.0402056947350502, + "logps/chosen": -62.26348114013672, + "logps/ref_chosen": -61.292327880859375, + "logps/ref_rejected": -67.69841003417969, + "logps/rejected": -69.3494873046875, + "loss": 1.0116, + "margin_dpo/margin_mean": 0.679936408996582, + "margin_dpo/margin_std": 1.0450165271759033, + "step": 214 + }, + { + "epoch": 0.3250188964474679, + "fcm_dpo/beta": 1.5210988521575928, + "fcm_dpo/delta": -0.0631207600235939, + "fcm_dpo/margin": 0.6934947371482849, + "fcm_dpo/q_t": 0.33989161252975464, + "grad_norm": 409.9744567871094, + "learning_rate": 4.281735428447157e-07, + "logits/chosen": 0.08377814292907715, + "logits/rejected": 0.010123915039002895, + "logps/chosen": -64.82623291015625, + "logps/ref_chosen": -63.869136810302734, + "logps/ref_rejected": -98.7657241821289, + "logps/rejected": -100.41632080078125, + "loss": 1.0766, + "margin_dpo/margin_mean": 0.6934951543807983, + "margin_dpo/margin_std": 1.1064403057098389, + "step": 215 + }, + { + "epoch": 0.32653061224489793, + "fcm_dpo/beta": 1.4754486083984375, + "fcm_dpo/delta": -0.2396281659603119, + "fcm_dpo/margin": 0.8188655972480774, + "fcm_dpo/q_t": 0.34051093459129333, + "grad_norm": 398.4416198730469, + "learning_rate": 4.2724355170431247e-07, + "logits/chosen": 0.16970112919807434, + "logits/rejected": 0.10684916377067566, + "logps/chosen": -68.89656829833984, + "logps/ref_chosen": -67.824951171875, + "logps/ref_rejected": -96.40231323242188, + "logps/rejected": -98.29280090332031, + "loss": 1.1031, + "margin_dpo/margin_mean": 0.8188657760620117, + "margin_dpo/margin_std": 1.4882557392120361, + "step": 216 + }, + { + "epoch": 0.328042328042328, + "fcm_dpo/beta": 1.344929575920105, + "fcm_dpo/delta": -0.35066401958465576, + "fcm_dpo/margin": 0.9668929576873779, + "fcm_dpo/q_t": 0.28799083828926086, + "grad_norm": 300.48565673828125, + "learning_rate": 4.26308602680756e-07, + "logits/chosen": 0.11938208341598511, + "logits/rejected": 0.03593681752681732, + "logps/chosen": -61.565162658691406, + "logps/ref_chosen": -60.5049934387207, + "logps/ref_rejected": -84.26618194580078, + "logps/rejected": -86.29324340820312, + "loss": 0.8155, + "margin_dpo/margin_mean": 0.966893196105957, + "margin_dpo/margin_std": 1.1881346702575684, + "step": 217 + }, + { + "epoch": 0.3295540438397581, + "fcm_dpo/beta": 1.4142457246780396, + "fcm_dpo/delta": 0.2700718343257904, + "fcm_dpo/margin": 0.5216431617736816, + "fcm_dpo/q_t": 0.3781411647796631, + "grad_norm": 394.3163757324219, + "learning_rate": 4.253687219265803e-07, + "logits/chosen": 0.024805322289466858, + "logits/rejected": 0.01834661327302456, + "logps/chosen": -71.73900604248047, + "logps/ref_chosen": -70.59431457519531, + "logps/ref_rejected": -73.89038848876953, + "logps/rejected": -75.55671691894531, + "loss": 1.2642, + "margin_dpo/margin_mean": 0.5216437578201294, + "margin_dpo/margin_std": 1.1325141191482544, + "step": 218 + }, + { + "epoch": 0.3310657596371882, + "fcm_dpo/beta": 1.4622104167938232, + "fcm_dpo/delta": 0.27110588550567627, + "fcm_dpo/margin": 0.5126116275787354, + "fcm_dpo/q_t": 0.36937472224235535, + "grad_norm": 361.056884765625, + "learning_rate": 4.2442393573227043e-07, + "logits/chosen": 0.08768868446350098, + "logits/rejected": 0.05643084645271301, + "logps/chosen": -61.44779586791992, + "logps/ref_chosen": -60.490943908691406, + "logps/ref_rejected": -75.85001373291016, + "logps/rejected": -77.31947326660156, + "loss": 1.0874, + "margin_dpo/margin_mean": 0.512610912322998, + "margin_dpo/margin_std": 0.8701074123382568, + "step": 219 + }, + { + "epoch": 0.3325774754346183, + "fcm_dpo/beta": 1.4802911281585693, + "fcm_dpo/delta": 0.026991277933120728, + "fcm_dpo/margin": 0.6585407257080078, + "fcm_dpo/q_t": 0.33476772904396057, + "grad_norm": 283.6431579589844, + "learning_rate": 4.234742705255272e-07, + "logits/chosen": 0.17591653764247894, + "logits/rejected": 0.13129651546478271, + "logps/chosen": -46.09846496582031, + "logps/ref_chosen": -45.013397216796875, + "logps/ref_rejected": -70.49369812011719, + "logps/rejected": -72.23731231689453, + "loss": 0.9934, + "margin_dpo/margin_mean": 0.6585406064987183, + "margin_dpo/margin_std": 0.9634271264076233, + "step": 220 + }, + { + "epoch": 0.3340891912320484, + "fcm_dpo/beta": 1.486309289932251, + "fcm_dpo/delta": -0.12030621618032455, + "fcm_dpo/margin": 0.7435950636863708, + "fcm_dpo/q_t": 0.32344067096710205, + "grad_norm": 338.2693176269531, + "learning_rate": 4.22519752870528e-07, + "logits/chosen": 0.16085302829742432, + "logits/rejected": 0.11432051658630371, + "logps/chosen": -60.045074462890625, + "logps/ref_chosen": -59.09584045410156, + "logps/ref_rejected": -88.64388275146484, + "logps/rejected": -90.33671569824219, + "loss": 0.9562, + "margin_dpo/margin_mean": 0.7435950636863708, + "margin_dpo/margin_std": 1.0672008991241455, + "step": 221 + }, + { + "epoch": 0.3356009070294785, + "fcm_dpo/beta": 1.4272222518920898, + "fcm_dpo/delta": -0.1927495002746582, + "fcm_dpo/margin": 0.8200865983963013, + "fcm_dpo/q_t": 0.2971838712692261, + "grad_norm": 337.138671875, + "learning_rate": 4.2156040946718343e-07, + "logits/chosen": 0.16604246199131012, + "logits/rejected": 0.10780028998851776, + "logps/chosen": -57.0385627746582, + "logps/ref_chosen": -55.9976921081543, + "logps/ref_rejected": -111.94727325439453, + "logps/rejected": -113.8082275390625, + "loss": 0.856, + "margin_dpo/margin_mean": 0.8200874328613281, + "margin_dpo/margin_std": 0.9770439863204956, + "step": 222 + }, + { + "epoch": 0.3371126228269085, + "fcm_dpo/beta": 1.3298912048339844, + "fcm_dpo/delta": -0.2783888578414917, + "fcm_dpo/margin": 0.9309906363487244, + "fcm_dpo/q_t": 0.2817830443382263, + "grad_norm": 263.7952880859375, + "learning_rate": 4.2059626715039065e-07, + "logits/chosen": 0.18012914061546326, + "logits/rejected": 0.13743728399276733, + "logps/chosen": -60.831424713134766, + "logps/ref_chosen": -59.891422271728516, + "logps/ref_rejected": -86.28954315185547, + "logps/rejected": -88.16053771972656, + "loss": 0.7994, + "margin_dpo/margin_mean": 0.9309903383255005, + "margin_dpo/margin_std": 0.9911828637123108, + "step": 223 + }, + { + "epoch": 0.3386243386243386, + "fcm_dpo/beta": 1.3959991931915283, + "fcm_dpo/delta": 0.3523348867893219, + "fcm_dpo/margin": 0.48118603229522705, + "fcm_dpo/q_t": 0.3791411519050598, + "grad_norm": 364.8034362792969, + "learning_rate": 4.1962735288928304e-07, + "logits/chosen": 0.19306407868862152, + "logits/rejected": 0.17875471711158752, + "logps/chosen": -65.12818908691406, + "logps/ref_chosen": -64.04463195800781, + "logps/ref_rejected": -75.05450439453125, + "logps/rejected": -76.61923217773438, + "loss": 1.0923, + "margin_dpo/margin_mean": 0.4811859726905823, + "margin_dpo/margin_std": 0.8308462500572205, + "step": 224 + }, + { + "epoch": 0.3401360544217687, + "fcm_dpo/beta": 1.395388126373291, + "fcm_dpo/delta": -0.05797035992145538, + "fcm_dpo/margin": 0.7516753673553467, + "fcm_dpo/q_t": 0.3332204222679138, + "grad_norm": 468.0265197753906, + "learning_rate": 4.186536937864752e-07, + "logits/chosen": 0.1659896969795227, + "logits/rejected": 0.07978636771440506, + "logps/chosen": -67.14776611328125, + "logps/ref_chosen": -66.0958251953125, + "logps/ref_rejected": -97.68675231933594, + "logps/rejected": -99.49036407470703, + "loss": 1.0067, + "margin_dpo/margin_mean": 0.7516759634017944, + "margin_dpo/margin_std": 1.1150200366973877, + "step": 225 + }, + { + "epoch": 0.3416477702191988, + "fcm_dpo/beta": 1.4121769666671753, + "fcm_dpo/delta": 0.040757764130830765, + "fcm_dpo/margin": 0.6820341348648071, + "fcm_dpo/q_t": 0.3408370018005371, + "grad_norm": 292.9568176269531, + "learning_rate": 4.176753170773052e-07, + "logits/chosen": 0.18655280768871307, + "logits/rejected": 0.15081270039081573, + "logps/chosen": -52.478660583496094, + "logps/ref_chosen": -51.4168701171875, + "logps/ref_rejected": -66.30068969726562, + "logps/rejected": -68.04450988769531, + "loss": 1.0394, + "margin_dpo/margin_mean": 0.6820334792137146, + "margin_dpo/margin_std": 1.0649542808532715, + "step": 226 + }, + { + "epoch": 0.3431594860166289, + "fcm_dpo/beta": 1.4467318058013916, + "fcm_dpo/delta": 0.14849498867988586, + "fcm_dpo/margin": 0.5975882411003113, + "fcm_dpo/q_t": 0.3471581041812897, + "grad_norm": 412.5190734863281, + "learning_rate": 4.166922501290729e-07, + "logits/chosen": 0.20183053612709045, + "logits/rejected": 0.17074134945869446, + "logps/chosen": -59.01850891113281, + "logps/ref_chosen": -57.989776611328125, + "logps/ref_rejected": -75.05464172363281, + "logps/rejected": -76.68096160888672, + "loss": 1.1633, + "margin_dpo/margin_mean": 0.5975878238677979, + "margin_dpo/margin_std": 1.1138098239898682, + "step": 227 + }, + { + "epoch": 0.34467120181405897, + "fcm_dpo/beta": 1.4542537927627563, + "fcm_dpo/delta": -0.03622462600469589, + "fcm_dpo/margin": 0.7099840641021729, + "fcm_dpo/q_t": 0.3378611207008362, + "grad_norm": 351.0310974121094, + "learning_rate": 4.1570452044027405e-07, + "logits/chosen": 0.17106792330741882, + "logits/rejected": 0.11330675333738327, + "logps/chosen": -56.669921875, + "logps/ref_chosen": -55.55936813354492, + "logps/ref_rejected": -77.02364349365234, + "logps/rejected": -78.84417724609375, + "loss": 1.1439, + "margin_dpo/margin_mean": 0.709984540939331, + "margin_dpo/margin_std": 1.241539716720581, + "step": 228 + }, + { + "epoch": 0.34618291761148906, + "fcm_dpo/beta": 1.4833769798278809, + "fcm_dpo/delta": 0.18256068229675293, + "fcm_dpo/margin": 0.5615620613098145, + "fcm_dpo/q_t": 0.3450517952442169, + "grad_norm": 629.395263671875, + "learning_rate": 4.147121556398312e-07, + "logits/chosen": 0.23741164803504944, + "logits/rejected": 0.19696751236915588, + "logps/chosen": -51.810546875, + "logps/ref_chosen": -50.79466247558594, + "logps/ref_rejected": -78.4474105834961, + "logps/rejected": -80.02485656738281, + "loss": 1.0995, + "margin_dpo/margin_mean": 0.5615620613098145, + "margin_dpo/margin_std": 0.9383633732795715, + "step": 229 + }, + { + "epoch": 0.3476946334089191, + "fcm_dpo/beta": 1.5409061908721924, + "fcm_dpo/delta": 0.06196488440036774, + "fcm_dpo/margin": 0.6093405485153198, + "fcm_dpo/q_t": 0.35204917192459106, + "grad_norm": 399.1847839355469, + "learning_rate": 4.137151834863213e-07, + "logits/chosen": 0.1482779085636139, + "logits/rejected": 0.14588508009910583, + "logps/chosen": -57.8225212097168, + "logps/ref_chosen": -56.729225158691406, + "logps/ref_rejected": -62.99180603027344, + "logps/rejected": -64.69444274902344, + "loss": 1.1512, + "margin_dpo/margin_mean": 0.6093416213989258, + "margin_dpo/margin_std": 1.131590485572815, + "step": 230 + }, + { + "epoch": 0.3492063492063492, + "fcm_dpo/beta": 1.4233132600784302, + "fcm_dpo/delta": -0.39863085746765137, + "fcm_dpo/margin": 0.9415854215621948, + "fcm_dpo/q_t": 0.27584952116012573, + "grad_norm": 315.82147216796875, + "learning_rate": 4.1271363186719835e-07, + "logits/chosen": 0.11343254894018173, + "logits/rejected": 0.09590326249599457, + "logps/chosen": -73.67927551269531, + "logps/ref_chosen": -72.59709930419922, + "logps/ref_rejected": -86.2322998046875, + "logps/rejected": -88.25605773925781, + "loss": 0.7891, + "margin_dpo/margin_mean": 0.9415853023529053, + "margin_dpo/margin_std": 1.024916172027588, + "step": 231 + }, + { + "epoch": 0.3507180650037793, + "fcm_dpo/beta": 1.3663169145584106, + "fcm_dpo/delta": -0.1273672878742218, + "fcm_dpo/margin": 0.8124350309371948, + "fcm_dpo/q_t": 0.33132317662239075, + "grad_norm": 345.0347595214844, + "learning_rate": 4.1170752879801436e-07, + "logits/chosen": 0.14546144008636475, + "logits/rejected": 0.12237675487995148, + "logps/chosen": -69.10570526123047, + "logps/ref_chosen": -68.1185302734375, + "logps/ref_rejected": -83.79415893554688, + "logps/rejected": -85.5937728881836, + "loss": 1.0339, + "margin_dpo/margin_mean": 0.812435507774353, + "margin_dpo/margin_std": 1.2407793998718262, + "step": 232 + }, + { + "epoch": 0.35222978080120937, + "fcm_dpo/beta": 1.4025087356567383, + "fcm_dpo/delta": 0.2927402853965759, + "fcm_dpo/margin": 0.5172353386878967, + "fcm_dpo/q_t": 0.3894173204898834, + "grad_norm": 380.82867431640625, + "learning_rate": 4.106969024216348e-07, + "logits/chosen": 0.10903730243444443, + "logits/rejected": 0.06897353380918503, + "logps/chosen": -56.4796142578125, + "logps/ref_chosen": -55.070152282714844, + "logps/ref_rejected": -66.61845397949219, + "logps/rejected": -68.5451431274414, + "loss": 1.212, + "margin_dpo/margin_mean": 0.5172350406646729, + "margin_dpo/margin_std": 1.0877723693847656, + "step": 233 + }, + { + "epoch": 0.35374149659863946, + "fcm_dpo/beta": 1.5282173156738281, + "fcm_dpo/delta": 0.27392610907554626, + "fcm_dpo/margin": 0.48579320311546326, + "fcm_dpo/q_t": 0.3870254158973694, + "grad_norm": 388.5447082519531, + "learning_rate": 4.09681781007452e-07, + "logits/chosen": 0.09753985702991486, + "logits/rejected": 0.08481541275978088, + "logps/chosen": -57.07015609741211, + "logps/ref_chosen": -55.92589569091797, + "logps/ref_rejected": -51.11608123779297, + "logps/rejected": -52.746131896972656, + "loss": 1.2257, + "margin_dpo/margin_mean": 0.48579323291778564, + "margin_dpo/margin_std": 1.005543828010559, + "step": 234 + }, + { + "epoch": 0.35525321239606955, + "fcm_dpo/beta": 1.4440486431121826, + "fcm_dpo/delta": -0.3470792770385742, + "fcm_dpo/margin": 0.8979411721229553, + "fcm_dpo/q_t": 0.2706488370895386, + "grad_norm": 331.3047180175781, + "learning_rate": 4.08662192950594e-07, + "logits/chosen": 0.18871155381202698, + "logits/rejected": 0.17248067259788513, + "logps/chosen": -65.49934387207031, + "logps/ref_chosen": -64.53972625732422, + "logps/ref_rejected": -77.69151306152344, + "logps/rejected": -79.549072265625, + "loss": 0.7827, + "margin_dpo/margin_mean": 0.897940993309021, + "margin_dpo/margin_std": 0.9550020694732666, + "step": 235 + }, + { + "epoch": 0.35676492819349964, + "fcm_dpo/beta": 1.4077339172363281, + "fcm_dpo/delta": -0.08833800256252289, + "fcm_dpo/margin": 0.7657995223999023, + "fcm_dpo/q_t": 0.34863966703414917, + "grad_norm": 387.498291015625, + "learning_rate": 4.076381667711306e-07, + "logits/chosen": 0.10846032202243805, + "logits/rejected": 0.09852010011672974, + "logps/chosen": -72.46954345703125, + "logps/ref_chosen": -71.15473937988281, + "logps/ref_rejected": -84.88541412353516, + "logps/rejected": -86.96601867675781, + "loss": 1.1004, + "margin_dpo/margin_mean": 0.7657992839813232, + "margin_dpo/margin_std": 1.3017526865005493, + "step": 236 + }, + { + "epoch": 0.35827664399092973, + "fcm_dpo/beta": 1.3855292797088623, + "fcm_dpo/delta": -0.0012986212968826294, + "fcm_dpo/margin": 0.7191513776779175, + "fcm_dpo/q_t": 0.3367508053779602, + "grad_norm": 383.1600036621094, + "learning_rate": 4.066097311132753e-07, + "logits/chosen": 0.20629596710205078, + "logits/rejected": 0.19376662373542786, + "logps/chosen": -77.21894836425781, + "logps/ref_chosen": -76.14201354980469, + "logps/ref_rejected": -80.88479614257812, + "logps/rejected": -82.6808853149414, + "loss": 1.1096, + "margin_dpo/margin_mean": 0.7191513180732727, + "margin_dpo/margin_std": 1.1922106742858887, + "step": 237 + }, + { + "epoch": 0.35978835978835977, + "fcm_dpo/beta": 1.488319993019104, + "fcm_dpo/delta": 0.24613891541957855, + "fcm_dpo/margin": 0.5112044811248779, + "fcm_dpo/q_t": 0.3709501624107361, + "grad_norm": 2477.388916015625, + "learning_rate": 4.0557691474458414e-07, + "logits/chosen": 0.13462495803833008, + "logits/rejected": 0.11732495576143265, + "logps/chosen": -70.04851531982422, + "logps/ref_chosen": -68.88484954833984, + "logps/ref_rejected": -75.8946304321289, + "logps/rejected": -77.56949615478516, + "loss": 1.4557, + "margin_dpo/margin_mean": 0.5112046003341675, + "margin_dpo/margin_std": 1.32561194896698, + "step": 238 + }, + { + "epoch": 0.36130007558578986, + "fcm_dpo/beta": 1.4520866870880127, + "fcm_dpo/delta": -0.12432458996772766, + "fcm_dpo/margin": 0.7647981643676758, + "fcm_dpo/q_t": 0.31985604763031006, + "grad_norm": 413.1396789550781, + "learning_rate": 4.045397465551513e-07, + "logits/chosen": 0.19768695533275604, + "logits/rejected": 0.11818195134401321, + "logps/chosen": -58.036109924316406, + "logps/ref_chosen": -56.771827697753906, + "logps/ref_rejected": -116.23050689697266, + "logps/rejected": -118.25959014892578, + "loss": 1.0398, + "margin_dpo/margin_mean": 0.764798641204834, + "margin_dpo/margin_std": 1.1497983932495117, + "step": 239 + }, + { + "epoch": 0.36281179138321995, + "fcm_dpo/beta": 1.4255015850067139, + "fcm_dpo/delta": -0.19121024012565613, + "fcm_dpo/margin": 0.8184474110603333, + "fcm_dpo/q_t": 0.30751824378967285, + "grad_norm": 311.9652404785156, + "learning_rate": 4.0349825555680045e-07, + "logits/chosen": 0.14041496813297272, + "logits/rejected": 0.07365534454584122, + "logps/chosen": -54.64623260498047, + "logps/ref_chosen": -53.35411071777344, + "logps/ref_rejected": -80.12019348144531, + "logps/rejected": -82.23076629638672, + "loss": 0.9905, + "margin_dpo/margin_mean": 0.8184475898742676, + "margin_dpo/margin_std": 1.1477313041687012, + "step": 240 + }, + { + "epoch": 0.36432350718065004, + "fcm_dpo/beta": 1.5252119302749634, + "fcm_dpo/delta": 0.4934987425804138, + "fcm_dpo/margin": 0.336672842502594, + "fcm_dpo/q_t": 0.4018644094467163, + "grad_norm": 527.2469482421875, + "learning_rate": 4.0245247088227377e-07, + "logits/chosen": 0.12555167078971863, + "logits/rejected": 0.09838816523551941, + "logps/chosen": -73.14974975585938, + "logps/ref_chosen": -71.89541625976562, + "logps/ref_rejected": -83.03492736816406, + "logps/rejected": -84.62593078613281, + "loss": 1.4036, + "margin_dpo/margin_mean": 0.33667343854904175, + "margin_dpo/margin_std": 0.9868639707565308, + "step": 241 + }, + { + "epoch": 0.36583522297808013, + "fcm_dpo/beta": 1.443713903427124, + "fcm_dpo/delta": -0.2312009036540985, + "fcm_dpo/margin": 0.8218022584915161, + "fcm_dpo/q_t": 0.31463247537612915, + "grad_norm": 314.4766845703125, + "learning_rate": 4.0140242178441665e-07, + "logits/chosen": 0.1157296746969223, + "logits/rejected": 0.09765278548002243, + "logps/chosen": -58.899356842041016, + "logps/ref_chosen": -57.927433013916016, + "logps/ref_rejected": -67.838623046875, + "logps/rejected": -69.6323471069336, + "loss": 0.9264, + "margin_dpo/margin_mean": 0.8218023180961609, + "margin_dpo/margin_std": 1.0703468322753906, + "step": 242 + }, + { + "epoch": 0.3673469387755102, + "fcm_dpo/beta": 1.4689741134643555, + "fcm_dpo/delta": 0.049783095717430115, + "fcm_dpo/margin": 0.6500656604766846, + "fcm_dpo/q_t": 0.3428027033805847, + "grad_norm": 388.419921875, + "learning_rate": 4.003481376353596e-07, + "logits/chosen": 0.10413776338100433, + "logits/rejected": 0.09902875125408173, + "logps/chosen": -75.4229965209961, + "logps/ref_chosen": -74.27667236328125, + "logps/ref_rejected": -73.24340057373047, + "logps/rejected": -75.03977966308594, + "loss": 1.1186, + "margin_dpo/margin_mean": 0.650065541267395, + "margin_dpo/margin_std": 1.1026396751403809, + "step": 243 + }, + { + "epoch": 0.3688586545729403, + "fcm_dpo/beta": 1.4141755104064941, + "fcm_dpo/delta": -0.2628709375858307, + "fcm_dpo/margin": 0.8689752817153931, + "fcm_dpo/q_t": 0.28472983837127686, + "grad_norm": 275.4247131347656, + "learning_rate": 3.9928964792569654e-07, + "logits/chosen": 0.15633343160152435, + "logits/rejected": 0.09156134724617004, + "logps/chosen": -54.57572555541992, + "logps/ref_chosen": -53.36390686035156, + "logps/ref_rejected": -71.10276794433594, + "logps/rejected": -73.18356323242188, + "loss": 0.7643, + "margin_dpo/margin_mean": 0.8689748048782349, + "margin_dpo/margin_std": 0.9518204927444458, + "step": 244 + }, + { + "epoch": 0.37037037037037035, + "fcm_dpo/beta": 1.3580291271209717, + "fcm_dpo/delta": -0.23086267709732056, + "fcm_dpo/margin": 0.8861613273620605, + "fcm_dpo/q_t": 0.3000994920730591, + "grad_norm": 464.6539611816406, + "learning_rate": 3.982269822636601e-07, + "logits/chosen": 0.16047267615795135, + "logits/rejected": 0.14088091254234314, + "logps/chosen": -72.37004089355469, + "logps/ref_chosen": -71.19510650634766, + "logps/ref_rejected": -80.76235961914062, + "logps/rejected": -82.82345581054688, + "loss": 0.9896, + "margin_dpo/margin_mean": 0.8861616849899292, + "margin_dpo/margin_std": 1.2600171566009521, + "step": 245 + }, + { + "epoch": 0.37188208616780044, + "fcm_dpo/beta": 1.2877942323684692, + "fcm_dpo/delta": -0.19463737308979034, + "fcm_dpo/margin": 0.9072257280349731, + "fcm_dpo/q_t": 0.3076491057872772, + "grad_norm": 311.3534240722656, + "learning_rate": 3.971601703742932e-07, + "logits/chosen": 0.15642526745796204, + "logits/rejected": 0.11311867088079453, + "logps/chosen": -72.98123168945312, + "logps/ref_chosen": -71.62104797363281, + "logps/ref_rejected": -94.03392028808594, + "logps/rejected": -96.30133056640625, + "loss": 0.8849, + "margin_dpo/margin_mean": 0.9072257876396179, + "margin_dpo/margin_std": 1.1439390182495117, + "step": 246 + }, + { + "epoch": 0.37339380196523053, + "fcm_dpo/beta": 1.385887622833252, + "fcm_dpo/delta": 0.3913596272468567, + "fcm_dpo/margin": 0.44914084672927856, + "fcm_dpo/q_t": 0.3937835395336151, + "grad_norm": 441.7315673828125, + "learning_rate": 3.960892420986177e-07, + "logits/chosen": 0.14804767072200775, + "logits/rejected": 0.13864608108997345, + "logps/chosen": -81.47723388671875, + "logps/ref_chosen": -80.02254486083984, + "logps/ref_rejected": -89.22705841064453, + "logps/rejected": -91.13088989257812, + "loss": 1.3732, + "margin_dpo/margin_mean": 0.44914010167121887, + "margin_dpo/margin_std": 1.1773467063903809, + "step": 247 + }, + { + "epoch": 0.3749055177626606, + "fcm_dpo/beta": 1.4095741510391235, + "fcm_dpo/delta": 0.022829867899417877, + "fcm_dpo/margin": 0.6935802698135376, + "fcm_dpo/q_t": 0.3401643633842468, + "grad_norm": 382.5201110839844, + "learning_rate": 3.9501422739279953e-07, + "logits/chosen": 0.1225530132651329, + "logits/rejected": 0.14110150933265686, + "logps/chosen": -66.71062469482422, + "logps/ref_chosen": -65.37796020507812, + "logps/ref_rejected": -61.365787506103516, + "logps/rejected": -63.392032623291016, + "loss": 1.0538, + "margin_dpo/margin_mean": 0.6935799717903137, + "margin_dpo/margin_std": 1.1022088527679443, + "step": 248 + }, + { + "epoch": 0.3764172335600907, + "fcm_dpo/beta": 1.5373433828353882, + "fcm_dpo/delta": 0.5078557133674622, + "fcm_dpo/margin": 0.3260525166988373, + "fcm_dpo/q_t": 0.40423983335494995, + "grad_norm": 464.44989013671875, + "learning_rate": 3.9393515632731094e-07, + "logits/chosen": 0.10547161102294922, + "logits/rejected": 0.12796463072299957, + "logps/chosen": -76.10067749023438, + "logps/ref_chosen": -74.60145568847656, + "logps/ref_rejected": -63.79338455200195, + "logps/rejected": -65.61865234375, + "loss": 1.4834, + "margin_dpo/margin_mean": 0.3260522484779358, + "margin_dpo/margin_std": 1.0597259998321533, + "step": 249 + }, + { + "epoch": 0.3779289493575208, + "fcm_dpo/beta": 1.4959361553192139, + "fcm_dpo/delta": -0.35001400113105774, + "fcm_dpo/margin": 0.871913731098175, + "fcm_dpo/q_t": 0.305349200963974, + "grad_norm": 331.92315673828125, + "learning_rate": 3.9285205908608934e-07, + "logits/chosen": 0.21487398445606232, + "logits/rejected": 0.18293528258800507, + "logps/chosen": -63.18694305419922, + "logps/ref_chosen": -61.938209533691406, + "logps/ref_rejected": -72.21602630615234, + "logps/rejected": -74.336669921875, + "loss": 0.9596, + "margin_dpo/margin_mean": 0.8719134330749512, + "margin_dpo/margin_std": 1.247259259223938, + "step": 250 + }, + { + "epoch": 0.3794406651549509, + "fcm_dpo/beta": 1.4492324590682983, + "fcm_dpo/delta": 0.03792502358555794, + "fcm_dpo/margin": 0.6663841009140015, + "fcm_dpo/q_t": 0.3597896099090576, + "grad_norm": 376.5051574707031, + "learning_rate": 3.9176496596569265e-07, + "logits/chosen": 0.2115197777748108, + "logits/rejected": 0.1815691590309143, + "logps/chosen": -68.17942810058594, + "logps/ref_chosen": -66.85694885253906, + "logps/ref_rejected": -84.83396911621094, + "logps/rejected": -86.82284545898438, + "loss": 1.1816, + "margin_dpo/margin_mean": 0.6663837432861328, + "margin_dpo/margin_std": 1.2780930995941162, + "step": 251 + }, + { + "epoch": 0.38095238095238093, + "fcm_dpo/beta": 1.496967077255249, + "fcm_dpo/delta": -0.03972265124320984, + "fcm_dpo/margin": 0.6779400110244751, + "fcm_dpo/q_t": 0.32841235399246216, + "grad_norm": 365.8186340332031, + "learning_rate": 3.9067390737445254e-07, + "logits/chosen": 0.12412711977958679, + "logits/rejected": 0.0818972960114479, + "logps/chosen": -57.51960372924805, + "logps/ref_chosen": -56.22393035888672, + "logps/ref_rejected": -77.1136245727539, + "logps/rejected": -79.08723449707031, + "loss": 1.2777, + "margin_dpo/margin_mean": 0.6779407262802124, + "margin_dpo/margin_std": 1.3109935522079468, + "step": 252 + }, + { + "epoch": 0.382464096749811, + "fcm_dpo/beta": 1.3989202976226807, + "fcm_dpo/delta": -0.0833391547203064, + "fcm_dpo/margin": 0.7623114585876465, + "fcm_dpo/q_t": 0.3369859755039215, + "grad_norm": 290.3008117675781, + "learning_rate": 3.8957891383162304e-07, + "logits/chosen": 0.17171865701675415, + "logits/rejected": 0.14320127665996552, + "logps/chosen": -53.48502731323242, + "logps/ref_chosen": -52.21001434326172, + "logps/ref_rejected": -58.75764846801758, + "logps/rejected": -60.79496765136719, + "loss": 1.0203, + "margin_dpo/margin_mean": 0.762311577796936, + "margin_dpo/margin_std": 1.1594964265823364, + "step": 253 + }, + { + "epoch": 0.3839758125472411, + "fcm_dpo/beta": 1.3931760787963867, + "fcm_dpo/delta": -0.08251707255840302, + "fcm_dpo/margin": 0.7698103189468384, + "fcm_dpo/q_t": 0.33657705783843994, + "grad_norm": 385.88311767578125, + "learning_rate": 3.884800159665276e-07, + "logits/chosen": 0.13405011594295502, + "logits/rejected": 0.09684339165687561, + "logps/chosen": -66.99877166748047, + "logps/ref_chosen": -65.63632202148438, + "logps/ref_rejected": -82.34425354003906, + "logps/rejected": -84.47650146484375, + "loss": 1.1088, + "margin_dpo/margin_mean": 0.7698097229003906, + "margin_dpo/margin_std": 1.271782398223877, + "step": 254 + }, + { + "epoch": 0.3854875283446712, + "fcm_dpo/beta": 1.3778660297393799, + "fcm_dpo/delta": -0.019536815583705902, + "fcm_dpo/margin": 0.7369774580001831, + "fcm_dpo/q_t": 0.33573752641677856, + "grad_norm": 347.2344055175781, + "learning_rate": 3.873772445177015e-07, + "logits/chosen": 0.1548142433166504, + "logits/rejected": 0.12779046595096588, + "logps/chosen": -69.03644561767578, + "logps/ref_chosen": -67.91108703613281, + "logps/ref_rejected": -83.89114379882812, + "logps/rejected": -85.75347900390625, + "loss": 1.068, + "margin_dpo/margin_mean": 0.7369774580001831, + "margin_dpo/margin_std": 1.1885807514190674, + "step": 255 + }, + { + "epoch": 0.3869992441421013, + "fcm_dpo/beta": 1.396597146987915, + "fcm_dpo/delta": -0.011127792298793793, + "fcm_dpo/margin": 0.7227224111557007, + "fcm_dpo/q_t": 0.3369947075843811, + "grad_norm": 397.8918762207031, + "learning_rate": 3.862706303320329e-07, + "logits/chosen": 0.1215638667345047, + "logits/rejected": 0.08205322921276093, + "logps/chosen": -64.94349670410156, + "logps/ref_chosen": -63.49998474121094, + "logps/ref_rejected": -90.77104187011719, + "logps/rejected": -92.93727111816406, + "loss": 1.0738, + "margin_dpo/margin_mean": 0.7227222323417664, + "margin_dpo/margin_std": 1.203334927558899, + "step": 256 + }, + { + "epoch": 0.3885109599395314, + "fcm_dpo/beta": 1.2768826484680176, + "fcm_dpo/delta": -0.461418092250824, + "fcm_dpo/margin": 1.086154580116272, + "fcm_dpo/q_t": 0.28833481669425964, + "grad_norm": 322.4739685058594, + "learning_rate": 3.851602043638994e-07, + "logits/chosen": 0.1613893210887909, + "logits/rejected": 0.11499130725860596, + "logps/chosen": -71.91011810302734, + "logps/ref_chosen": -70.60064697265625, + "logps/ref_rejected": -108.58313751220703, + "logps/rejected": -110.978759765625, + "loss": 0.9387, + "margin_dpo/margin_mean": 1.0861549377441406, + "margin_dpo/margin_std": 1.5388684272766113, + "step": 257 + }, + { + "epoch": 0.3900226757369615, + "fcm_dpo/beta": 1.2841336727142334, + "fcm_dpo/delta": 0.00753195583820343, + "fcm_dpo/margin": 0.7705293297767639, + "fcm_dpo/q_t": 0.32425814867019653, + "grad_norm": 300.5933837890625, + "learning_rate": 3.840459976743023e-07, + "logits/chosen": 0.1647588312625885, + "logits/rejected": 0.1312163770198822, + "logps/chosen": -60.74152374267578, + "logps/ref_chosen": -59.25416564941406, + "logps/ref_rejected": -85.58709716796875, + "logps/rejected": -87.84498596191406, + "loss": 0.933, + "margin_dpo/margin_mean": 0.7705295085906982, + "margin_dpo/margin_std": 1.0195441246032715, + "step": 258 + }, + { + "epoch": 0.3915343915343915, + "fcm_dpo/beta": 1.1445305347442627, + "fcm_dpo/delta": -0.5759162902832031, + "fcm_dpo/margin": 1.2901654243469238, + "fcm_dpo/q_t": 0.2562459111213684, + "grad_norm": 223.67605590820312, + "learning_rate": 3.8292804142999796e-07, + "logits/chosen": 0.09316843003034592, + "logits/rejected": 0.021822050213813782, + "logps/chosen": -66.56085205078125, + "logps/ref_chosen": -65.43487548828125, + "logps/ref_rejected": -95.41731262207031, + "logps/rejected": -97.83345031738281, + "loss": 0.7207, + "margin_dpo/margin_mean": 1.2901657819747925, + "margin_dpo/margin_std": 1.2492828369140625, + "step": 259 + }, + { + "epoch": 0.3930461073318216, + "fcm_dpo/beta": 1.1257474422454834, + "fcm_dpo/delta": 0.017584767192602158, + "fcm_dpo/margin": 0.8737805485725403, + "fcm_dpo/q_t": 0.33795487880706787, + "grad_norm": 259.8592834472656, + "learning_rate": 3.818063669026256e-07, + "logits/chosen": 0.127943217754364, + "logits/rejected": 0.06927517056465149, + "logps/chosen": -50.53019714355469, + "logps/ref_chosen": -49.08958435058594, + "logps/ref_rejected": -79.01708221435547, + "logps/rejected": -81.33147430419922, + "loss": 0.9979, + "margin_dpo/margin_mean": 0.873779296875, + "margin_dpo/margin_std": 1.2833609580993652, + "step": 260 + }, + { + "epoch": 0.3945578231292517, + "fcm_dpo/beta": 1.1455121040344238, + "fcm_dpo/delta": 0.08530843257904053, + "fcm_dpo/margin": 0.8054367303848267, + "fcm_dpo/q_t": 0.34108513593673706, + "grad_norm": 306.086181640625, + "learning_rate": 3.806810054678331e-07, + "logits/chosen": 0.03654761239886284, + "logits/rejected": 0.04507092386484146, + "logps/chosen": -72.11337280273438, + "logps/ref_chosen": -70.87239074707031, + "logps/ref_rejected": -65.01522064208984, + "logps/rejected": -67.06163787841797, + "loss": 0.9947, + "margin_dpo/margin_mean": 0.805436909198761, + "margin_dpo/margin_std": 1.1771018505096436, + "step": 261 + }, + { + "epoch": 0.3960695389266818, + "fcm_dpo/beta": 1.1686980724334717, + "fcm_dpo/delta": 0.04035666957497597, + "fcm_dpo/margin": 0.8231496810913086, + "fcm_dpo/q_t": 0.3458126187324524, + "grad_norm": 323.2743225097656, + "learning_rate": 3.7955198860439887e-07, + "logits/chosen": 0.17141227424144745, + "logits/rejected": 0.13052189350128174, + "logps/chosen": -69.32987976074219, + "logps/ref_chosen": -67.8706283569336, + "logps/ref_rejected": -88.7205810546875, + "logps/rejected": -91.00297546386719, + "loss": 0.9832, + "margin_dpo/margin_mean": 0.8231501579284668, + "margin_dpo/margin_std": 1.2316184043884277, + "step": 262 + }, + { + "epoch": 0.3975812547241119, + "fcm_dpo/beta": 1.23178231716156, + "fcm_dpo/delta": 0.2991076111793518, + "fcm_dpo/margin": 0.5800197124481201, + "fcm_dpo/q_t": 0.38507431745529175, + "grad_norm": 314.1044616699219, + "learning_rate": 3.784193478933516e-07, + "logits/chosen": 0.13119199872016907, + "logits/rejected": 0.05917968600988388, + "logps/chosen": -56.671165466308594, + "logps/ref_chosen": -55.194583892822266, + "logps/ref_rejected": -80.54048156738281, + "logps/rejected": -82.59709167480469, + "loss": 1.231, + "margin_dpo/margin_mean": 0.5800192356109619, + "margin_dpo/margin_std": 1.2319090366363525, + "step": 263 + }, + { + "epoch": 0.39909297052154197, + "fcm_dpo/beta": 1.2216218709945679, + "fcm_dpo/delta": -0.13029904663562775, + "fcm_dpo/margin": 0.9128764867782593, + "fcm_dpo/q_t": 0.34319591522216797, + "grad_norm": 366.9609069824219, + "learning_rate": 3.7728311501708674e-07, + "logits/chosen": 0.08489110320806503, + "logits/rejected": 0.04814103990793228, + "logps/chosen": -84.4659652709961, + "logps/ref_chosen": -83.17068481445312, + "logps/ref_rejected": -88.33625793457031, + "logps/rejected": -90.54441833496094, + "loss": 1.0999, + "margin_dpo/margin_mean": 0.9128766655921936, + "margin_dpo/margin_std": 1.5033926963806152, + "step": 264 + }, + { + "epoch": 0.40060468631897206, + "fcm_dpo/beta": 1.1946429014205933, + "fcm_dpo/delta": -0.042388565838336945, + "fcm_dpo/margin": 0.8688783049583435, + "fcm_dpo/q_t": 0.31362420320510864, + "grad_norm": 336.9339904785156, + "learning_rate": 3.7614332175848027e-07, + "logits/chosen": 0.1749192476272583, + "logits/rejected": 0.12444747984409332, + "logps/chosen": -53.23479080200195, + "logps/ref_chosen": -51.66284942626953, + "logps/ref_rejected": -67.1720962524414, + "logps/rejected": -69.6129150390625, + "loss": 0.9924, + "margin_dpo/margin_mean": 0.8688779473304749, + "margin_dpo/margin_std": 1.2264655828475952, + "step": 265 + }, + { + "epoch": 0.4021164021164021, + "fcm_dpo/beta": 1.200698971748352, + "fcm_dpo/delta": 0.018015079200267792, + "fcm_dpo/margin": 0.8188613653182983, + "fcm_dpo/q_t": 0.342385858297348, + "grad_norm": 329.1114807128906, + "learning_rate": 3.75e-07, + "logits/chosen": 0.1544215977191925, + "logits/rejected": 0.10259807109832764, + "logps/chosen": -58.89800262451172, + "logps/ref_chosen": -57.45049285888672, + "logps/ref_rejected": -77.60826110839844, + "logps/rejected": -79.87462615966797, + "loss": 1.0917, + "margin_dpo/margin_mean": 0.8188612461090088, + "margin_dpo/margin_std": 1.41795015335083, + "step": 266 + }, + { + "epoch": 0.4036281179138322, + "fcm_dpo/beta": 1.2707109451293945, + "fcm_dpo/delta": 0.28088656067848206, + "fcm_dpo/margin": 0.5725541710853577, + "fcm_dpo/q_t": 0.3735983073711395, + "grad_norm": 291.73394775390625, + "learning_rate": 3.738531817228131e-07, + "logits/chosen": 0.15856996178627014, + "logits/rejected": 0.1470004916191101, + "logps/chosen": -56.46894836425781, + "logps/ref_chosen": -55.03535079956055, + "logps/ref_rejected": -66.0953369140625, + "logps/rejected": -68.10148620605469, + "loss": 1.2705, + "margin_dpo/margin_mean": 0.5725547075271606, + "margin_dpo/margin_std": 1.2551192045211792, + "step": 267 + }, + { + "epoch": 0.4051398337112623, + "fcm_dpo/beta": 1.2686963081359863, + "fcm_dpo/delta": -0.053165629506111145, + "fcm_dpo/margin": 0.8244317770004272, + "fcm_dpo/q_t": 0.34110718965530396, + "grad_norm": 308.4896240234375, + "learning_rate": 3.7270289900589204e-07, + "logits/chosen": 0.07009106129407883, + "logits/rejected": 0.05312522128224373, + "logps/chosen": -66.37588500976562, + "logps/ref_chosen": -65.07174682617188, + "logps/ref_rejected": -71.42485809326172, + "logps/rejected": -73.5534439086914, + "loss": 0.9779, + "margin_dpo/margin_mean": 0.8244317770004272, + "margin_dpo/margin_std": 1.2355961799621582, + "step": 268 + }, + { + "epoch": 0.40665154950869237, + "fcm_dpo/beta": 1.2246897220611572, + "fcm_dpo/delta": -0.29109132289886475, + "fcm_dpo/margin": 1.0223791599273682, + "fcm_dpo/q_t": 0.3000904321670532, + "grad_norm": 285.43951416015625, + "learning_rate": 3.7154918402511714e-07, + "logits/chosen": 0.22535249590873718, + "logits/rejected": 0.1888647824525833, + "logps/chosen": -68.56861114501953, + "logps/ref_chosen": -67.1362075805664, + "logps/ref_rejected": -82.55778503417969, + "logps/rejected": -85.01256561279297, + "loss": 0.9234, + "margin_dpo/margin_mean": 1.0223793983459473, + "margin_dpo/margin_std": 1.2574834823608398, + "step": 269 + }, + { + "epoch": 0.40816326530612246, + "fcm_dpo/beta": 1.2294461727142334, + "fcm_dpo/delta": 0.26967453956604004, + "fcm_dpo/margin": 0.6104166507720947, + "fcm_dpo/q_t": 0.3626842498779297, + "grad_norm": 348.1169738769531, + "learning_rate": 3.7039206905237656e-07, + "logits/chosen": 0.18059831857681274, + "logits/rejected": 0.13146328926086426, + "logps/chosen": -68.18730163574219, + "logps/ref_chosen": -66.6886978149414, + "logps/ref_rejected": -85.16129302978516, + "logps/rejected": -87.27030944824219, + "loss": 1.1816, + "margin_dpo/margin_mean": 0.6104167103767395, + "margin_dpo/margin_std": 1.19452965259552, + "step": 270 + }, + { + "epoch": 0.40967498110355255, + "fcm_dpo/beta": 1.307576060295105, + "fcm_dpo/delta": 0.36521047353744507, + "fcm_dpo/margin": 0.5049761533737183, + "fcm_dpo/q_t": 0.4147690534591675, + "grad_norm": 440.6063537597656, + "learning_rate": 3.692315864546635e-07, + "logits/chosen": 0.1853274405002594, + "logits/rejected": 0.14235132932662964, + "logps/chosen": -73.94186401367188, + "logps/ref_chosen": -72.40754699707031, + "logps/ref_rejected": -92.06311798095703, + "logps/rejected": -94.10240173339844, + "loss": 1.5095, + "margin_dpo/margin_mean": 0.5049762725830078, + "margin_dpo/margin_std": 1.5028947591781616, + "step": 271 + }, + { + "epoch": 0.41118669690098264, + "fcm_dpo/beta": 1.266921043395996, + "fcm_dpo/delta": -0.4322693943977356, + "fcm_dpo/margin": 1.0848881006240845, + "fcm_dpo/q_t": 0.28500691056251526, + "grad_norm": 282.21990966796875, + "learning_rate": 3.6806776869317067e-07, + "logits/chosen": 0.15356749296188354, + "logits/rejected": 0.15307673811912537, + "logps/chosen": -67.91374206542969, + "logps/ref_chosen": -66.60140228271484, + "logps/ref_rejected": -67.74340057373047, + "logps/rejected": -70.140625, + "loss": 0.7882, + "margin_dpo/margin_mean": 1.0848881006240845, + "margin_dpo/margin_std": 1.259714126586914, + "step": 272 + }, + { + "epoch": 0.4126984126984127, + "fcm_dpo/beta": 1.2134504318237305, + "fcm_dpo/delta": -0.202475443482399, + "fcm_dpo/margin": 0.9696700572967529, + "fcm_dpo/q_t": 0.3056218922138214, + "grad_norm": 257.2859191894531, + "learning_rate": 3.669006483223828e-07, + "logits/chosen": 0.18354235589504242, + "logits/rejected": 0.14025747776031494, + "logps/chosen": -58.94585037231445, + "logps/ref_chosen": -57.35487747192383, + "logps/ref_rejected": -84.17168426513672, + "logps/rejected": -86.73233032226562, + "loss": 0.885, + "margin_dpo/margin_mean": 0.9696696996688843, + "margin_dpo/margin_std": 1.2579290866851807, + "step": 273 + }, + { + "epoch": 0.41421012849584277, + "fcm_dpo/beta": 1.17547607421875, + "fcm_dpo/delta": -0.022935807704925537, + "fcm_dpo/margin": 0.8681968450546265, + "fcm_dpo/q_t": 0.3227364718914032, + "grad_norm": 261.911376953125, + "learning_rate": 3.657302579891656e-07, + "logits/chosen": 0.0659763365983963, + "logits/rejected": 0.04620751738548279, + "logps/chosen": -61.098419189453125, + "logps/ref_chosen": -59.64149475097656, + "logps/ref_rejected": -68.29348754882812, + "logps/rejected": -70.61860656738281, + "loss": 1.004, + "margin_dpo/margin_mean": 0.8681962490081787, + "margin_dpo/margin_std": 1.2701618671417236, + "step": 274 + }, + { + "epoch": 0.41572184429327286, + "fcm_dpo/beta": 1.157713532447815, + "fcm_dpo/delta": -0.10604125261306763, + "fcm_dpo/margin": 0.9453713893890381, + "fcm_dpo/q_t": 0.32869789004325867, + "grad_norm": 274.95843505859375, + "learning_rate": 3.645566304318526e-07, + "logits/chosen": 0.1414085328578949, + "logits/rejected": 0.07877371460199356, + "logps/chosen": -54.83768844604492, + "logps/ref_chosen": -53.26664352416992, + "logps/ref_rejected": -73.84062194824219, + "logps/rejected": -76.3570327758789, + "loss": 0.9802, + "margin_dpo/margin_mean": 0.9453713893890381, + "margin_dpo/margin_std": 1.3858097791671753, + "step": 275 + }, + { + "epoch": 0.41723356009070295, + "fcm_dpo/beta": 1.1194026470184326, + "fcm_dpo/delta": -0.17233465611934662, + "fcm_dpo/margin": 1.0291403532028198, + "fcm_dpo/q_t": 0.29955726861953735, + "grad_norm": 242.73602294921875, + "learning_rate": 3.633797984793294e-07, + "logits/chosen": 0.11177192628383636, + "logits/rejected": 0.08380501717329025, + "logps/chosen": -54.313621520996094, + "logps/ref_chosen": -53.02079772949219, + "logps/ref_rejected": -61.56678771972656, + "logps/rejected": -63.888755798339844, + "loss": 0.8804, + "margin_dpo/margin_mean": 1.0291404724121094, + "margin_dpo/margin_std": 1.2605907917022705, + "step": 276 + }, + { + "epoch": 0.41874527588813304, + "fcm_dpo/beta": 1.16245436668396, + "fcm_dpo/delta": 0.17784440517425537, + "fcm_dpo/margin": 0.7118735313415527, + "fcm_dpo/q_t": 0.3743218183517456, + "grad_norm": 298.37750244140625, + "learning_rate": 3.6219979505011555e-07, + "logits/chosen": 0.2183254361152649, + "logits/rejected": 0.23113352060317993, + "logps/chosen": -73.12422180175781, + "logps/ref_chosen": -71.43299102783203, + "logps/ref_rejected": -67.65852355957031, + "logps/rejected": -70.06163024902344, + "loss": 1.1743, + "margin_dpo/margin_mean": 0.7118737697601318, + "margin_dpo/margin_std": 1.4327894449234009, + "step": 277 + }, + { + "epoch": 0.42025699168556313, + "fcm_dpo/beta": 1.172609806060791, + "fcm_dpo/delta": -0.03564952313899994, + "fcm_dpo/margin": 0.8724105358123779, + "fcm_dpo/q_t": 0.32611083984375, + "grad_norm": 296.2506408691406, + "learning_rate": 3.6101665315144353e-07, + "logits/chosen": 0.1023668497800827, + "logits/rejected": 0.07007478922605515, + "logps/chosen": -68.64065551757812, + "logps/ref_chosen": -67.11076354980469, + "logps/ref_rejected": -88.74851989746094, + "logps/rejected": -91.15081787109375, + "loss": 1.0143, + "margin_dpo/margin_mean": 0.8724101781845093, + "margin_dpo/margin_std": 1.2294014692306519, + "step": 278 + }, + { + "epoch": 0.4217687074829932, + "fcm_dpo/beta": 1.1110622882843018, + "fcm_dpo/delta": -0.26436370611190796, + "fcm_dpo/margin": 1.1091269254684448, + "fcm_dpo/q_t": 0.276674747467041, + "grad_norm": 208.7923583984375, + "learning_rate": 3.5983040587833563e-07, + "logits/chosen": 0.1121751144528389, + "logits/rejected": 0.0790662094950676, + "logps/chosen": -55.73821258544922, + "logps/ref_chosen": -54.49748611450195, + "logps/ref_rejected": -70.42373657226562, + "logps/rejected": -72.77357482910156, + "loss": 0.778, + "margin_dpo/margin_mean": 1.1091272830963135, + "margin_dpo/margin_std": 1.1377835273742676, + "step": 279 + }, + { + "epoch": 0.42328042328042326, + "fcm_dpo/beta": 1.045201063156128, + "fcm_dpo/delta": -0.19996249675750732, + "fcm_dpo/margin": 1.1231290102005005, + "fcm_dpo/q_t": 0.2829288840293884, + "grad_norm": 192.96217346191406, + "learning_rate": 3.586410864126781e-07, + "logits/chosen": 0.15486222505569458, + "logits/rejected": 0.123613640666008, + "logps/chosen": -61.731178283691406, + "logps/ref_chosen": -60.43281173706055, + "logps/ref_rejected": -78.39051818847656, + "logps/rejected": -80.81201171875, + "loss": 0.7481, + "margin_dpo/margin_mean": 1.1231298446655273, + "margin_dpo/margin_std": 1.1084861755371094, + "step": 280 + }, + { + "epoch": 0.42479213907785335, + "fcm_dpo/beta": 1.0173039436340332, + "fcm_dpo/delta": -0.09447715431451797, + "fcm_dpo/margin": 1.0629010200500488, + "fcm_dpo/q_t": 0.3139858841896057, + "grad_norm": 206.9331817626953, + "learning_rate": 3.574487280222929e-07, + "logits/chosen": 0.15210747718811035, + "logits/rejected": 0.15197323262691498, + "logps/chosen": -61.607582092285156, + "logps/ref_chosen": -60.2820930480957, + "logps/ref_rejected": -62.04009246826172, + "logps/rejected": -64.42848205566406, + "loss": 0.9316, + "margin_dpo/margin_mean": 1.0629009008407593, + "margin_dpo/margin_std": 1.3708744049072266, + "step": 281 + }, + { + "epoch": 0.42630385487528344, + "fcm_dpo/beta": 1.054863452911377, + "fcm_dpo/delta": 0.04717801511287689, + "fcm_dpo/margin": 0.8964927196502686, + "fcm_dpo/q_t": 0.33858174085617065, + "grad_norm": 262.0866394042969, + "learning_rate": 3.562533640600075e-07, + "logits/chosen": 0.1025453507900238, + "logits/rejected": 0.06392862647771835, + "logps/chosen": -62.19837951660156, + "logps/ref_chosen": -60.623924255371094, + "logps/ref_rejected": -68.67400360107422, + "logps/rejected": -71.14495849609375, + "loss": 1.0039, + "margin_dpo/margin_mean": 0.8964922428131104, + "margin_dpo/margin_std": 1.2645740509033203, + "step": 282 + }, + { + "epoch": 0.42781557067271353, + "fcm_dpo/beta": 1.0366630554199219, + "fcm_dpo/delta": -0.0020070038735866547, + "fcm_dpo/margin": 0.9660577774047852, + "fcm_dpo/q_t": 0.3374154269695282, + "grad_norm": 295.226318359375, + "learning_rate": 3.550550279627215e-07, + "logits/chosen": 0.11783361434936523, + "logits/rejected": 0.054866328835487366, + "logps/chosen": -69.20338439941406, + "logps/ref_chosen": -67.64775085449219, + "logps/ref_rejected": -99.96835327148438, + "logps/rejected": -102.49005126953125, + "loss": 1.0551, + "margin_dpo/margin_mean": 0.9660578370094299, + "margin_dpo/margin_std": 1.526496171951294, + "step": 283 + }, + { + "epoch": 0.4293272864701436, + "fcm_dpo/beta": 1.0322619676589966, + "fcm_dpo/delta": 0.00461952667683363, + "fcm_dpo/margin": 0.9647125005722046, + "fcm_dpo/q_t": 0.33289098739624023, + "grad_norm": 231.38433837890625, + "learning_rate": 3.5385375325047163e-07, + "logits/chosen": 0.17530453205108643, + "logits/rejected": 0.12596732378005981, + "logps/chosen": -58.40034103393555, + "logps/ref_chosen": -56.96742630004883, + "logps/ref_rejected": -86.36236572265625, + "logps/rejected": -88.75999450683594, + "loss": 0.9914, + "margin_dpo/margin_mean": 0.9647125005722046, + "margin_dpo/margin_std": 1.445831298828125, + "step": 284 + }, + { + "epoch": 0.4308390022675737, + "fcm_dpo/beta": 1.0661512613296509, + "fcm_dpo/delta": 0.16971619427204132, + "fcm_dpo/margin": 0.7905272245407104, + "fcm_dpo/q_t": 0.34532633423805237, + "grad_norm": 249.1006317138672, + "learning_rate": 3.5264957352549375e-07, + "logits/chosen": 0.18527567386627197, + "logits/rejected": 0.1653136909008026, + "logps/chosen": -73.34848022460938, + "logps/ref_chosen": -71.65611267089844, + "logps/ref_rejected": -81.63829803466797, + "logps/rejected": -84.12120056152344, + "loss": 0.9726, + "margin_dpo/margin_mean": 0.7905269861221313, + "margin_dpo/margin_std": 1.0835275650024414, + "step": 285 + }, + { + "epoch": 0.4323507180650038, + "fcm_dpo/beta": 1.005662441253662, + "fcm_dpo/delta": -0.34280824661254883, + "fcm_dpo/margin": 1.2858762741088867, + "fcm_dpo/q_t": 0.27013134956359863, + "grad_norm": 211.03579711914062, + "learning_rate": 3.514425224712835e-07, + "logits/chosen": 0.10800629109144211, + "logits/rejected": 0.0394493006169796, + "logps/chosen": -62.53429412841797, + "logps/ref_chosen": -61.07952117919922, + "logps/ref_rejected": -91.28128051757812, + "logps/rejected": -94.02192687988281, + "loss": 0.8318, + "margin_dpo/margin_mean": 1.2858755588531494, + "margin_dpo/margin_std": 1.5294857025146484, + "step": 286 + }, + { + "epoch": 0.43386243386243384, + "fcm_dpo/beta": 0.992131769657135, + "fcm_dpo/delta": -0.06625291705131531, + "fcm_dpo/margin": 1.0675835609436035, + "fcm_dpo/q_t": 0.3116587698459625, + "grad_norm": 213.88555908203125, + "learning_rate": 3.502326338516534e-07, + "logits/chosen": 0.13146105408668518, + "logits/rejected": 0.10076682269573212, + "logps/chosen": -47.52275848388672, + "logps/ref_chosen": -46.035789489746094, + "logps/ref_rejected": -59.95293426513672, + "logps/rejected": -62.507484436035156, + "loss": 0.8751, + "margin_dpo/margin_mean": 1.067583441734314, + "margin_dpo/margin_std": 1.295511245727539, + "step": 287 + }, + { + "epoch": 0.43537414965986393, + "fcm_dpo/beta": 0.9990655183792114, + "fcm_dpo/delta": 0.13602013885974884, + "fcm_dpo/margin": 0.8767856359481812, + "fcm_dpo/q_t": 0.35159996151924133, + "grad_norm": 293.45867919921875, + "learning_rate": 3.490199415097892e-07, + "logits/chosen": 0.053915057331323624, + "logits/rejected": 0.014950074255466461, + "logps/chosen": -67.08319854736328, + "logps/ref_chosen": -65.3908462524414, + "logps/ref_rejected": -88.53607940673828, + "logps/rejected": -91.10520935058594, + "loss": 1.0811, + "margin_dpo/margin_mean": 0.8767852187156677, + "margin_dpo/margin_std": 1.4756031036376953, + "step": 288 + }, + { + "epoch": 0.436885865457294, + "fcm_dpo/beta": 1.0097713470458984, + "fcm_dpo/delta": 0.013888869434595108, + "fcm_dpo/margin": 0.9776356220245361, + "fcm_dpo/q_t": 0.3457440435886383, + "grad_norm": 220.5363006591797, + "learning_rate": 3.4780447936730247e-07, + "logits/chosen": 0.16835784912109375, + "logits/rejected": 0.14368662238121033, + "logps/chosen": -56.29551696777344, + "logps/ref_chosen": -54.5936279296875, + "logps/ref_rejected": -67.20855712890625, + "logps/rejected": -69.88806915283203, + "loss": 1.0415, + "margin_dpo/margin_mean": 0.9776356220245361, + "margin_dpo/margin_std": 1.556645393371582, + "step": 289 + }, + { + "epoch": 0.4383975812547241, + "fcm_dpo/beta": 1.0043764114379883, + "fcm_dpo/delta": -0.04867362976074219, + "fcm_dpo/margin": 1.0387194156646729, + "fcm_dpo/q_t": 0.33352506160736084, + "grad_norm": 238.85610961914062, + "learning_rate": 3.465862814232821e-07, + "logits/chosen": 0.19789519906044006, + "logits/rejected": 0.14447157084941864, + "logps/chosen": -63.15911865234375, + "logps/ref_chosen": -61.38457489013672, + "logps/ref_rejected": -91.92778015136719, + "logps/rejected": -94.74103546142578, + "loss": 0.9701, + "margin_dpo/margin_mean": 1.038718819618225, + "margin_dpo/margin_std": 1.499373435974121, + "step": 290 + }, + { + "epoch": 0.4399092970521542, + "fcm_dpo/beta": 1.0028091669082642, + "fcm_dpo/delta": -0.10185343772172928, + "fcm_dpo/margin": 1.0858376026153564, + "fcm_dpo/q_t": 0.3095766305923462, + "grad_norm": 204.3426055908203, + "learning_rate": 3.4536538175334343e-07, + "logits/chosen": 0.23501402139663696, + "logits/rejected": 0.1915348768234253, + "logps/chosen": -52.46550750732422, + "logps/ref_chosen": -50.863037109375, + "logps/ref_rejected": -82.20868682861328, + "logps/rejected": -84.89698791503906, + "loss": 0.8974, + "margin_dpo/margin_mean": 1.085837960243225, + "margin_dpo/margin_std": 1.3475749492645264, + "step": 291 + }, + { + "epoch": 0.4414210128495843, + "fcm_dpo/beta": 1.0263406038284302, + "fcm_dpo/delta": 0.24256381392478943, + "fcm_dpo/margin": 0.7545459270477295, + "fcm_dpo/q_t": 0.3599158823490143, + "grad_norm": 320.62713623046875, + "learning_rate": 3.4414181450867465e-07, + "logits/chosen": 0.1599317491054535, + "logits/rejected": 0.11992324888706207, + "logps/chosen": -65.92310333251953, + "logps/ref_chosen": -64.34888458251953, + "logps/ref_rejected": -72.86434173583984, + "logps/rejected": -75.193115234375, + "loss": 1.0762, + "margin_dpo/margin_mean": 0.7545456886291504, + "margin_dpo/margin_std": 1.2547924518585205, + "step": 292 + }, + { + "epoch": 0.4429327286470144, + "fcm_dpo/beta": 1.0156301259994507, + "fcm_dpo/delta": -0.14779676496982574, + "fcm_dpo/margin": 1.1138885021209717, + "fcm_dpo/q_t": 0.29969820380210876, + "grad_norm": 190.7615966796875, + "learning_rate": 3.4291561391508185e-07, + "logits/chosen": 0.22061100602149963, + "logits/rejected": 0.15896283090114594, + "logps/chosen": -56.73548889160156, + "logps/ref_chosen": -54.869468688964844, + "logps/ref_rejected": -81.858642578125, + "logps/rejected": -84.83856201171875, + "loss": 0.8801, + "margin_dpo/margin_mean": 1.1138887405395508, + "margin_dpo/margin_std": 1.374760627746582, + "step": 293 + }, + { + "epoch": 0.4444444444444444, + "fcm_dpo/beta": 0.9778472185134888, + "fcm_dpo/delta": -0.08654538542032242, + "fcm_dpo/margin": 1.0972121953964233, + "fcm_dpo/q_t": 0.309672474861145, + "grad_norm": 168.36590576171875, + "learning_rate": 3.4168681427203153e-07, + "logits/chosen": 0.17619842290878296, + "logits/rejected": 0.143341526389122, + "logps/chosen": -58.26924133300781, + "logps/ref_chosen": -56.670902252197266, + "logps/ref_rejected": -70.32819366455078, + "logps/rejected": -73.02374267578125, + "loss": 0.8444, + "margin_dpo/margin_mean": 1.0972115993499756, + "margin_dpo/margin_std": 1.2684025764465332, + "step": 294 + }, + { + "epoch": 0.4459561602418745, + "fcm_dpo/beta": 1.0054044723510742, + "fcm_dpo/delta": 0.08222609758377075, + "fcm_dpo/margin": 0.9189479947090149, + "fcm_dpo/q_t": 0.35169196128845215, + "grad_norm": 228.4792938232422, + "learning_rate": 3.4045544995169125e-07, + "logits/chosen": 0.1701010763645172, + "logits/rejected": 0.10789903253316879, + "logps/chosen": -52.09330749511719, + "logps/ref_chosen": -50.40088653564453, + "logps/ref_rejected": -83.43521881103516, + "logps/rejected": -86.04659271240234, + "loss": 1.0393, + "margin_dpo/margin_mean": 0.918948233127594, + "margin_dpo/margin_std": 1.4247148036956787, + "step": 295 + }, + { + "epoch": 0.4474678760393046, + "fcm_dpo/beta": 0.9557000994682312, + "fcm_dpo/delta": -0.28874313831329346, + "fcm_dpo/margin": 1.3085522651672363, + "fcm_dpo/q_t": 0.28932487964630127, + "grad_norm": 233.505126953125, + "learning_rate": 3.392215553979679e-07, + "logits/chosen": 0.12661093473434448, + "logits/rejected": 0.09931506216526031, + "logps/chosen": -70.89358520507812, + "logps/ref_chosen": -69.15034484863281, + "logps/ref_rejected": -89.60166931152344, + "logps/rejected": -92.65345764160156, + "loss": 0.8386, + "margin_dpo/margin_mean": 1.3085522651672363, + "margin_dpo/margin_std": 1.4957661628723145, + "step": 296 + }, + { + "epoch": 0.4489795918367347, + "fcm_dpo/beta": 0.9359762668609619, + "fcm_dpo/delta": -0.1143687292933464, + "fcm_dpo/margin": 1.1771044731140137, + "fcm_dpo/q_t": 0.30046796798706055, + "grad_norm": 200.27796936035156, + "learning_rate": 3.3798516512554485e-07, + "logits/chosen": 0.13300062716007233, + "logits/rejected": 0.08352112770080566, + "logps/chosen": -59.854644775390625, + "logps/ref_chosen": -58.01630401611328, + "logps/ref_rejected": -69.95780944824219, + "logps/rejected": -72.97325134277344, + "loss": 0.8157, + "margin_dpo/margin_mean": 1.1771044731140137, + "margin_dpo/margin_std": 1.2866284847259521, + "step": 297 + }, + { + "epoch": 0.4504913076341648, + "fcm_dpo/beta": 0.9372185468673706, + "fcm_dpo/delta": 0.04445381462574005, + "fcm_dpo/margin": 1.0229597091674805, + "fcm_dpo/q_t": 0.3366781175136566, + "grad_norm": 205.09780883789062, + "learning_rate": 3.367463137189156e-07, + "logits/chosen": 0.2418043464422226, + "logits/rejected": 0.18982425332069397, + "logps/chosen": -58.04621505737305, + "logps/ref_chosen": -56.1693115234375, + "logps/ref_rejected": -68.55052185058594, + "logps/rejected": -71.45037841796875, + "loss": 1.0065, + "margin_dpo/margin_mean": 1.0229599475860596, + "margin_dpo/margin_std": 1.4922395944595337, + "step": 298 + }, + { + "epoch": 0.4520030234315949, + "fcm_dpo/beta": 0.9768006801605225, + "fcm_dpo/delta": 0.26853734254837036, + "fcm_dpo/margin": 0.7673162817955017, + "fcm_dpo/q_t": 0.3727618455886841, + "grad_norm": 255.50270080566406, + "learning_rate": 3.355050358314172e-07, + "logits/chosen": 0.08510833978652954, + "logits/rejected": 0.06492967158555984, + "logps/chosen": -64.02816009521484, + "logps/ref_chosen": -62.31780242919922, + "logps/ref_rejected": -72.60028839111328, + "logps/rejected": -75.07796478271484, + "loss": 1.1587, + "margin_dpo/margin_mean": 0.7673170566558838, + "margin_dpo/margin_std": 1.4521667957305908, + "step": 299 + }, + { + "epoch": 0.45351473922902497, + "fcm_dpo/beta": 0.982731282711029, + "fcm_dpo/delta": -0.016341693699359894, + "fcm_dpo/margin": 1.0325164794921875, + "fcm_dpo/q_t": 0.3216491937637329, + "grad_norm": 243.2555389404297, + "learning_rate": 3.3426136618426043e-07, + "logits/chosen": 0.14876267313957214, + "logits/rejected": 0.10573962330818176, + "logps/chosen": -62.35066223144531, + "logps/ref_chosen": -60.38157653808594, + "logps/ref_rejected": -75.45442199707031, + "logps/rejected": -78.45602416992188, + "loss": 0.9532, + "margin_dpo/margin_mean": 1.0325164794921875, + "margin_dpo/margin_std": 1.3928803205490112, + "step": 300 + }, + { + "epoch": 0.455026455026455, + "fcm_dpo/beta": 0.9845176935195923, + "fcm_dpo/delta": 0.09773456305265427, + "fcm_dpo/margin": 0.9239287376403809, + "fcm_dpo/q_t": 0.36213570833206177, + "grad_norm": 244.90597534179688, + "learning_rate": 3.3301533956555885e-07, + "logits/chosen": 0.16752877831459045, + "logits/rejected": 0.14301586151123047, + "logps/chosen": -54.7681999206543, + "logps/ref_chosen": -52.85089111328125, + "logps/ref_rejected": -69.97584533691406, + "logps/rejected": -72.81707763671875, + "loss": 1.1465, + "margin_dpo/margin_mean": 0.9239292740821838, + "margin_dpo/margin_std": 1.6796950101852417, + "step": 301 + }, + { + "epoch": 0.4565381708238851, + "fcm_dpo/beta": 1.0574851036071777, + "fcm_dpo/delta": 0.3823769986629486, + "fcm_dpo/margin": 0.607899010181427, + "fcm_dpo/q_t": 0.3950553238391876, + "grad_norm": 319.1892395019531, + "learning_rate": 3.317669908293554e-07, + "logits/chosen": 0.06301631033420563, + "logits/rejected": 0.02415418066084385, + "logps/chosen": -68.90666198730469, + "logps/ref_chosen": -66.96650695800781, + "logps/ref_rejected": -88.09510803222656, + "logps/rejected": -90.64315795898438, + "loss": 1.2959, + "margin_dpo/margin_mean": 0.6078989505767822, + "margin_dpo/margin_std": 1.4545881748199463, + "step": 302 + }, + { + "epoch": 0.4580498866213152, + "fcm_dpo/beta": 0.9939075112342834, + "fcm_dpo/delta": -0.5528866052627563, + "fcm_dpo/margin": 1.478973388671875, + "fcm_dpo/q_t": 0.2767646014690399, + "grad_norm": 199.89747619628906, + "learning_rate": 3.3051635489464793e-07, + "logits/chosen": 0.17920634150505066, + "logits/rejected": 0.1289132535457611, + "logps/chosen": -63.91869354248047, + "logps/ref_chosen": -62.12152862548828, + "logps/ref_rejected": -90.31204223632812, + "logps/rejected": -93.58818817138672, + "loss": 0.8206, + "margin_dpo/margin_mean": 1.4789727926254272, + "margin_dpo/margin_std": 1.7652822732925415, + "step": 303 + }, + { + "epoch": 0.4595616024187453, + "fcm_dpo/beta": 0.9474884271621704, + "fcm_dpo/delta": -0.1770581603050232, + "fcm_dpo/margin": 1.2210001945495605, + "fcm_dpo/q_t": 0.2853131592273712, + "grad_norm": 196.20419311523438, + "learning_rate": 3.292634667444117e-07, + "logits/chosen": 0.14427047967910767, + "logits/rejected": 0.10413600504398346, + "logps/chosen": -62.46052551269531, + "logps/ref_chosen": -60.695091247558594, + "logps/ref_rejected": -78.2525405883789, + "logps/rejected": -81.23897552490234, + "loss": 0.7895, + "margin_dpo/margin_mean": 1.2210009098052979, + "margin_dpo/margin_std": 1.2481887340545654, + "step": 304 + }, + { + "epoch": 0.46107331821617537, + "fcm_dpo/beta": 0.9275550842285156, + "fcm_dpo/delta": 0.015897810459136963, + "fcm_dpo/margin": 1.0611162185668945, + "fcm_dpo/q_t": 0.3354414701461792, + "grad_norm": 219.49313354492188, + "learning_rate": 3.280083614246217e-07, + "logits/chosen": 0.09203135967254639, + "logits/rejected": 0.09808552265167236, + "logps/chosen": -74.84890747070312, + "logps/ref_chosen": -72.69914245605469, + "logps/ref_rejected": -65.65670776367188, + "logps/rejected": -68.86759185791016, + "loss": 1.0316, + "margin_dpo/margin_mean": 1.0611159801483154, + "margin_dpo/margin_std": 1.6295936107635498, + "step": 305 + }, + { + "epoch": 0.46258503401360546, + "fcm_dpo/beta": 0.9322211742401123, + "fcm_dpo/delta": 0.10965774953365326, + "fcm_dpo/margin": 0.958516001701355, + "fcm_dpo/q_t": 0.34834927320480347, + "grad_norm": 212.98146057128906, + "learning_rate": 3.267510740432719e-07, + "logits/chosen": 0.15844234824180603, + "logits/rejected": 0.07754644751548767, + "logps/chosen": -56.162513732910156, + "logps/ref_chosen": -53.97052764892578, + "logps/ref_rejected": -71.02423095703125, + "logps/rejected": -74.17473602294922, + "loss": 1.0813, + "margin_dpo/margin_mean": 0.9585161209106445, + "margin_dpo/margin_std": 1.579054355621338, + "step": 306 + }, + { + "epoch": 0.46409674981103555, + "fcm_dpo/beta": 0.9765808582305908, + "fcm_dpo/delta": 0.09200756251811981, + "fcm_dpo/margin": 0.9371168613433838, + "fcm_dpo/q_t": 0.35316699743270874, + "grad_norm": 227.36492919921875, + "learning_rate": 3.2549163976939285e-07, + "logits/chosen": 0.19748598337173462, + "logits/rejected": 0.16263772547245026, + "logps/chosen": -59.23182678222656, + "logps/ref_chosen": -57.413108825683594, + "logps/ref_rejected": -68.68010711669922, + "logps/rejected": -71.43594360351562, + "loss": 1.1199, + "margin_dpo/margin_mean": 0.9371169805526733, + "margin_dpo/margin_std": 1.6100661754608154, + "step": 307 + }, + { + "epoch": 0.4656084656084656, + "fcm_dpo/beta": 0.9509669542312622, + "fcm_dpo/delta": -0.15045209228992462, + "fcm_dpo/margin": 1.1912171840667725, + "fcm_dpo/q_t": 0.30430689454078674, + "grad_norm": 198.90879821777344, + "learning_rate": 3.2423009383206874e-07, + "logits/chosen": 0.14542096853256226, + "logits/rejected": 0.1255512833595276, + "logps/chosen": -68.51435852050781, + "logps/ref_chosen": -66.59879302978516, + "logps/ref_rejected": -74.337158203125, + "logps/rejected": -77.44393920898438, + "loss": 0.8821, + "margin_dpo/margin_mean": 1.1912175416946411, + "margin_dpo/margin_std": 1.509572982788086, + "step": 308 + }, + { + "epoch": 0.4671201814058957, + "fcm_dpo/beta": 0.9307016134262085, + "fcm_dpo/delta": -0.036133162677288055, + "fcm_dpo/margin": 1.1062381267547607, + "fcm_dpo/q_t": 0.32213109731674194, + "grad_norm": 279.2270812988281, + "learning_rate": 3.229664715194511e-07, + "logits/chosen": 0.1940372884273529, + "logits/rejected": 0.14977452158927917, + "logps/chosen": -67.56904602050781, + "logps/ref_chosen": -65.39474487304688, + "logps/ref_rejected": -75.70930480957031, + "logps/rejected": -78.98983764648438, + "loss": 0.926, + "margin_dpo/margin_mean": 1.1062389612197876, + "margin_dpo/margin_std": 1.470036506652832, + "step": 309 + }, + { + "epoch": 0.46863189720332576, + "fcm_dpo/beta": 0.9882631897926331, + "fcm_dpo/delta": 0.319682240486145, + "fcm_dpo/margin": 0.7099736928939819, + "fcm_dpo/q_t": 0.38903123140335083, + "grad_norm": 270.8043518066406, + "learning_rate": 3.2170080817777257e-07, + "logits/chosen": 0.19459328055381775, + "logits/rejected": 0.18906690180301666, + "logps/chosen": -76.8685302734375, + "logps/ref_chosen": -74.66827392578125, + "logps/ref_rejected": -80.5689697265625, + "logps/rejected": -83.47919464111328, + "loss": 1.2064, + "margin_dpo/margin_mean": 0.709973931312561, + "margin_dpo/margin_std": 1.481546401977539, + "step": 310 + }, + { + "epoch": 0.47014361300075586, + "fcm_dpo/beta": 0.997173547744751, + "fcm_dpo/delta": -0.014330286532640457, + "fcm_dpo/margin": 1.015620231628418, + "fcm_dpo/q_t": 0.33949679136276245, + "grad_norm": 241.86203002929688, + "learning_rate": 3.204331392103574e-07, + "logits/chosen": 0.11166363954544067, + "logits/rejected": 0.02732861414551735, + "logps/chosen": -61.56627655029297, + "logps/ref_chosen": -59.738033294677734, + "logps/ref_rejected": -93.60757446289062, + "logps/rejected": -96.4514389038086, + "loss": 1.1047, + "margin_dpo/margin_mean": 1.0156208276748657, + "margin_dpo/margin_std": 1.744983434677124, + "step": 311 + }, + { + "epoch": 0.47165532879818595, + "fcm_dpo/beta": 1.0016117095947266, + "fcm_dpo/delta": -0.043600842356681824, + "fcm_dpo/margin": 1.0361428260803223, + "fcm_dpo/q_t": 0.3195294141769409, + "grad_norm": 267.0063781738281, + "learning_rate": 3.1916350007663176e-07, + "logits/chosen": 0.16160011291503906, + "logits/rejected": 0.08802653849124908, + "logps/chosen": -55.83079528808594, + "logps/ref_chosen": -53.816436767578125, + "logps/ref_rejected": -68.6575698852539, + "logps/rejected": -71.70807647705078, + "loss": 1.0571, + "margin_dpo/margin_mean": 1.0361424684524536, + "margin_dpo/margin_std": 1.636512041091919, + "step": 312 + }, + { + "epoch": 0.47316704459561604, + "fcm_dpo/beta": 0.9808007478713989, + "fcm_dpo/delta": -0.0073838010430336, + "fcm_dpo/margin": 1.0249512195587158, + "fcm_dpo/q_t": 0.3501819968223572, + "grad_norm": 224.44908142089844, + "learning_rate": 3.178919262911314e-07, + "logits/chosen": 0.19469937682151794, + "logits/rejected": 0.17493098974227905, + "logps/chosen": -61.82642364501953, + "logps/ref_chosen": -59.957359313964844, + "logps/ref_rejected": -69.31729888916016, + "logps/rejected": -72.21131896972656, + "loss": 1.077, + "margin_dpo/margin_mean": 1.0249509811401367, + "margin_dpo/margin_std": 1.7490208148956299, + "step": 313 + }, + { + "epoch": 0.47467876039304613, + "fcm_dpo/beta": 0.9450622200965881, + "fcm_dpo/delta": -0.23715783655643463, + "fcm_dpo/margin": 1.274531364440918, + "fcm_dpo/q_t": 0.2915083169937134, + "grad_norm": 200.15219116210938, + "learning_rate": 3.166184534225087e-07, + "logits/chosen": 0.16370022296905518, + "logits/rejected": 0.17338353395462036, + "logps/chosen": -72.26028442382812, + "logps/ref_chosen": -70.26815795898438, + "logps/ref_rejected": -69.23971557617188, + "logps/rejected": -72.50637817382812, + "loss": 0.8243, + "margin_dpo/margin_mean": 1.2745311260223389, + "margin_dpo/margin_std": 1.4535917043685913, + "step": 314 + }, + { + "epoch": 0.47619047619047616, + "fcm_dpo/beta": 0.9470343589782715, + "fcm_dpo/delta": -0.0004953928291797638, + "fcm_dpo/margin": 1.0559828281402588, + "fcm_dpo/q_t": 0.328433096408844, + "grad_norm": 213.31190490722656, + "learning_rate": 3.1534311709253723e-07, + "logits/chosen": 0.09684689342975616, + "logits/rejected": 0.060719601809978485, + "logps/chosen": -69.97232055664062, + "logps/ref_chosen": -67.79469299316406, + "logps/ref_rejected": -74.55148315429688, + "logps/rejected": -77.78509521484375, + "loss": 0.9555, + "margin_dpo/margin_mean": 1.0559827089309692, + "margin_dpo/margin_std": 1.4584524631500244, + "step": 315 + }, + { + "epoch": 0.47770219198790626, + "fcm_dpo/beta": 0.9314021468162537, + "fcm_dpo/delta": -0.23499611020088196, + "fcm_dpo/margin": 1.2908413410186768, + "fcm_dpo/q_t": 0.3175312876701355, + "grad_norm": 208.80650329589844, + "learning_rate": 3.1406595297511564e-07, + "logits/chosen": 0.08339610695838928, + "logits/rejected": 0.005085200071334839, + "logps/chosen": -57.207271575927734, + "logps/ref_chosen": -55.288482666015625, + "logps/ref_rejected": -96.15723419189453, + "logps/rejected": -99.36686706542969, + "loss": 0.9423, + "margin_dpo/margin_mean": 1.290840983390808, + "margin_dpo/margin_std": 1.7332630157470703, + "step": 316 + }, + { + "epoch": 0.47921390778533635, + "fcm_dpo/beta": 0.8467363119125366, + "fcm_dpo/delta": -0.33823323249816895, + "fcm_dpo/margin": 1.5226337909698486, + "fcm_dpo/q_t": 0.2754213809967041, + "grad_norm": 168.14048767089844, + "learning_rate": 3.1278699679526975e-07, + "logits/chosen": 0.2033698856830597, + "logits/rejected": 0.1626172512769699, + "logps/chosen": -56.44146728515625, + "logps/ref_chosen": -54.58137512207031, + "logps/ref_rejected": -72.77232360839844, + "logps/rejected": -76.15504455566406, + "loss": 0.7402, + "margin_dpo/margin_mean": 1.5226335525512695, + "margin_dpo/margin_std": 1.537990689277649, + "step": 317 + }, + { + "epoch": 0.48072562358276644, + "fcm_dpo/beta": 0.880409836769104, + "fcm_dpo/delta": 0.30436062812805176, + "fcm_dpo/margin": 0.8145400285720825, + "fcm_dpo/q_t": 0.3881235420703888, + "grad_norm": 241.8609619140625, + "learning_rate": 3.1150628432815336e-07, + "logits/chosen": 0.21144279837608337, + "logits/rejected": 0.1659296602010727, + "logps/chosen": -55.05265426635742, + "logps/ref_chosen": -52.88822937011719, + "logps/ref_rejected": -80.63988494873047, + "logps/rejected": -83.61885070800781, + "loss": 1.3193, + "margin_dpo/margin_mean": 0.8145396709442139, + "margin_dpo/margin_std": 1.8883013725280762, + "step": 318 + }, + { + "epoch": 0.48223733938019653, + "fcm_dpo/beta": 0.8839104771614075, + "fcm_dpo/delta": -0.04694606736302376, + "fcm_dpo/margin": 1.1784477233886719, + "fcm_dpo/q_t": 0.3256542682647705, + "grad_norm": 204.67640686035156, + "learning_rate": 3.1022385139804707e-07, + "logits/chosen": 0.14550672471523285, + "logits/rejected": 0.12916123867034912, + "logps/chosen": -66.23543548583984, + "logps/ref_chosen": -64.36333465576172, + "logps/ref_rejected": -79.47296142578125, + "logps/rejected": -82.52351379394531, + "loss": 1.0132, + "margin_dpo/margin_mean": 1.1784476041793823, + "margin_dpo/margin_std": 1.7645900249481201, + "step": 319 + }, + { + "epoch": 0.4837490551776266, + "fcm_dpo/beta": 0.9131995439529419, + "fcm_dpo/delta": 0.07204453647136688, + "fcm_dpo/margin": 1.013108253479004, + "fcm_dpo/q_t": 0.37524113059043884, + "grad_norm": 217.01268005371094, + "learning_rate": 3.0893973387735683e-07, + "logits/chosen": 0.055543892085552216, + "logits/rejected": 0.023240717127919197, + "logps/chosen": -51.213958740234375, + "logps/ref_chosen": -49.558746337890625, + "logps/ref_rejected": -71.23444366455078, + "logps/rejected": -73.90277099609375, + "loss": 1.1825, + "margin_dpo/margin_mean": 1.0131080150604248, + "margin_dpo/margin_std": 1.9926589727401733, + "step": 320 + }, + { + "epoch": 0.4852607709750567, + "fcm_dpo/beta": 0.8980337381362915, + "fcm_dpo/delta": 0.061922501772642136, + "fcm_dpo/margin": 1.0499402284622192, + "fcm_dpo/q_t": 0.33765465021133423, + "grad_norm": 189.1380615234375, + "learning_rate": 3.0765396768561004e-07, + "logits/chosen": 0.11102212965488434, + "logits/rejected": 0.09998691082000732, + "logps/chosen": -54.228790283203125, + "logps/ref_chosen": -52.08526611328125, + "logps/ref_rejected": -55.58674621582031, + "logps/rejected": -58.78020477294922, + "loss": 1.0637, + "margin_dpo/margin_mean": 1.0499403476715088, + "margin_dpo/margin_std": 1.613488793373108, + "step": 321 + }, + { + "epoch": 0.48677248677248675, + "fcm_dpo/beta": 0.8880925178527832, + "fcm_dpo/delta": -0.23487280309200287, + "fcm_dpo/margin": 1.3585911989212036, + "fcm_dpo/q_t": 0.28043854236602783, + "grad_norm": 176.82778930664062, + "learning_rate": 3.063665887884511e-07, + "logits/chosen": 0.17297013103961945, + "logits/rejected": 0.1158091127872467, + "logps/chosen": -49.436309814453125, + "logps/ref_chosen": -47.404109954833984, + "logps/ref_rejected": -73.4260025024414, + "logps/rejected": -76.81678771972656, + "loss": 0.7812, + "margin_dpo/margin_mean": 1.3585913181304932, + "margin_dpo/margin_std": 1.383784532546997, + "step": 322 + }, + { + "epoch": 0.48828420256991684, + "fcm_dpo/beta": 0.8809718489646912, + "fcm_dpo/delta": -0.021233975887298584, + "fcm_dpo/margin": 1.149742841720581, + "fcm_dpo/q_t": 0.359465092420578, + "grad_norm": 248.77261352539062, + "learning_rate": 3.0507763319663517e-07, + "logits/chosen": 0.10760500282049179, + "logits/rejected": 0.054044321179389954, + "logps/chosen": -72.1104736328125, + "logps/ref_chosen": -70.00630187988281, + "logps/ref_rejected": -86.96690368652344, + "logps/rejected": -90.22081756591797, + "loss": 1.1742, + "margin_dpo/margin_mean": 1.1497416496276855, + "margin_dpo/margin_std": 2.1441869735717773, + "step": 323 + }, + { + "epoch": 0.4897959183673469, + "fcm_dpo/beta": 0.8546582460403442, + "fcm_dpo/delta": -0.05345672369003296, + "fcm_dpo/margin": 1.2257628440856934, + "fcm_dpo/q_t": 0.3085279166698456, + "grad_norm": 156.60922241210938, + "learning_rate": 3.0378713696502097e-07, + "logits/chosen": 0.18619199097156525, + "logits/rejected": 0.1414175033569336, + "logps/chosen": -57.747230529785156, + "logps/ref_chosen": -55.88882064819336, + "logps/ref_rejected": -75.23088073730469, + "logps/rejected": -78.31505584716797, + "loss": 0.8287, + "margin_dpo/margin_mean": 1.2257624864578247, + "margin_dpo/margin_std": 1.363228678703308, + "step": 324 + }, + { + "epoch": 0.491307634164777, + "fcm_dpo/beta": 0.8600409626960754, + "fcm_dpo/delta": 0.09678801149129868, + "fcm_dpo/margin": 1.0598913431167603, + "fcm_dpo/q_t": 0.3425368666648865, + "grad_norm": 200.6235809326172, + "learning_rate": 3.0249513619156206e-07, + "logits/chosen": 0.1525503695011139, + "logits/rejected": 0.1075083315372467, + "logps/chosen": -66.32670593261719, + "logps/ref_chosen": -64.14701843261719, + "logps/ref_rejected": -79.91143798828125, + "logps/rejected": -83.15100860595703, + "loss": 0.9779, + "margin_dpo/margin_mean": 1.059891700744629, + "margin_dpo/margin_std": 1.5498141050338745, + "step": 325 + }, + { + "epoch": 0.4928193499622071, + "fcm_dpo/beta": 0.9292858242988586, + "fcm_dpo/delta": 0.4350077509880066, + "fcm_dpo/margin": 0.6357402801513672, + "fcm_dpo/q_t": 0.40372180938720703, + "grad_norm": 280.2021179199219, + "learning_rate": 3.012016670162977e-07, + "logits/chosen": 0.11779944598674774, + "logits/rejected": 0.11533387750387192, + "logps/chosen": -78.00666046142578, + "logps/ref_chosen": -75.53131103515625, + "logps/ref_rejected": -76.5898666381836, + "logps/rejected": -79.70095825195312, + "loss": 1.3715, + "margin_dpo/margin_mean": 0.6357403993606567, + "margin_dpo/margin_std": 1.7088639736175537, + "step": 326 + }, + { + "epoch": 0.4943310657596372, + "fcm_dpo/beta": 0.9441518783569336, + "fcm_dpo/delta": -0.06476020067930222, + "fcm_dpo/margin": 1.120490312576294, + "fcm_dpo/q_t": 0.32473599910736084, + "grad_norm": 212.27267456054688, + "learning_rate": 2.99906765620341e-07, + "logits/chosen": 0.09260990470647812, + "logits/rejected": 0.054583217948675156, + "logps/chosen": -71.31211853027344, + "logps/ref_chosen": -69.33717346191406, + "logps/ref_rejected": -73.37751770019531, + "logps/rejected": -76.47296142578125, + "loss": 1.0397, + "margin_dpo/margin_mean": 1.1204906702041626, + "margin_dpo/margin_std": 1.7054201364517212, + "step": 327 + }, + { + "epoch": 0.4958427815570673, + "fcm_dpo/beta": 0.938388466835022, + "fcm_dpo/delta": -0.004852544516324997, + "fcm_dpo/margin": 1.0702922344207764, + "fcm_dpo/q_t": 0.3356061577796936, + "grad_norm": 220.39108276367188, + "learning_rate": 2.9861046822486766e-07, + "logits/chosen": 0.1284867823123932, + "logits/rejected": 0.10963472723960876, + "logps/chosen": -63.444252014160156, + "logps/ref_chosen": -61.70623016357422, + "logps/ref_rejected": -83.73808288574219, + "logps/rejected": -86.54638671875, + "loss": 0.9952, + "margin_dpo/margin_mean": 1.0702919960021973, + "margin_dpo/margin_std": 1.561734914779663, + "step": 328 + }, + { + "epoch": 0.4973544973544973, + "fcm_dpo/beta": 0.926159143447876, + "fcm_dpo/delta": 0.030632048845291138, + "fcm_dpo/margin": 1.044736385345459, + "fcm_dpo/q_t": 0.36076274514198303, + "grad_norm": 272.3174743652344, + "learning_rate": 2.9731281109010253e-07, + "logits/chosen": 0.165533185005188, + "logits/rejected": 0.12572559714317322, + "logps/chosen": -66.76375579833984, + "logps/ref_chosen": -64.4984130859375, + "logps/ref_rejected": -83.6591796875, + "logps/rejected": -86.96925354003906, + "loss": 1.169, + "margin_dpo/margin_mean": 1.0447362661361694, + "margin_dpo/margin_std": 1.9857655763626099, + "step": 329 + }, + { + "epoch": 0.4988662131519274, + "fcm_dpo/beta": 0.9391987323760986, + "fcm_dpo/delta": -0.06099821627140045, + "fcm_dpo/margin": 1.122628927230835, + "fcm_dpo/q_t": 0.32373300194740295, + "grad_norm": 196.6431427001953, + "learning_rate": 2.9601383051430505e-07, + "logits/chosen": 0.13443490862846375, + "logits/rejected": 0.08146592229604721, + "logps/chosen": -56.5888671875, + "logps/ref_chosen": -54.80464172363281, + "logps/ref_rejected": -75.3194351196289, + "logps/rejected": -78.22628784179688, + "loss": 1.0489, + "margin_dpo/margin_mean": 1.1226279735565186, + "margin_dpo/margin_std": 1.7134172916412354, + "step": 330 + }, + { + "epoch": 0.5003779289493575, + "fcm_dpo/beta": 0.8843910694122314, + "fcm_dpo/delta": -0.29690316319465637, + "fcm_dpo/margin": 1.4203097820281982, + "fcm_dpo/q_t": 0.29872971773147583, + "grad_norm": 204.7743377685547, + "learning_rate": 2.947135628327544e-07, + "logits/chosen": 0.24195344746112823, + "logits/rejected": 0.2168186902999878, + "logps/chosen": -61.22477340698242, + "logps/ref_chosen": -59.242584228515625, + "logps/ref_rejected": -69.87483215332031, + "logps/rejected": -73.27733612060547, + "loss": 0.8837, + "margin_dpo/margin_mean": 1.4203091859817505, + "margin_dpo/margin_std": 1.860417127609253, + "step": 331 + }, + { + "epoch": 0.5018896447467877, + "fcm_dpo/beta": 0.8774402141571045, + "fcm_dpo/delta": -0.07574308663606644, + "fcm_dpo/margin": 1.2151854038238525, + "fcm_dpo/q_t": 0.320780485868454, + "grad_norm": 200.2390899658203, + "learning_rate": 2.934120444167326e-07, + "logits/chosen": 0.1037403866648674, + "logits/rejected": 0.0657915323972702, + "logps/chosen": -69.155517578125, + "logps/ref_chosen": -67.10975646972656, + "logps/ref_rejected": -77.11839294433594, + "logps/rejected": -80.37932586669922, + "loss": 0.9145, + "margin_dpo/margin_mean": 1.2151854038238525, + "margin_dpo/margin_std": 1.6321237087249756, + "step": 332 + }, + { + "epoch": 0.5034013605442177, + "fcm_dpo/beta": 0.8272979259490967, + "fcm_dpo/delta": -0.3091200590133667, + "fcm_dpo/margin": 1.535266637802124, + "fcm_dpo/q_t": 0.2927742302417755, + "grad_norm": 172.9521942138672, + "learning_rate": 2.921093116725076e-07, + "logits/chosen": 0.1792515516281128, + "logits/rejected": 0.12908412516117096, + "logps/chosen": -60.283103942871094, + "logps/ref_chosen": -58.381134033203125, + "logps/ref_rejected": -85.02839660644531, + "logps/rejected": -88.46562957763672, + "loss": 0.8145, + "margin_dpo/margin_mean": 1.5352662801742554, + "margin_dpo/margin_std": 1.7731688022613525, + "step": 333 + }, + { + "epoch": 0.5049130763416477, + "fcm_dpo/beta": 0.8177670240402222, + "fcm_dpo/delta": 0.09361538290977478, + "fcm_dpo/margin": 1.1182286739349365, + "fcm_dpo/q_t": 0.3452100455760956, + "grad_norm": 200.41497802734375, + "learning_rate": 2.9080540104031484e-07, + "logits/chosen": 0.16784140467643738, + "logits/rejected": 0.13531029224395752, + "logps/chosen": -69.00025939941406, + "logps/ref_chosen": -66.89199829101562, + "logps/ref_rejected": -91.83695220947266, + "logps/rejected": -95.06344604492188, + "loss": 1.063, + "margin_dpo/margin_mean": 1.118227481842041, + "margin_dpo/margin_std": 1.7574753761291504, + "step": 334 + }, + { + "epoch": 0.5064247921390779, + "fcm_dpo/beta": 0.8364279866218567, + "fcm_dpo/delta": -0.04344947636127472, + "fcm_dpo/margin": 1.23694908618927, + "fcm_dpo/q_t": 0.32282981276512146, + "grad_norm": 176.56353759765625, + "learning_rate": 2.895003489933375e-07, + "logits/chosen": 0.15401214361190796, + "logits/rejected": 0.12541456520557404, + "logps/chosen": -63.48824691772461, + "logps/ref_chosen": -61.51445770263672, + "logps/ref_rejected": -75.68916320800781, + "logps/rejected": -78.89990234375, + "loss": 0.9844, + "margin_dpo/margin_mean": 1.2369496822357178, + "margin_dpo/margin_std": 1.6888550519943237, + "step": 335 + }, + { + "epoch": 0.5079365079365079, + "fcm_dpo/beta": 0.8125041127204895, + "fcm_dpo/delta": 0.06346192955970764, + "fcm_dpo/margin": 1.1519867181777954, + "fcm_dpo/q_t": 0.35848677158355713, + "grad_norm": 223.40049743652344, + "learning_rate": 2.8819419203668675e-07, + "logits/chosen": 0.09962709248065948, + "logits/rejected": 0.0864500105381012, + "logps/chosen": -71.24321746826172, + "logps/ref_chosen": -68.85006713867188, + "logps/ref_rejected": -92.99603271484375, + "logps/rejected": -96.54116821289062, + "loss": 1.0724, + "margin_dpo/margin_mean": 1.151987075805664, + "margin_dpo/margin_std": 1.9282138347625732, + "step": 336 + }, + { + "epoch": 0.509448223733938, + "fcm_dpo/beta": 0.8386461734771729, + "fcm_dpo/delta": 0.09305550158023834, + "fcm_dpo/margin": 1.0915064811706543, + "fcm_dpo/q_t": 0.3443507254123688, + "grad_norm": 215.70370483398438, + "learning_rate": 2.8688696670638053e-07, + "logits/chosen": 0.08634011447429657, + "logits/rejected": 0.05762971565127373, + "logps/chosen": -75.31697845458984, + "logps/ref_chosen": -73.18783569335938, + "logps/ref_rejected": -86.89118957519531, + "logps/rejected": -90.11184692382812, + "loss": 1.0527, + "margin_dpo/margin_mean": 1.0915067195892334, + "margin_dpo/margin_std": 1.7208020687103271, + "step": 337 + }, + { + "epoch": 0.5109599395313681, + "fcm_dpo/beta": 0.8704172372817993, + "fcm_dpo/delta": 0.1373847872018814, + "fcm_dpo/margin": 1.0027812719345093, + "fcm_dpo/q_t": 0.3466625213623047, + "grad_norm": 219.7271270751953, + "learning_rate": 2.8557870956832133e-07, + "logits/chosen": 0.11361702531576157, + "logits/rejected": 0.09130830317735672, + "logps/chosen": -66.18433380126953, + "logps/ref_chosen": -63.939613342285156, + "logps/ref_rejected": -75.34243774414062, + "logps/rejected": -78.58993530273438, + "loss": 1.0112, + "margin_dpo/margin_mean": 1.0027809143066406, + "margin_dpo/margin_std": 1.4872009754180908, + "step": 338 + }, + { + "epoch": 0.5124716553287982, + "fcm_dpo/beta": 0.8656154274940491, + "fcm_dpo/delta": 0.016111478209495544, + "fcm_dpo/margin": 1.137109398841858, + "fcm_dpo/q_t": 0.3326491713523865, + "grad_norm": 190.49337768554688, + "learning_rate": 2.842694572172736e-07, + "logits/chosen": 0.19113630056381226, + "logits/rejected": 0.12893235683441162, + "logps/chosen": -47.86590576171875, + "logps/ref_chosen": -45.54913330078125, + "logps/ref_rejected": -67.0482177734375, + "logps/rejected": -70.50209045410156, + "loss": 0.958, + "margin_dpo/margin_mean": 1.137109398841858, + "margin_dpo/margin_std": 1.5872113704681396, + "step": 339 + }, + { + "epoch": 0.5139833711262283, + "fcm_dpo/beta": 0.8644633889198303, + "fcm_dpo/delta": -0.16399508714675903, + "fcm_dpo/margin": 1.3231072425842285, + "fcm_dpo/q_t": 0.3184114396572113, + "grad_norm": 165.63905334472656, + "learning_rate": 2.8295924627584004e-07, + "logits/chosen": 0.14443045854568481, + "logits/rejected": 0.12298154830932617, + "logps/chosen": -56.29311752319336, + "logps/ref_chosen": -54.00564956665039, + "logps/ref_rejected": -61.314430236816406, + "logps/rejected": -64.92501068115234, + "loss": 1.0075, + "margin_dpo/margin_mean": 1.3231074810028076, + "margin_dpo/margin_std": 1.933672547340393, + "step": 340 + }, + { + "epoch": 0.5154950869236583, + "fcm_dpo/beta": 0.8116443157196045, + "fcm_dpo/delta": -0.10012944042682648, + "fcm_dpo/margin": 1.3234137296676636, + "fcm_dpo/q_t": 0.31874704360961914, + "grad_norm": 227.253662109375, + "learning_rate": 2.816481133934373e-07, + "logits/chosen": 0.1503913700580597, + "logits/rejected": 0.11632785201072693, + "logps/chosen": -65.45622253417969, + "logps/ref_chosen": -63.39509582519531, + "logps/ref_rejected": -76.20973205566406, + "logps/rejected": -79.59427642822266, + "loss": 0.9461, + "margin_dpo/margin_mean": 1.3234134912490845, + "margin_dpo/margin_std": 1.674351453781128, + "step": 341 + }, + { + "epoch": 0.5170068027210885, + "fcm_dpo/beta": 0.8011665940284729, + "fcm_dpo/delta": -0.21002721786499023, + "fcm_dpo/margin": 1.4784516096115112, + "fcm_dpo/q_t": 0.30981749296188354, + "grad_norm": 152.3086700439453, + "learning_rate": 2.8033609524527046e-07, + "logits/chosen": 0.17057004570960999, + "logits/rejected": 0.13639067113399506, + "logps/chosen": -55.32265853881836, + "logps/ref_chosen": -53.047813415527344, + "logps/ref_rejected": -68.2854232788086, + "logps/rejected": -72.03872680664062, + "loss": 0.8854, + "margin_dpo/margin_mean": 1.4784512519836426, + "margin_dpo/margin_std": 1.8578169345855713, + "step": 342 + }, + { + "epoch": 0.5185185185185185, + "fcm_dpo/beta": 0.8318637609481812, + "fcm_dpo/delta": 0.3891153335571289, + "fcm_dpo/margin": 0.7660273909568787, + "fcm_dpo/q_t": 0.38361668586730957, + "grad_norm": 204.89308166503906, + "learning_rate": 2.7902322853130753e-07, + "logits/chosen": 0.08965672552585602, + "logits/rejected": 0.08724290132522583, + "logps/chosen": -72.62460327148438, + "logps/ref_chosen": -70.57852935791016, + "logps/ref_rejected": -84.73873901367188, + "logps/rejected": -87.55084228515625, + "loss": 1.2263, + "margin_dpo/margin_mean": 0.7660267353057861, + "margin_dpo/margin_std": 1.6297626495361328, + "step": 343 + }, + { + "epoch": 0.5200302343159486, + "fcm_dpo/beta": 0.8591570854187012, + "fcm_dpo/delta": 0.02412712574005127, + "fcm_dpo/margin": 1.1384611129760742, + "fcm_dpo/q_t": 0.3305787742137909, + "grad_norm": 197.9630584716797, + "learning_rate": 2.7770954997525274e-07, + "logits/chosen": 0.15758341550827026, + "logits/rejected": 0.11625611782073975, + "logps/chosen": -58.18259811401367, + "logps/ref_chosen": -55.811004638671875, + "logps/ref_rejected": -84.77637481689453, + "logps/rejected": -88.28643035888672, + "loss": 0.982, + "margin_dpo/margin_mean": 1.138460636138916, + "margin_dpo/margin_std": 1.6236982345581055, + "step": 344 + }, + { + "epoch": 0.5215419501133787, + "fcm_dpo/beta": 0.8624707460403442, + "fcm_dpo/delta": 0.05118731036782265, + "fcm_dpo/margin": 1.1047433614730835, + "fcm_dpo/q_t": 0.3428131341934204, + "grad_norm": 181.71697998046875, + "learning_rate": 2.7639509632351927e-07, + "logits/chosen": 0.2466953694820404, + "logits/rejected": 0.2128785401582718, + "logps/chosen": -59.707969665527344, + "logps/ref_chosen": -57.78609848022461, + "logps/ref_rejected": -78.91847229003906, + "logps/rejected": -81.94508361816406, + "loss": 1.0228, + "margin_dpo/margin_mean": 1.1047430038452148, + "margin_dpo/margin_std": 1.6839402914047241, + "step": 345 + }, + { + "epoch": 0.5230536659108088, + "fcm_dpo/beta": 0.8550155162811279, + "fcm_dpo/delta": -0.15863507986068726, + "fcm_dpo/margin": 1.334245204925537, + "fcm_dpo/q_t": 0.3107675015926361, + "grad_norm": 217.05181884765625, + "learning_rate": 2.7507990434420123e-07, + "logits/chosen": 0.18394121527671814, + "logits/rejected": 0.13396984338760376, + "logps/chosen": -58.29369354248047, + "logps/ref_chosen": -56.285125732421875, + "logps/ref_rejected": -91.15303039550781, + "logps/rejected": -94.495849609375, + "loss": 0.9038, + "margin_dpo/margin_mean": 1.334245204925537, + "margin_dpo/margin_std": 1.7104822397232056, + "step": 346 + }, + { + "epoch": 0.5245653817082389, + "fcm_dpo/beta": 0.8462698459625244, + "fcm_dpo/delta": 0.07013484090566635, + "fcm_dpo/margin": 1.1055729389190674, + "fcm_dpo/q_t": 0.34362083673477173, + "grad_norm": 187.4880828857422, + "learning_rate": 2.737640108260456e-07, + "logits/chosen": 0.2319878786802292, + "logits/rejected": 0.19426073133945465, + "logps/chosen": -55.76850891113281, + "logps/ref_chosen": -53.499542236328125, + "logps/ref_rejected": -72.52565002441406, + "logps/rejected": -75.90019226074219, + "loss": 1.0505, + "margin_dpo/margin_mean": 1.1055727005004883, + "margin_dpo/margin_std": 1.7748844623565674, + "step": 347 + }, + { + "epoch": 0.5260770975056689, + "fcm_dpo/beta": 0.8080967664718628, + "fcm_dpo/delta": -0.3646644949913025, + "fcm_dpo/margin": 1.6269646883010864, + "fcm_dpo/q_t": 0.3286696970462799, + "grad_norm": 163.6527099609375, + "learning_rate": 2.724474525774229e-07, + "logits/chosen": 0.23792192339897156, + "logits/rejected": 0.21459215879440308, + "logps/chosen": -52.67319107055664, + "logps/ref_chosen": -50.78684997558594, + "logps/ref_rejected": -68.63732147216797, + "logps/rejected": -72.15061950683594, + "loss": 0.9542, + "margin_dpo/margin_mean": 1.6269644498825073, + "margin_dpo/margin_std": 2.543069362640381, + "step": 348 + }, + { + "epoch": 0.527588813303099, + "fcm_dpo/beta": 0.779322624206543, + "fcm_dpo/delta": -0.11380349844694138, + "fcm_dpo/margin": 1.411959171295166, + "fcm_dpo/q_t": 0.3116574287414551, + "grad_norm": 173.95448303222656, + "learning_rate": 2.711302664252973e-07, + "logits/chosen": 0.16869422793388367, + "logits/rejected": 0.10714869201183319, + "logps/chosen": -55.361000061035156, + "logps/ref_chosen": -53.325008392333984, + "logps/ref_rejected": -83.21236419677734, + "logps/rejected": -86.66030883789062, + "loss": 0.9061, + "margin_dpo/margin_mean": 1.4119596481323242, + "margin_dpo/margin_std": 1.8286110162734985, + "step": 349 + }, + { + "epoch": 0.5291005291005291, + "fcm_dpo/beta": 0.7421770095825195, + "fcm_dpo/delta": -0.2771596610546112, + "fcm_dpo/margin": 1.6710288524627686, + "fcm_dpo/q_t": 0.29638049006462097, + "grad_norm": 182.74623107910156, + "learning_rate": 2.698124892141971e-07, + "logits/chosen": 0.1460535228252411, + "logits/rejected": 0.09140360355377197, + "logps/chosen": -63.824440002441406, + "logps/ref_chosen": -61.625770568847656, + "logps/ref_rejected": -87.63627624511719, + "logps/rejected": -91.50596618652344, + "loss": 0.8304, + "margin_dpo/margin_mean": 1.6710278987884521, + "margin_dpo/margin_std": 1.9888241291046143, + "step": 350 + }, + { + "epoch": 0.5306122448979592, + "fcm_dpo/beta": 0.7518589496612549, + "fcm_dpo/delta": 0.1678137630224228, + "fcm_dpo/margin": 1.1261451244354248, + "fcm_dpo/q_t": 0.33188802003860474, + "grad_norm": 165.69610595703125, + "learning_rate": 2.6849415780518357e-07, + "logits/chosen": 0.09675121307373047, + "logits/rejected": 0.0414729006588459, + "logps/chosen": -58.45286178588867, + "logps/ref_chosen": -56.2563362121582, + "logps/ref_rejected": -79.11589813232422, + "logps/rejected": -82.43856811523438, + "loss": 1.0476, + "margin_dpo/margin_mean": 1.1261451244354248, + "margin_dpo/margin_std": 1.6900222301483154, + "step": 351 + }, + { + "epoch": 0.5321239606953893, + "fcm_dpo/beta": 0.7760653495788574, + "fcm_dpo/delta": 0.16433821618556976, + "fcm_dpo/margin": 1.0951128005981445, + "fcm_dpo/q_t": 0.35313987731933594, + "grad_norm": 193.21861267089844, + "learning_rate": 2.6717530907482024e-07, + "logits/chosen": 0.16212013363838196, + "logits/rejected": 0.12353149801492691, + "logps/chosen": -65.12254333496094, + "logps/ref_chosen": -63.05195236206055, + "logps/ref_rejected": -85.52035522460938, + "logps/rejected": -88.68605041503906, + "loss": 0.9931, + "margin_dpo/margin_mean": 1.0951130390167236, + "margin_dpo/margin_std": 1.6728229522705078, + "step": 352 + }, + { + "epoch": 0.5336356764928194, + "fcm_dpo/beta": 0.7995505332946777, + "fcm_dpo/delta": 0.018024399876594543, + "fcm_dpo/margin": 1.2259821891784668, + "fcm_dpo/q_t": 0.33103376626968384, + "grad_norm": 166.7522430419922, + "learning_rate": 2.658559799141411e-07, + "logits/chosen": 0.1692410409450531, + "logits/rejected": 0.16683810949325562, + "logps/chosen": -71.08948516845703, + "logps/ref_chosen": -69.00918579101562, + "logps/ref_rejected": -72.65840148925781, + "logps/rejected": -75.96468353271484, + "loss": 1.0033, + "margin_dpo/margin_mean": 1.2259814739227295, + "margin_dpo/margin_std": 1.810058832168579, + "step": 353 + }, + { + "epoch": 0.5351473922902494, + "fcm_dpo/beta": 0.7726951837539673, + "fcm_dpo/delta": -0.05429168790578842, + "fcm_dpo/margin": 1.3514502048492432, + "fcm_dpo/q_t": 0.319754034280777, + "grad_norm": 218.87094116210938, + "learning_rate": 2.6453620722761895e-07, + "logits/chosen": 0.20377589762210846, + "logits/rejected": 0.10091987252235413, + "logps/chosen": -41.94265365600586, + "logps/ref_chosen": -39.78833770751953, + "logps/ref_rejected": -69.56885528564453, + "logps/rejected": -73.07461547851562, + "loss": 0.93, + "margin_dpo/margin_mean": 1.3514502048492432, + "margin_dpo/margin_std": 1.7668390274047852, + "step": 354 + }, + { + "epoch": 0.5366591080876795, + "fcm_dpo/beta": 0.7757099866867065, + "fcm_dpo/delta": -0.04570357874035835, + "fcm_dpo/margin": 1.3419021368026733, + "fcm_dpo/q_t": 0.32356125116348267, + "grad_norm": 184.73477172851562, + "learning_rate": 2.632160279321328e-07, + "logits/chosen": 0.14926283061504364, + "logits/rejected": 0.0633564367890358, + "logps/chosen": -48.385475158691406, + "logps/ref_chosen": -46.25537872314453, + "logps/ref_rejected": -78.20236206054688, + "logps/rejected": -81.67436218261719, + "loss": 0.9503, + "margin_dpo/margin_mean": 1.3419021368026733, + "margin_dpo/margin_std": 1.8027377128601074, + "step": 355 + }, + { + "epoch": 0.5381708238851096, + "fcm_dpo/beta": 0.7802422046661377, + "fcm_dpo/delta": 0.09977808594703674, + "fcm_dpo/margin": 1.165019154548645, + "fcm_dpo/q_t": 0.34947988390922546, + "grad_norm": 180.77276611328125, + "learning_rate": 2.618954789559356e-07, + "logits/chosen": 0.15579620003700256, + "logits/rejected": 0.1042039543390274, + "logps/chosen": -50.003562927246094, + "logps/ref_chosen": -47.906158447265625, + "logps/ref_rejected": -74.29397583007812, + "logps/rejected": -77.556396484375, + "loss": 1.1606, + "margin_dpo/margin_mean": 1.1650193929672241, + "margin_dpo/margin_std": 2.1200852394104004, + "step": 356 + }, + { + "epoch": 0.5396825396825397, + "fcm_dpo/beta": 0.7977977991104126, + "fcm_dpo/delta": 0.14315135776996613, + "fcm_dpo/margin": 1.0884279012680054, + "fcm_dpo/q_t": 0.3529171347618103, + "grad_norm": 215.51937866210938, + "learning_rate": 2.6057459723762076e-07, + "logits/chosen": 0.16426903009414673, + "logits/rejected": 0.13840454816818237, + "logps/chosen": -64.85040283203125, + "logps/ref_chosen": -62.63500213623047, + "logps/ref_rejected": -65.11399841308594, + "logps/rejected": -68.41783142089844, + "loss": 1.116, + "margin_dpo/margin_mean": 1.0884283781051636, + "margin_dpo/margin_std": 1.8702547550201416, + "step": 357 + }, + { + "epoch": 0.5411942554799698, + "fcm_dpo/beta": 0.7857924103736877, + "fcm_dpo/delta": -0.2809675335884094, + "fcm_dpo/margin": 1.586600661277771, + "fcm_dpo/q_t": 0.2850938141345978, + "grad_norm": 148.0938720703125, + "learning_rate": 2.5925341972508954e-07, + "logits/chosen": 0.1491168886423111, + "logits/rejected": 0.14593584835529327, + "logps/chosen": -69.29679107666016, + "logps/ref_chosen": -67.20960998535156, + "logps/ref_rejected": -69.34715270996094, + "logps/rejected": -73.02093505859375, + "loss": 0.798, + "margin_dpo/margin_mean": 1.586600661277771, + "margin_dpo/margin_std": 1.7209175825119019, + "step": 358 + }, + { + "epoch": 0.5427059712773998, + "fcm_dpo/beta": 0.7968997955322266, + "fcm_dpo/delta": 0.10445237159729004, + "fcm_dpo/margin": 1.1224002838134766, + "fcm_dpo/q_t": 0.34582918882369995, + "grad_norm": 201.40667724609375, + "learning_rate": 2.579319833745169e-07, + "logits/chosen": 0.14725103974342346, + "logits/rejected": 0.12238387763500214, + "logps/chosen": -64.69267272949219, + "logps/ref_chosen": -62.52578353881836, + "logps/ref_rejected": -76.63114929199219, + "logps/rejected": -79.9204330444336, + "loss": 1.0804, + "margin_dpo/margin_mean": 1.1224000453948975, + "margin_dpo/margin_std": 1.7543888092041016, + "step": 359 + }, + { + "epoch": 0.54421768707483, + "fcm_dpo/beta": 0.7883453369140625, + "fcm_dpo/delta": 0.023167556151747704, + "fcm_dpo/margin": 1.241877794265747, + "fcm_dpo/q_t": 0.3393666446208954, + "grad_norm": 213.6732635498047, + "learning_rate": 2.5661032514931834e-07, + "logits/chosen": 0.11307230591773987, + "logits/rejected": 0.0418628454208374, + "logps/chosen": -65.72219848632812, + "logps/ref_chosen": -63.48772048950195, + "logps/ref_rejected": -90.6891098022461, + "logps/rejected": -94.16546630859375, + "loss": 1.0189, + "margin_dpo/margin_mean": 1.2418781518936157, + "margin_dpo/margin_std": 1.9586660861968994, + "step": 360 + }, + { + "epoch": 0.54572940287226, + "fcm_dpo/beta": 0.7847793698310852, + "fcm_dpo/delta": -0.006262313574552536, + "fcm_dpo/margin": 1.2811236381530762, + "fcm_dpo/q_t": 0.32984602451324463, + "grad_norm": 186.03604125976562, + "learning_rate": 2.552884820191154e-07, + "logits/chosen": 0.21646341681480408, + "logits/rejected": 0.17365378141403198, + "logps/chosen": -60.005924224853516, + "logps/ref_chosen": -57.917144775390625, + "logps/ref_rejected": -72.39089965820312, + "logps/rejected": -75.76080322265625, + "loss": 0.9667, + "margin_dpo/margin_mean": 1.2811236381530762, + "margin_dpo/margin_std": 1.8610559701919556, + "step": 361 + }, + { + "epoch": 0.54724111866969, + "fcm_dpo/beta": 0.7910502552986145, + "fcm_dpo/delta": -0.12872016429901123, + "fcm_dpo/margin": 1.3991649150848389, + "fcm_dpo/q_t": 0.3150370121002197, + "grad_norm": 179.85292053222656, + "learning_rate": 2.53966490958702e-07, + "logits/chosen": 0.20901912450790405, + "logits/rejected": 0.14084625244140625, + "logps/chosen": -65.4914321899414, + "logps/ref_chosen": -63.4434700012207, + "logps/ref_rejected": -103.45516967773438, + "logps/rejected": -106.90229797363281, + "loss": 0.8868, + "margin_dpo/margin_mean": 1.3991665840148926, + "margin_dpo/margin_std": 1.6471703052520752, + "step": 362 + }, + { + "epoch": 0.5487528344671202, + "fcm_dpo/beta": 0.7514413595199585, + "fcm_dpo/delta": -0.15377500653266907, + "fcm_dpo/margin": 1.512410044670105, + "fcm_dpo/q_t": 0.3079715967178345, + "grad_norm": 172.7432403564453, + "learning_rate": 2.526443889470099e-07, + "logits/chosen": 0.18472757935523987, + "logits/rejected": 0.09980542212724686, + "logps/chosen": -51.084228515625, + "logps/ref_chosen": -48.65182876586914, + "logps/ref_rejected": -88.65904235839844, + "logps/rejected": -92.60385131835938, + "loss": 0.9557, + "margin_dpo/margin_mean": 1.512410044670105, + "margin_dpo/margin_std": 2.1208112239837646, + "step": 363 + }, + { + "epoch": 0.5502645502645502, + "fcm_dpo/beta": 0.7271950840950012, + "fcm_dpo/delta": -0.1419481635093689, + "fcm_dpo/margin": 1.5479559898376465, + "fcm_dpo/q_t": 0.31253015995025635, + "grad_norm": 143.4964599609375, + "learning_rate": 2.513222129660744e-07, + "logits/chosen": 0.09488549828529358, + "logits/rejected": 0.020679466426372528, + "logps/chosen": -59.90724563598633, + "logps/ref_chosen": -57.87107467651367, + "logps/ref_rejected": -80.95503234863281, + "logps/rejected": -84.53915405273438, + "loss": 0.9253, + "margin_dpo/margin_mean": 1.5479564666748047, + "margin_dpo/margin_std": 2.0348784923553467, + "step": 364 + }, + { + "epoch": 0.5517762660619804, + "fcm_dpo/beta": 0.7135534882545471, + "fcm_dpo/delta": -0.0528385192155838, + "fcm_dpo/margin": 1.4665672779083252, + "fcm_dpo/q_t": 0.30548185110092163, + "grad_norm": 147.20799255371094, + "learning_rate": 2.5e-07, + "logits/chosen": 0.19886715710163116, + "logits/rejected": 0.18875735998153687, + "logps/chosen": -66.82514190673828, + "logps/ref_chosen": -64.94217681884766, + "logps/ref_rejected": -74.8599853515625, + "logps/rejected": -78.20951843261719, + "loss": 0.868, + "margin_dpo/margin_mean": 1.4665677547454834, + "margin_dpo/margin_std": 1.6899524927139282, + "step": 365 + }, + { + "epoch": 0.5532879818594104, + "fcm_dpo/beta": 0.749003529548645, + "fcm_dpo/delta": 0.20677639544010162, + "fcm_dpo/margin": 1.071431279182434, + "fcm_dpo/q_t": 0.36068403720855713, + "grad_norm": 177.8300323486328, + "learning_rate": 2.486777870339255e-07, + "logits/chosen": 0.1121918261051178, + "logits/rejected": 0.09326402097940445, + "logps/chosen": -56.96438217163086, + "logps/ref_chosen": -55.16598129272461, + "logps/ref_rejected": -65.26121520996094, + "logps/rejected": -68.13104248046875, + "loss": 1.1563, + "margin_dpo/margin_mean": 1.0714313983917236, + "margin_dpo/margin_std": 1.9580434560775757, + "step": 366 + }, + { + "epoch": 0.5547996976568406, + "fcm_dpo/beta": 0.7712104320526123, + "fcm_dpo/delta": 0.2716788947582245, + "fcm_dpo/margin": 0.9718486070632935, + "fcm_dpo/q_t": 0.3597671687602997, + "grad_norm": 178.6884002685547, + "learning_rate": 2.4735561105299014e-07, + "logits/chosen": 0.1118677407503128, + "logits/rejected": 0.03625689074397087, + "logps/chosen": -58.22844696044922, + "logps/ref_chosen": -56.01046371459961, + "logps/ref_rejected": -77.31010437011719, + "logps/rejected": -80.49993896484375, + "loss": 1.0665, + "margin_dpo/margin_mean": 0.971848726272583, + "margin_dpo/margin_std": 1.5560146570205688, + "step": 367 + }, + { + "epoch": 0.5563114134542706, + "fcm_dpo/beta": 0.8040578365325928, + "fcm_dpo/delta": 0.1679600328207016, + "fcm_dpo/margin": 1.0528676509857178, + "fcm_dpo/q_t": 0.3611965775489807, + "grad_norm": 214.42361450195312, + "learning_rate": 2.46033509041298e-07, + "logits/chosen": 0.06508797407150269, + "logits/rejected": 0.056503720581531525, + "logps/chosen": -76.96674346923828, + "logps/ref_chosen": -74.82927703857422, + "logps/ref_rejected": -76.11680603027344, + "logps/rejected": -79.30712890625, + "loss": 1.1432, + "margin_dpo/margin_mean": 1.052868127822876, + "margin_dpo/margin_std": 1.877270221710205, + "step": 368 + }, + { + "epoch": 0.5578231292517006, + "fcm_dpo/beta": 0.83278489112854, + "fcm_dpo/delta": 0.09441757202148438, + "fcm_dpo/margin": 1.095184564590454, + "fcm_dpo/q_t": 0.3382675051689148, + "grad_norm": 172.21173095703125, + "learning_rate": 2.447115179808846e-07, + "logits/chosen": 0.122508205473423, + "logits/rejected": 0.09032686054706573, + "logps/chosen": -60.496971130371094, + "logps/ref_chosen": -58.32621765136719, + "logps/ref_rejected": -80.92183685302734, + "logps/rejected": -84.18777465820312, + "loss": 1.0485, + "margin_dpo/margin_mean": 1.0951846837997437, + "margin_dpo/margin_std": 1.6767044067382812, + "step": 369 + }, + { + "epoch": 0.5593348450491308, + "fcm_dpo/beta": 0.8102331757545471, + "fcm_dpo/delta": -0.10076209902763367, + "fcm_dpo/margin": 1.342164397239685, + "fcm_dpo/q_t": 0.3211871385574341, + "grad_norm": 181.2273406982422, + "learning_rate": 2.4338967485068164e-07, + "logits/chosen": 0.24069687724113464, + "logits/rejected": 0.18847504258155823, + "logps/chosen": -55.11189270019531, + "logps/ref_chosen": -52.88372039794922, + "logps/ref_rejected": -79.43692016601562, + "logps/rejected": -83.00725555419922, + "loss": 1.0596, + "margin_dpo/margin_mean": 1.3421647548675537, + "margin_dpo/margin_std": 2.065488576889038, + "step": 370 + }, + { + "epoch": 0.5608465608465608, + "fcm_dpo/beta": 0.8171911239624023, + "fcm_dpo/delta": -0.08116129040718079, + "fcm_dpo/margin": 1.3074921369552612, + "fcm_dpo/q_t": 0.3368060290813446, + "grad_norm": 183.6264190673828, + "learning_rate": 2.420680166254831e-07, + "logits/chosen": 0.2111239731311798, + "logits/rejected": 0.18440525233745575, + "logps/chosen": -51.46292495727539, + "logps/ref_chosen": -49.224212646484375, + "logps/ref_rejected": -63.348472595214844, + "logps/rejected": -66.8946762084961, + "loss": 1.0896, + "margin_dpo/margin_mean": 1.3074921369552612, + "margin_dpo/margin_std": 2.0974409580230713, + "step": 371 + }, + { + "epoch": 0.562358276643991, + "fcm_dpo/beta": 0.8467217683792114, + "fcm_dpo/delta": 0.2932736575603485, + "fcm_dpo/margin": 0.851443350315094, + "fcm_dpo/q_t": 0.3849431276321411, + "grad_norm": 214.92108154296875, + "learning_rate": 2.4074658027491044e-07, + "logits/chosen": 0.18400293588638306, + "logits/rejected": 0.12166933715343475, + "logps/chosen": -54.62636184692383, + "logps/ref_chosen": -52.269554138183594, + "logps/ref_rejected": -72.99522399902344, + "logps/rejected": -76.20347595214844, + "loss": 1.3565, + "margin_dpo/margin_mean": 0.8514436483383179, + "margin_dpo/margin_std": 2.075626850128174, + "step": 372 + }, + { + "epoch": 0.563869992441421, + "fcm_dpo/beta": 0.8382232189178467, + "fcm_dpo/delta": -0.05690415948629379, + "fcm_dpo/margin": 1.2532103061676025, + "fcm_dpo/q_t": 0.3378145694732666, + "grad_norm": 255.2552947998047, + "learning_rate": 2.394254027623792e-07, + "logits/chosen": 0.19523033499717712, + "logits/rejected": 0.14481961727142334, + "logps/chosen": -63.56736755371094, + "logps/ref_chosen": -61.112998962402344, + "logps/ref_rejected": -76.24851989746094, + "logps/rejected": -79.95610046386719, + "loss": 1.0989, + "margin_dpo/margin_mean": 1.2532098293304443, + "margin_dpo/margin_std": 2.067584991455078, + "step": 373 + }, + { + "epoch": 0.5653817082388511, + "fcm_dpo/beta": 0.788150429725647, + "fcm_dpo/delta": -0.36466753482818604, + "fcm_dpo/margin": 1.6652493476867676, + "fcm_dpo/q_t": 0.2760714292526245, + "grad_norm": 186.27012634277344, + "learning_rate": 2.381045210440644e-07, + "logits/chosen": 0.12782002985477448, + "logits/rejected": 0.11331016570329666, + "logps/chosen": -74.75846099853516, + "logps/ref_chosen": -72.66920471191406, + "logps/ref_rejected": -76.83158874511719, + "logps/rejected": -80.58609771728516, + "loss": 0.7647, + "margin_dpo/margin_mean": 1.6652493476867676, + "margin_dpo/margin_std": 1.7359645366668701, + "step": 374 + }, + { + "epoch": 0.5668934240362812, + "fcm_dpo/beta": 0.8175476789474487, + "fcm_dpo/delta": 0.3215762972831726, + "fcm_dpo/margin": 0.8576102256774902, + "fcm_dpo/q_t": 0.37826499342918396, + "grad_norm": 234.281494140625, + "learning_rate": 2.3678397206786715e-07, + "logits/chosen": 0.1810404360294342, + "logits/rejected": 0.1419374942779541, + "logps/chosen": -59.804378509521484, + "logps/ref_chosen": -57.68330383300781, + "logps/ref_rejected": -79.34097290039062, + "logps/rejected": -82.31965637207031, + "loss": 1.1949, + "margin_dpo/margin_mean": 0.8576098680496216, + "margin_dpo/margin_std": 1.7258224487304688, + "step": 375 + }, + { + "epoch": 0.5684051398337112, + "fcm_dpo/beta": 0.7721197605133057, + "fcm_dpo/delta": -0.40702202916145325, + "fcm_dpo/margin": 1.7395694255828857, + "fcm_dpo/q_t": 0.28671878576278687, + "grad_norm": 153.21202087402344, + "learning_rate": 2.3546379277238103e-07, + "logits/chosen": 0.19977155327796936, + "logits/rejected": 0.15468192100524902, + "logps/chosen": -53.76110076904297, + "logps/ref_chosen": -51.674072265625, + "logps/ref_rejected": -75.69713592529297, + "logps/rejected": -79.52372741699219, + "loss": 0.8446, + "margin_dpo/margin_mean": 1.739569067955017, + "margin_dpo/margin_std": 2.094245433807373, + "step": 376 + }, + { + "epoch": 0.5699168556311414, + "fcm_dpo/beta": 0.7831248641014099, + "fcm_dpo/delta": 0.08697693049907684, + "fcm_dpo/margin": 1.1734894514083862, + "fcm_dpo/q_t": 0.3411799371242523, + "grad_norm": 162.50051879882812, + "learning_rate": 2.3414402008585886e-07, + "logits/chosen": 0.11727416515350342, + "logits/rejected": 0.09625902771949768, + "logps/chosen": -48.64626693725586, + "logps/ref_chosen": -46.17853546142578, + "logps/ref_rejected": -57.756500244140625, + "logps/rejected": -61.39772415161133, + "loss": 1.0109, + "margin_dpo/margin_mean": 1.1734893321990967, + "margin_dpo/margin_std": 1.7293052673339844, + "step": 377 + }, + { + "epoch": 0.5714285714285714, + "fcm_dpo/beta": 0.8013092875480652, + "fcm_dpo/delta": 0.08626553416252136, + "fcm_dpo/margin": 1.1440807580947876, + "fcm_dpo/q_t": 0.35015422105789185, + "grad_norm": 186.11314392089844, + "learning_rate": 2.3282469092517977e-07, + "logits/chosen": 0.19343531131744385, + "logits/rejected": 0.15595267713069916, + "logps/chosen": -61.52544021606445, + "logps/ref_chosen": -59.21887969970703, + "logps/ref_rejected": -71.24818420410156, + "logps/rejected": -74.69883728027344, + "loss": 1.045, + "margin_dpo/margin_mean": 1.1440809965133667, + "margin_dpo/margin_std": 1.8243110179901123, + "step": 378 + }, + { + "epoch": 0.5729402872260015, + "fcm_dpo/beta": 0.7728543281555176, + "fcm_dpo/delta": -0.18132196366786957, + "fcm_dpo/margin": 1.5013034343719482, + "fcm_dpo/q_t": 0.3048959970474243, + "grad_norm": 197.8959197998047, + "learning_rate": 2.3150584219481643e-07, + "logits/chosen": 0.21861502528190613, + "logits/rejected": 0.1745017170906067, + "logps/chosen": -78.35917663574219, + "logps/ref_chosen": -76.31658935546875, + "logps/ref_rejected": -104.26200103759766, + "logps/rejected": -107.80589294433594, + "loss": 0.865, + "margin_dpo/margin_mean": 1.501303791999817, + "margin_dpo/margin_std": 1.88877534866333, + "step": 379 + }, + { + "epoch": 0.5744520030234316, + "fcm_dpo/beta": 0.732434093952179, + "fcm_dpo/delta": -0.3159186840057373, + "fcm_dpo/margin": 1.7422823905944824, + "fcm_dpo/q_t": 0.2831631302833557, + "grad_norm": 152.97654724121094, + "learning_rate": 2.3018751078580283e-07, + "logits/chosen": 0.18231691420078278, + "logits/rejected": 0.15374861657619476, + "logps/chosen": -63.16365051269531, + "logps/ref_chosen": -61.283164978027344, + "logps/ref_rejected": -72.38892364501953, + "logps/rejected": -76.01168823242188, + "loss": 0.8666, + "margin_dpo/margin_mean": 1.7422822713851929, + "margin_dpo/margin_std": 2.142005443572998, + "step": 380 + }, + { + "epoch": 0.5759637188208617, + "fcm_dpo/beta": 0.7804316282272339, + "fcm_dpo/delta": 0.42041903734207153, + "fcm_dpo/margin": 0.7539185285568237, + "fcm_dpo/q_t": 0.3955872654914856, + "grad_norm": 201.4615936279297, + "learning_rate": 2.288697335747027e-07, + "logits/chosen": 0.12479900568723679, + "logits/rejected": 0.10375261306762695, + "logps/chosen": -60.67311096191406, + "logps/ref_chosen": -58.2139892578125, + "logps/ref_rejected": -60.78669357299805, + "logps/rejected": -63.999732971191406, + "loss": 1.2184, + "margin_dpo/margin_mean": 0.7539188861846924, + "margin_dpo/margin_std": 1.6283390522003174, + "step": 381 + }, + { + "epoch": 0.5774754346182918, + "fcm_dpo/beta": 0.8213146924972534, + "fcm_dpo/delta": 0.16648587584495544, + "fcm_dpo/margin": 1.0126454830169678, + "fcm_dpo/q_t": 0.3555706739425659, + "grad_norm": 187.99722290039062, + "learning_rate": 2.2755254742257706e-07, + "logits/chosen": 0.1935308575630188, + "logits/rejected": 0.1620044708251953, + "logps/chosen": -64.31198120117188, + "logps/ref_chosen": -61.82532501220703, + "logps/ref_rejected": -83.0452880859375, + "logps/rejected": -86.54458618164062, + "loss": 1.0464, + "margin_dpo/margin_mean": 1.0126454830169678, + "margin_dpo/margin_std": 1.5206871032714844, + "step": 382 + }, + { + "epoch": 0.5789871504157218, + "fcm_dpo/beta": 0.7823382616043091, + "fcm_dpo/delta": -0.15291021764278412, + "fcm_dpo/margin": 1.4475116729736328, + "fcm_dpo/q_t": 0.3087931275367737, + "grad_norm": 219.67236328125, + "learning_rate": 2.2623598917395436e-07, + "logits/chosen": 0.10844056308269501, + "logits/rejected": 0.11368384212255478, + "logps/chosen": -82.67160034179688, + "logps/ref_chosen": -80.56326293945312, + "logps/ref_rejected": -74.62922668457031, + "logps/rejected": -78.18507385253906, + "loss": 0.9182, + "margin_dpo/margin_mean": 1.4475116729736328, + "margin_dpo/margin_std": 1.882810354232788, + "step": 383 + }, + { + "epoch": 0.5804988662131519, + "fcm_dpo/beta": 0.8081178665161133, + "fcm_dpo/delta": 0.1466008424758911, + "fcm_dpo/margin": 1.0682569742202759, + "fcm_dpo/q_t": 0.3386213779449463, + "grad_norm": 210.8586883544922, + "learning_rate": 2.2492009565579875e-07, + "logits/chosen": 0.15908417105674744, + "logits/rejected": 0.1250711977481842, + "logps/chosen": -68.10568237304688, + "logps/ref_chosen": -65.47514343261719, + "logps/ref_rejected": -79.67378234863281, + "logps/rejected": -83.37257385253906, + "loss": 1.014, + "margin_dpo/margin_mean": 1.0682566165924072, + "margin_dpo/margin_std": 1.5980737209320068, + "step": 384 + }, + { + "epoch": 0.582010582010582, + "fcm_dpo/beta": 0.8039337396621704, + "fcm_dpo/delta": -0.06407226622104645, + "fcm_dpo/margin": 1.3148212432861328, + "fcm_dpo/q_t": 0.3122956156730652, + "grad_norm": 201.8307647705078, + "learning_rate": 2.2360490367648084e-07, + "logits/chosen": 0.14007516205310822, + "logits/rejected": 0.11297205090522766, + "logps/chosen": -68.39266967773438, + "logps/ref_chosen": -66.0565185546875, + "logps/ref_rejected": -86.68023681640625, + "logps/rejected": -90.33121490478516, + "loss": 0.9181, + "margin_dpo/margin_mean": 1.3148208856582642, + "margin_dpo/margin_std": 1.7250840663909912, + "step": 385 + }, + { + "epoch": 0.5835222978080121, + "fcm_dpo/beta": 0.7948161363601685, + "fcm_dpo/delta": 0.02499794214963913, + "fcm_dpo/margin": 1.229053020477295, + "fcm_dpo/q_t": 0.3376579284667969, + "grad_norm": 190.71925354003906, + "learning_rate": 2.2229045002474724e-07, + "logits/chosen": 0.1209828183054924, + "logits/rejected": 0.0802159532904625, + "logps/chosen": -78.08750915527344, + "logps/ref_chosen": -75.6236572265625, + "logps/ref_rejected": -92.62330627441406, + "logps/rejected": -96.31620788574219, + "loss": 1.0068, + "margin_dpo/margin_mean": 1.229053258895874, + "margin_dpo/margin_std": 1.8680897951126099, + "step": 386 + }, + { + "epoch": 0.5850340136054422, + "fcm_dpo/beta": 0.7716137170791626, + "fcm_dpo/delta": -0.2335832417011261, + "fcm_dpo/margin": 1.5613083839416504, + "fcm_dpo/q_t": 0.27384334802627563, + "grad_norm": 133.3782501220703, + "learning_rate": 2.209767714686924e-07, + "logits/chosen": 0.15220069885253906, + "logits/rejected": 0.08792141824960709, + "logps/chosen": -49.354488372802734, + "logps/ref_chosen": -47.22170639038086, + "logps/ref_rejected": -87.338134765625, + "logps/rejected": -91.03223419189453, + "loss": 0.7497, + "margin_dpo/margin_mean": 1.5613081455230713, + "margin_dpo/margin_std": 1.4777767658233643, + "step": 387 + }, + { + "epoch": 0.5865457294028723, + "fcm_dpo/beta": 0.763427734375, + "fcm_dpo/delta": 0.034832365810871124, + "fcm_dpo/margin": 1.267984390258789, + "fcm_dpo/q_t": 0.3503814935684204, + "grad_norm": 178.84954833984375, + "learning_rate": 2.1966390475472954e-07, + "logits/chosen": 0.16702687740325928, + "logits/rejected": 0.1540418267250061, + "logps/chosen": -76.91740417480469, + "logps/ref_chosen": -74.5794677734375, + "logps/ref_rejected": -79.92558288574219, + "logps/rejected": -83.531494140625, + "loss": 1.0483, + "margin_dpo/margin_mean": 1.2679840326309204, + "margin_dpo/margin_std": 2.043673515319824, + "step": 388 + }, + { + "epoch": 0.5880574452003023, + "fcm_dpo/beta": 0.7542685270309448, + "fcm_dpo/delta": -0.14975543320178986, + "fcm_dpo/margin": 1.5020861625671387, + "fcm_dpo/q_t": 0.3079478144645691, + "grad_norm": 161.67514038085938, + "learning_rate": 2.1835188660656265e-07, + "logits/chosen": 0.17143261432647705, + "logits/rejected": 0.1412460058927536, + "logps/chosen": -63.959442138671875, + "logps/ref_chosen": -61.624366760253906, + "logps/ref_rejected": -76.50978088378906, + "logps/rejected": -80.3469467163086, + "loss": 0.9259, + "margin_dpo/margin_mean": 1.5020864009857178, + "margin_dpo/margin_std": 1.9342763423919678, + "step": 389 + }, + { + "epoch": 0.5895691609977324, + "fcm_dpo/beta": 0.7468278408050537, + "fcm_dpo/delta": 0.005739331711083651, + "fcm_dpo/margin": 1.3320647478103638, + "fcm_dpo/q_t": 0.3248102068901062, + "grad_norm": 146.38316345214844, + "learning_rate": 2.170407537241599e-07, + "logits/chosen": 0.2075425386428833, + "logits/rejected": 0.16074812412261963, + "logps/chosen": -48.055908203125, + "logps/ref_chosen": -45.871864318847656, + "logps/ref_rejected": -61.305999755859375, + "logps/rejected": -64.82211303710938, + "loss": 0.9054, + "margin_dpo/margin_mean": 1.3320646286010742, + "margin_dpo/margin_std": 1.6783784627914429, + "step": 390 + }, + { + "epoch": 0.5910808767951625, + "fcm_dpo/beta": 0.7326708436012268, + "fcm_dpo/delta": -0.18802016973495483, + "fcm_dpo/margin": 1.591862440109253, + "fcm_dpo/q_t": 0.31688395142555237, + "grad_norm": 162.29026794433594, + "learning_rate": 2.1573054278272636e-07, + "logits/chosen": 0.17586693167686462, + "logits/rejected": 0.12610454857349396, + "logps/chosen": -60.547569274902344, + "logps/ref_chosen": -58.18701171875, + "logps/ref_rejected": -83.63442993164062, + "logps/rejected": -87.58686065673828, + "loss": 1.01, + "margin_dpo/margin_mean": 1.5918623208999634, + "margin_dpo/margin_std": 2.3288512229919434, + "step": 391 + }, + { + "epoch": 0.5925925925925926, + "fcm_dpo/beta": 0.6947846412658691, + "fcm_dpo/delta": -0.25457581877708435, + "fcm_dpo/margin": 1.7612890005111694, + "fcm_dpo/q_t": 0.3048982322216034, + "grad_norm": 162.5065155029297, + "learning_rate": 2.1442129043167873e-07, + "logits/chosen": 0.24103191494941711, + "logits/rejected": 0.19386835396289825, + "logps/chosen": -71.8067626953125, + "logps/ref_chosen": -69.7445297241211, + "logps/ref_rejected": -94.05877685546875, + "logps/rejected": -97.8823013305664, + "loss": 0.9129, + "margin_dpo/margin_mean": 1.761289358139038, + "margin_dpo/margin_std": 2.361508846282959, + "step": 392 + }, + { + "epoch": 0.5941043083900227, + "fcm_dpo/beta": 0.6695666313171387, + "fcm_dpo/delta": -0.061286166310310364, + "fcm_dpo/margin": 1.5697075128555298, + "fcm_dpo/q_t": 0.30320626497268677, + "grad_norm": 139.36740112304688, + "learning_rate": 2.131130332936195e-07, + "logits/chosen": 0.20045891404151917, + "logits/rejected": 0.17006459832191467, + "logps/chosen": -54.7346076965332, + "logps/ref_chosen": -52.33489990234375, + "logps/ref_rejected": -74.33809661865234, + "logps/rejected": -78.30751037597656, + "loss": 0.8145, + "margin_dpo/margin_mean": 1.5697076320648193, + "margin_dpo/margin_std": 1.6137369871139526, + "step": 393 + }, + { + "epoch": 0.5956160241874527, + "fcm_dpo/beta": 0.683825671672821, + "fcm_dpo/delta": 0.04361763596534729, + "fcm_dpo/margin": 1.4036848545074463, + "fcm_dpo/q_t": 0.3363683223724365, + "grad_norm": 153.11077880859375, + "learning_rate": 2.1180580796331323e-07, + "logits/chosen": 0.22308960556983948, + "logits/rejected": 0.1974237710237503, + "logps/chosen": -63.075340270996094, + "logps/ref_chosen": -60.6761360168457, + "logps/ref_rejected": -71.36074829101562, + "logps/rejected": -75.16364288330078, + "loss": 0.9732, + "margin_dpo/margin_mean": 1.4036844968795776, + "margin_dpo/margin_std": 2.0274405479431152, + "step": 394 + }, + { + "epoch": 0.5971277399848829, + "fcm_dpo/beta": 0.6959511041641235, + "fcm_dpo/delta": 0.1088649109005928, + "fcm_dpo/margin": 1.2934165000915527, + "fcm_dpo/q_t": 0.3352372646331787, + "grad_norm": 158.16360473632812, + "learning_rate": 2.104996510066625e-07, + "logits/chosen": 0.1776614785194397, + "logits/rejected": 0.117831751704216, + "logps/chosen": -52.74278259277344, + "logps/ref_chosen": -50.60432434082031, + "logps/ref_rejected": -77.08731079101562, + "logps/rejected": -80.51919555664062, + "loss": 0.9631, + "margin_dpo/margin_mean": 1.2934160232543945, + "margin_dpo/margin_std": 1.7340922355651855, + "step": 395 + }, + { + "epoch": 0.5986394557823129, + "fcm_dpo/beta": 0.6787519454956055, + "fcm_dpo/delta": -0.02714592218399048, + "fcm_dpo/margin": 1.4940762519836426, + "fcm_dpo/q_t": 0.31732630729675293, + "grad_norm": 145.00909423828125, + "learning_rate": 2.0919459895968517e-07, + "logits/chosen": 0.17002803087234497, + "logits/rejected": 0.09879619628190994, + "logps/chosen": -53.40643310546875, + "logps/ref_chosen": -51.35961151123047, + "logps/ref_rejected": -79.89360046386719, + "logps/rejected": -83.43449401855469, + "loss": 0.8661, + "margin_dpo/margin_mean": 1.4940763711929321, + "margin_dpo/margin_std": 1.6158559322357178, + "step": 396 + }, + { + "epoch": 0.600151171579743, + "fcm_dpo/beta": 0.7337859869003296, + "fcm_dpo/delta": 0.3556361794471741, + "fcm_dpo/margin": 0.9081060886383057, + "fcm_dpo/q_t": 0.37586018443107605, + "grad_norm": 224.95326232910156, + "learning_rate": 2.078906883274924e-07, + "logits/chosen": 0.12839001417160034, + "logits/rejected": 0.09453357756137848, + "logps/chosen": -68.88770294189453, + "logps/ref_chosen": -66.45622253417969, + "logps/ref_rejected": -85.74736785888672, + "logps/rejected": -89.08694458007812, + "loss": 1.3125, + "margin_dpo/margin_mean": 0.9081062078475952, + "margin_dpo/margin_std": 2.2067739963531494, + "step": 397 + }, + { + "epoch": 0.6016628873771731, + "fcm_dpo/beta": 0.7362475395202637, + "fcm_dpo/delta": -0.06381751596927643, + "fcm_dpo/margin": 1.4355955123901367, + "fcm_dpo/q_t": 0.3111931383609772, + "grad_norm": 139.69631958007812, + "learning_rate": 2.065879555832674e-07, + "logits/chosen": 0.16362245380878448, + "logits/rejected": 0.11482103168964386, + "logps/chosen": -51.42768859863281, + "logps/ref_chosen": -49.244239807128906, + "logps/ref_rejected": -75.18949127197266, + "logps/rejected": -78.80854034423828, + "loss": 0.8542, + "margin_dpo/margin_mean": 1.4355957508087158, + "margin_dpo/margin_std": 1.6850342750549316, + "step": 398 + }, + { + "epoch": 0.6031746031746031, + "fcm_dpo/beta": 0.7134385108947754, + "fcm_dpo/delta": -0.1938168704509735, + "fcm_dpo/margin": 1.6412277221679688, + "fcm_dpo/q_t": 0.32044440507888794, + "grad_norm": 166.16122436523438, + "learning_rate": 2.052864371672457e-07, + "logits/chosen": 0.12605230510234833, + "logits/rejected": 0.031615402549505234, + "logps/chosen": -70.56350708007812, + "logps/ref_chosen": -68.30679321289062, + "logps/ref_rejected": -113.2708511352539, + "logps/rejected": -117.16879272460938, + "loss": 0.9153, + "margin_dpo/margin_mean": 1.641228437423706, + "margin_dpo/margin_std": 2.2904515266418457, + "step": 399 + }, + { + "epoch": 0.6046863189720333, + "fcm_dpo/beta": 0.7214968204498291, + "fcm_dpo/delta": 0.2427944540977478, + "fcm_dpo/margin": 1.0737788677215576, + "fcm_dpo/q_t": 0.37354040145874023, + "grad_norm": 195.83456420898438, + "learning_rate": 2.0398616948569493e-07, + "logits/chosen": 0.18185412883758545, + "logits/rejected": 0.14102932810783386, + "logps/chosen": -74.3447036743164, + "logps/ref_chosen": -71.62649536132812, + "logps/ref_rejected": -90.98765563964844, + "logps/rejected": -94.77964782714844, + "loss": 1.0799, + "margin_dpo/margin_mean": 1.0737799406051636, + "margin_dpo/margin_std": 1.769069790840149, + "step": 400 + }, + { + "epoch": 0.6046863189720333, + "eval_fcm_dpo/beta": 0.7420421838760376, + "eval_logits/chosen": 0.20529566705226898, + "eval_logits/rejected": 0.1674942821264267, + "eval_logps/chosen": -77.24707794189453, + "eval_logps/ref_chosen": -74.85946655273438, + "eval_logps/ref_rejected": -79.54898834228516, + "eval_logps/rejected": -83.04328918457031, + "eval_loss": 0.5736358761787415, + "eval_margin_dpo/margin_mean": 1.1066911220550537, + "eval_margin_dpo/margin_std": 2.0390639305114746, + "eval_runtime": 37.9981, + "eval_samples_per_second": 60.608, + "eval_steps_per_second": 1.895, + "step": 400 + }, + { + "epoch": 0.6061980347694633, + "fcm_dpo/beta": 0.7018467783927917, + "fcm_dpo/delta": -0.38142985105514526, + "fcm_dpo/margin": 1.8959496021270752, + "fcm_dpo/q_t": 0.28283387422561646, + "grad_norm": 134.66848754882812, + "learning_rate": 2.0268718890989752e-07, + "logits/chosen": 0.19736307859420776, + "logits/rejected": 0.12394518405199051, + "logps/chosen": -55.94346618652344, + "logps/ref_chosen": -53.72495651245117, + "logps/ref_rejected": -75.06304931640625, + "logps/rejected": -79.17750549316406, + "loss": 0.7958, + "margin_dpo/margin_mean": 1.8959496021270752, + "margin_dpo/margin_std": 2.1448566913604736, + "step": 401 + }, + { + "epoch": 0.6077097505668935, + "fcm_dpo/beta": 0.6789320707321167, + "fcm_dpo/delta": -0.07589547336101532, + "fcm_dpo/margin": 1.5725529193878174, + "fcm_dpo/q_t": 0.31703245639801025, + "grad_norm": 150.985595703125, + "learning_rate": 2.013895317751323e-07, + "logits/chosen": 0.18823865056037903, + "logits/rejected": 0.155757337808609, + "logps/chosen": -64.17735290527344, + "logps/ref_chosen": -61.873931884765625, + "logps/ref_rejected": -66.15198516845703, + "logps/rejected": -70.0279541015625, + "loss": 0.8937, + "margin_dpo/margin_mean": 1.57255220413208, + "margin_dpo/margin_std": 1.990880012512207, + "step": 402 + }, + { + "epoch": 0.6092214663643235, + "fcm_dpo/beta": 0.6459471583366394, + "fcm_dpo/delta": -0.33136266469955444, + "fcm_dpo/margin": 1.995465636253357, + "fcm_dpo/q_t": 0.2850838303565979, + "grad_norm": 122.05958557128906, + "learning_rate": 2.0009323437965898e-07, + "logits/chosen": 0.254935085773468, + "logits/rejected": 0.19566068053245544, + "logps/chosen": -53.623878479003906, + "logps/ref_chosen": -51.321502685546875, + "logps/ref_rejected": -86.54010772705078, + "logps/rejected": -90.83795166015625, + "loss": 0.822, + "margin_dpo/margin_mean": 1.9954662322998047, + "margin_dpo/margin_std": 2.2620480060577393, + "step": 403 + }, + { + "epoch": 0.6107331821617535, + "fcm_dpo/beta": 0.6391547918319702, + "fcm_dpo/delta": 0.12487616389989853, + "fcm_dpo/margin": 1.3852322101593018, + "fcm_dpo/q_t": 0.33947524428367615, + "grad_norm": 162.3168487548828, + "learning_rate": 1.9879833298370237e-07, + "logits/chosen": 0.15734031796455383, + "logits/rejected": 0.0940411314368248, + "logps/chosen": -64.49229431152344, + "logps/ref_chosen": -62.26288604736328, + "logps/ref_rejected": -95.19029998779297, + "logps/rejected": -98.80493927001953, + "loss": 0.9692, + "margin_dpo/margin_mean": 1.3852319717407227, + "margin_dpo/margin_std": 1.8755829334259033, + "step": 404 + }, + { + "epoch": 0.6122448979591837, + "fcm_dpo/beta": 0.634131669998169, + "fcm_dpo/delta": 0.03272247314453125, + "fcm_dpo/margin": 1.5051655769348145, + "fcm_dpo/q_t": 0.3402136266231537, + "grad_norm": 127.78704071044922, + "learning_rate": 1.975048638084379e-07, + "logits/chosen": 0.19416359066963196, + "logits/rejected": 0.16025002300739288, + "logps/chosen": -52.953269958496094, + "logps/ref_chosen": -50.5843391418457, + "logps/ref_rejected": -65.43156433105469, + "logps/rejected": -69.3056640625, + "loss": 0.968, + "margin_dpo/margin_mean": 1.5051651000976562, + "margin_dpo/margin_std": 2.0198493003845215, + "step": 405 + }, + { + "epoch": 0.6137566137566137, + "fcm_dpo/beta": 0.632315993309021, + "fcm_dpo/delta": -0.13053575158119202, + "fcm_dpo/margin": 1.755932331085205, + "fcm_dpo/q_t": 0.299167662858963, + "grad_norm": 111.40565490722656, + "learning_rate": 1.9621286303497914e-07, + "logits/chosen": 0.19205182790756226, + "logits/rejected": 0.0876828134059906, + "logps/chosen": -51.30841827392578, + "logps/ref_chosen": -48.99560546875, + "logps/ref_rejected": -92.47774505615234, + "logps/rejected": -96.54649353027344, + "loss": 0.8306, + "margin_dpo/margin_mean": 1.7559325695037842, + "margin_dpo/margin_std": 1.8786392211914062, + "step": 406 + }, + { + "epoch": 0.6152683295540439, + "fcm_dpo/beta": 0.657385528087616, + "fcm_dpo/delta": 0.21200606226921082, + "fcm_dpo/margin": 1.224708914756775, + "fcm_dpo/q_t": 0.3558533787727356, + "grad_norm": 209.7600555419922, + "learning_rate": 1.9492236680336483e-07, + "logits/chosen": 0.13539977371692657, + "logits/rejected": 0.08569268882274628, + "logps/chosen": -92.08710479736328, + "logps/ref_chosen": -89.40056610107422, + "logps/ref_rejected": -99.28775024414062, + "logps/rejected": -103.19900512695312, + "loss": 1.0517, + "margin_dpo/margin_mean": 1.224708914756775, + "margin_dpo/margin_std": 2.031216621398926, + "step": 407 + }, + { + "epoch": 0.6167800453514739, + "fcm_dpo/beta": 0.6533396244049072, + "fcm_dpo/delta": -0.2026488482952118, + "fcm_dpo/margin": 1.8033205270767212, + "fcm_dpo/q_t": 0.2949105501174927, + "grad_norm": 119.43750762939453, + "learning_rate": 1.9363341121154895e-07, + "logits/chosen": 0.1879599392414093, + "logits/rejected": 0.1291370391845703, + "logps/chosen": -56.83644485473633, + "logps/ref_chosen": -54.70391845703125, + "logps/ref_rejected": -73.98648834228516, + "logps/rejected": -77.92233276367188, + "loss": 0.8189, + "margin_dpo/margin_mean": 1.8033205270767212, + "margin_dpo/margin_std": 1.967519998550415, + "step": 408 + }, + { + "epoch": 0.618291761148904, + "fcm_dpo/beta": 0.675295352935791, + "fcm_dpo/delta": 0.34728795289993286, + "fcm_dpo/margin": 0.9984000325202942, + "fcm_dpo/q_t": 0.3867419362068176, + "grad_norm": 168.26300048828125, + "learning_rate": 1.9234603231438994e-07, + "logits/chosen": 0.1908985674381256, + "logits/rejected": 0.19027680158615112, + "logps/chosen": -64.74275970458984, + "logps/ref_chosen": -62.11822509765625, + "logps/ref_rejected": -61.933509826660156, + "logps/rejected": -65.55644226074219, + "loss": 1.1776, + "margin_dpo/margin_mean": 0.9983994960784912, + "margin_dpo/margin_std": 2.0483100414276123, + "step": 409 + }, + { + "epoch": 0.6198034769463341, + "fcm_dpo/beta": 0.6897294521331787, + "fcm_dpo/delta": 0.15048189461231232, + "fcm_dpo/margin": 1.2474337816238403, + "fcm_dpo/q_t": 0.3310784697532654, + "grad_norm": 171.6697998046875, + "learning_rate": 1.9106026612264315e-07, + "logits/chosen": 0.19930198788642883, + "logits/rejected": 0.1775505691766739, + "logps/chosen": -64.07925415039062, + "logps/ref_chosen": -61.80266189575195, + "logps/ref_rejected": -76.60002136230469, + "logps/rejected": -80.12403869628906, + "loss": 0.9295, + "margin_dpo/margin_mean": 1.2474339008331299, + "margin_dpo/margin_std": 1.4843469858169556, + "step": 410 + }, + { + "epoch": 0.6213151927437641, + "fcm_dpo/beta": 0.6963478326797485, + "fcm_dpo/delta": -0.008774511516094208, + "fcm_dpo/margin": 1.4437086582183838, + "fcm_dpo/q_t": 0.3453066349029541, + "grad_norm": 181.04974365234375, + "learning_rate": 1.8977614860195296e-07, + "logits/chosen": 0.15856947004795074, + "logits/rejected": 0.1135367900133133, + "logps/chosen": -57.06800842285156, + "logps/ref_chosen": -54.44539260864258, + "logps/ref_rejected": -74.5650863647461, + "logps/rejected": -78.63140869140625, + "loss": 1.0746, + "margin_dpo/margin_mean": 1.4437092542648315, + "margin_dpo/margin_std": 2.3472909927368164, + "step": 411 + }, + { + "epoch": 0.6228269085411943, + "fcm_dpo/beta": 0.6941448450088501, + "fcm_dpo/delta": -0.037317849695682526, + "fcm_dpo/margin": 1.4864020347595215, + "fcm_dpo/q_t": 0.3218909502029419, + "grad_norm": 150.66571044921875, + "learning_rate": 1.8849371567184662e-07, + "logits/chosen": 0.15843887627124786, + "logits/rejected": 0.1075374186038971, + "logps/chosen": -58.101863861083984, + "logps/ref_chosen": -55.248085021972656, + "logps/ref_rejected": -68.96623229980469, + "logps/rejected": -73.30641174316406, + "loss": 0.9281, + "margin_dpo/margin_mean": 1.4864020347595215, + "margin_dpo/margin_std": 1.928009033203125, + "step": 412 + }, + { + "epoch": 0.6243386243386243, + "fcm_dpo/beta": 0.7043063640594482, + "fcm_dpo/delta": 0.07277508080005646, + "fcm_dpo/margin": 1.3262073993682861, + "fcm_dpo/q_t": 0.3529996871948242, + "grad_norm": 191.52565002441406, + "learning_rate": 1.872130032047302e-07, + "logits/chosen": 0.07102949917316437, + "logits/rejected": 0.04926881939172745, + "logps/chosen": -71.4292984008789, + "logps/ref_chosen": -68.72074890136719, + "logps/ref_rejected": -78.76539611816406, + "logps/rejected": -82.80016326904297, + "loss": 1.102, + "margin_dpo/margin_mean": 1.3262066841125488, + "margin_dpo/margin_std": 2.299673557281494, + "step": 413 + }, + { + "epoch": 0.6258503401360545, + "fcm_dpo/beta": 0.6957840919494629, + "fcm_dpo/delta": -0.08349283784627914, + "fcm_dpo/margin": 1.542493462562561, + "fcm_dpo/q_t": 0.3177918791770935, + "grad_norm": 144.82449340820312, + "learning_rate": 1.8593404702488436e-07, + "logits/chosen": 0.17691153287887573, + "logits/rejected": 0.12794461846351624, + "logps/chosen": -56.64301300048828, + "logps/ref_chosen": -54.138214111328125, + "logps/ref_rejected": -74.65741729736328, + "logps/rejected": -78.7047119140625, + "loss": 0.9185, + "margin_dpo/margin_mean": 1.542493462562561, + "margin_dpo/margin_std": 2.0337252616882324, + "step": 414 + }, + { + "epoch": 0.6273620559334845, + "fcm_dpo/beta": 0.7059125900268555, + "fcm_dpo/delta": 0.12531909346580505, + "fcm_dpo/margin": 1.2547566890716553, + "fcm_dpo/q_t": 0.3529791235923767, + "grad_norm": 175.94845581054688, + "learning_rate": 1.846568829074628e-07, + "logits/chosen": 0.17382574081420898, + "logits/rejected": 0.15772220492362976, + "logps/chosen": -58.64318084716797, + "logps/ref_chosen": -55.91856002807617, + "logps/ref_rejected": -61.747703552246094, + "logps/rejected": -65.72708129882812, + "loss": 1.1261, + "margin_dpo/margin_mean": 1.2547566890716553, + "margin_dpo/margin_std": 2.235924243927002, + "step": 415 + }, + { + "epoch": 0.6288737717309146, + "fcm_dpo/beta": 0.7500655651092529, + "fcm_dpo/delta": 0.13376402854919434, + "fcm_dpo/margin": 1.1456831693649292, + "fcm_dpo/q_t": 0.356852650642395, + "grad_norm": 205.03404235839844, + "learning_rate": 1.8338154657749128e-07, + "logits/chosen": 0.18582022190093994, + "logits/rejected": 0.1482175588607788, + "logps/chosen": -57.20629119873047, + "logps/ref_chosen": -54.72308349609375, + "logps/ref_rejected": -69.17388916015625, + "logps/rejected": -72.80278015136719, + "loss": 1.154, + "margin_dpo/margin_mean": 1.1456834077835083, + "margin_dpo/margin_std": 2.031163454055786, + "step": 416 + }, + { + "epoch": 0.6303854875283447, + "fcm_dpo/beta": 0.7249786853790283, + "fcm_dpo/delta": -0.24252735078334808, + "fcm_dpo/margin": 1.6672532558441162, + "fcm_dpo/q_t": 0.29793938994407654, + "grad_norm": 194.7884521484375, + "learning_rate": 1.8210807370886849e-07, + "logits/chosen": 0.24135205149650574, + "logits/rejected": 0.19347181916236877, + "logps/chosen": -59.584938049316406, + "logps/ref_chosen": -56.791259765625, + "logps/ref_rejected": -68.7791748046875, + "logps/rejected": -73.2401123046875, + "loss": 0.9246, + "margin_dpo/margin_mean": 1.6672537326812744, + "margin_dpo/margin_std": 2.092496395111084, + "step": 417 + }, + { + "epoch": 0.6318972033257747, + "fcm_dpo/beta": 0.7283662557601929, + "fcm_dpo/delta": 0.3391045331954956, + "fcm_dpo/margin": 0.9409202337265015, + "fcm_dpo/q_t": 0.3989126980304718, + "grad_norm": 244.12945556640625, + "learning_rate": 1.8083649992336825e-07, + "logits/chosen": 0.19916735589504242, + "logits/rejected": 0.2042698860168457, + "logps/chosen": -72.2450942993164, + "logps/ref_chosen": -69.10798645019531, + "logps/ref_rejected": -75.09132385253906, + "logps/rejected": -79.16935729980469, + "loss": 1.3312, + "margin_dpo/margin_mean": 0.9409199357032776, + "margin_dpo/margin_std": 2.4089250564575195, + "step": 418 + }, + { + "epoch": 0.6334089191232048, + "fcm_dpo/beta": 0.7232016324996948, + "fcm_dpo/delta": -0.157709002494812, + "fcm_dpo/margin": 1.570444941520691, + "fcm_dpo/q_t": 0.3125431537628174, + "grad_norm": 159.44833374023438, + "learning_rate": 1.7956686078964255e-07, + "logits/chosen": 0.09053687751293182, + "logits/rejected": 0.04949123412370682, + "logps/chosen": -60.52400588989258, + "logps/ref_chosen": -58.1717643737793, + "logps/ref_rejected": -71.67066955566406, + "logps/rejected": -75.59335327148438, + "loss": 0.9202, + "margin_dpo/margin_mean": 1.5704445838928223, + "margin_dpo/margin_std": 2.1407108306884766, + "step": 419 + }, + { + "epoch": 0.6349206349206349, + "fcm_dpo/beta": 0.7380191683769226, + "fcm_dpo/delta": 0.1838487833738327, + "fcm_dpo/margin": 1.1262156963348389, + "fcm_dpo/q_t": 0.37532341480255127, + "grad_norm": 189.95645141601562, + "learning_rate": 1.782991918222275e-07, + "logits/chosen": 0.15650036931037903, + "logits/rejected": 0.11561809480190277, + "logps/chosen": -60.085960388183594, + "logps/ref_chosen": -57.05351257324219, + "logps/ref_rejected": -62.670982360839844, + "logps/rejected": -66.82964324951172, + "loss": 1.2387, + "margin_dpo/margin_mean": 1.126215934753418, + "margin_dpo/margin_std": 2.343519687652588, + "step": 420 + }, + { + "epoch": 0.636432350718065, + "fcm_dpo/beta": 0.7731253504753113, + "fcm_dpo/delta": 0.1831362247467041, + "fcm_dpo/margin": 1.076323390007019, + "fcm_dpo/q_t": 0.3773455023765564, + "grad_norm": 191.1004180908203, + "learning_rate": 1.7703352848054887e-07, + "logits/chosen": 0.13414627313613892, + "logits/rejected": 0.09018285572528839, + "logps/chosen": -60.364906311035156, + "logps/ref_chosen": -57.32324981689453, + "logps/ref_rejected": -75.33782958984375, + "logps/rejected": -79.455810546875, + "loss": 1.3191, + "margin_dpo/margin_mean": 1.0763235092163086, + "margin_dpo/margin_std": 2.4303441047668457, + "step": 421 + }, + { + "epoch": 0.6379440665154951, + "fcm_dpo/beta": 0.7678795456886292, + "fcm_dpo/delta": -0.15709903836250305, + "fcm_dpo/margin": 1.4837114810943604, + "fcm_dpo/q_t": 0.3305337727069855, + "grad_norm": 208.15638732910156, + "learning_rate": 1.7576990616793137e-07, + "logits/chosen": 0.18905231356620789, + "logits/rejected": 0.15734535455703735, + "logps/chosen": -69.5533447265625, + "logps/ref_chosen": -67.05757141113281, + "logps/ref_rejected": -72.12803649902344, + "logps/rejected": -76.10751342773438, + "loss": 1.0168, + "margin_dpo/margin_mean": 1.4837113618850708, + "margin_dpo/margin_std": 2.3469998836517334, + "step": 422 + }, + { + "epoch": 0.6394557823129252, + "fcm_dpo/beta": 0.7414518594741821, + "fcm_dpo/delta": -0.12381698191165924, + "fcm_dpo/margin": 1.4967904090881348, + "fcm_dpo/q_t": 0.3256801962852478, + "grad_norm": 163.99639892578125, + "learning_rate": 1.745083602306071e-07, + "logits/chosen": 0.17420369386672974, + "logits/rejected": 0.12361004948616028, + "logps/chosen": -56.651309967041016, + "logps/ref_chosen": -54.06167221069336, + "logps/ref_rejected": -76.64092254638672, + "logps/rejected": -80.72735595703125, + "loss": 1.0145, + "margin_dpo/margin_mean": 1.4967900514602661, + "margin_dpo/margin_std": 2.2581710815429688, + "step": 423 + }, + { + "epoch": 0.6409674981103552, + "fcm_dpo/beta": 0.7214820384979248, + "fcm_dpo/delta": -0.13758614659309387, + "fcm_dpo/margin": 1.5547800064086914, + "fcm_dpo/q_t": 0.3215063512325287, + "grad_norm": 178.93624877929688, + "learning_rate": 1.7324892595672804e-07, + "logits/chosen": 0.12239620089530945, + "logits/rejected": 0.09341743588447571, + "logps/chosen": -56.10038757324219, + "logps/ref_chosen": -53.60887145996094, + "logps/ref_rejected": -79.2139892578125, + "logps/rejected": -83.26029205322266, + "loss": 0.9368, + "margin_dpo/margin_mean": 1.554780125617981, + "margin_dpo/margin_std": 2.143885612487793, + "step": 424 + }, + { + "epoch": 0.6424792139077853, + "fcm_dpo/beta": 0.7137551307678223, + "fcm_dpo/delta": -0.05110887810587883, + "fcm_dpo/margin": 1.4651919603347778, + "fcm_dpo/q_t": 0.32126736640930176, + "grad_norm": 145.7388153076172, + "learning_rate": 1.7199163857537824e-07, + "logits/chosen": 0.18603307008743286, + "logits/rejected": 0.16199590265750885, + "logps/chosen": -60.9288330078125, + "logps/ref_chosen": -58.41468048095703, + "logps/ref_rejected": -66.59054565429688, + "logps/rejected": -70.56989288330078, + "loss": 0.9247, + "margin_dpo/margin_mean": 1.4651916027069092, + "margin_dpo/margin_std": 1.9558470249176025, + "step": 425 + }, + { + "epoch": 0.6439909297052154, + "fcm_dpo/beta": 0.7687985301017761, + "fcm_dpo/delta": 0.5285735130310059, + "fcm_dpo/margin": 0.6476625204086304, + "fcm_dpo/q_t": 0.4075689911842346, + "grad_norm": 223.34030151367188, + "learning_rate": 1.7073653325558828e-07, + "logits/chosen": 0.14943718910217285, + "logits/rejected": 0.15013810992240906, + "logps/chosen": -74.68634033203125, + "logps/ref_chosen": -71.70822143554688, + "logps/ref_rejected": -73.57725524902344, + "logps/rejected": -77.20303344726562, + "loss": 1.3908, + "margin_dpo/margin_mean": 0.6476625800132751, + "margin_dpo/margin_std": 1.9961354732513428, + "step": 426 + }, + { + "epoch": 0.6455026455026455, + "fcm_dpo/beta": 0.7852897644042969, + "fcm_dpo/delta": -0.0677163228392601, + "fcm_dpo/margin": 1.3504165410995483, + "fcm_dpo/q_t": 0.33633100986480713, + "grad_norm": 163.32342529296875, + "learning_rate": 1.6948364510535218e-07, + "logits/chosen": 0.20361992716789246, + "logits/rejected": 0.16401880979537964, + "logps/chosen": -61.42845916748047, + "logps/ref_chosen": -58.64276885986328, + "logps/ref_rejected": -86.25437927246094, + "logps/rejected": -90.39048767089844, + "loss": 1.0482, + "margin_dpo/margin_mean": 1.350417137145996, + "margin_dpo/margin_std": 2.189502716064453, + "step": 427 + }, + { + "epoch": 0.6470143613000756, + "fcm_dpo/beta": 0.7506411671638489, + "fcm_dpo/delta": -0.2936919033527374, + "fcm_dpo/margin": 1.6753690242767334, + "fcm_dpo/q_t": 0.29713624715805054, + "grad_norm": 168.94140625, + "learning_rate": 1.6823300917064458e-07, + "logits/chosen": 0.1335376501083374, + "logits/rejected": 0.09132213890552521, + "logps/chosen": -69.21764373779297, + "logps/ref_chosen": -66.5960464477539, + "logps/ref_rejected": -82.3941650390625, + "logps/rejected": -86.69113159179688, + "loss": 0.8538, + "margin_dpo/margin_mean": 1.6753690242767334, + "margin_dpo/margin_std": 2.044569969177246, + "step": 428 + }, + { + "epoch": 0.6485260770975056, + "fcm_dpo/beta": 0.730757474899292, + "fcm_dpo/delta": -0.06324490904808044, + "fcm_dpo/margin": 1.4457752704620361, + "fcm_dpo/q_t": 0.3198990225791931, + "grad_norm": 177.0709991455078, + "learning_rate": 1.669846604344412e-07, + "logits/chosen": 0.13211305439472198, + "logits/rejected": 0.13196702301502228, + "logps/chosen": -59.602317810058594, + "logps/ref_chosen": -57.00970458984375, + "logps/ref_rejected": -59.86549377441406, + "logps/rejected": -63.90388488769531, + "loss": 0.969, + "margin_dpo/margin_mean": 1.4457753896713257, + "margin_dpo/margin_std": 2.0829176902770996, + "step": 429 + }, + { + "epoch": 0.6500377928949358, + "fcm_dpo/beta": 0.733278751373291, + "fcm_dpo/delta": 0.05039960518479347, + "fcm_dpo/margin": 1.3011215925216675, + "fcm_dpo/q_t": 0.3425254225730896, + "grad_norm": 174.14768981933594, + "learning_rate": 1.6573863381573954e-07, + "logits/chosen": 0.08477595448493958, + "logits/rejected": 0.0728166401386261, + "logps/chosen": -61.990821838378906, + "logps/ref_chosen": -59.563194274902344, + "logps/ref_rejected": -70.52289581298828, + "logps/rejected": -74.25164794921875, + "loss": 1.0004, + "margin_dpo/margin_mean": 1.3011209964752197, + "margin_dpo/margin_std": 1.9769561290740967, + "step": 430 + }, + { + "epoch": 0.6515495086923658, + "fcm_dpo/beta": 0.7297165393829346, + "fcm_dpo/delta": 0.036733031272888184, + "fcm_dpo/margin": 1.3228065967559814, + "fcm_dpo/q_t": 0.3405199646949768, + "grad_norm": 171.15907287597656, + "learning_rate": 1.6449496416858282e-07, + "logits/chosen": 0.17870327830314636, + "logits/rejected": 0.14549797773361206, + "logps/chosen": -52.56354904174805, + "logps/ref_chosen": -50.20032501220703, + "logps/ref_rejected": -77.81680297851562, + "logps/rejected": -81.50283813476562, + "loss": 1.0051, + "margin_dpo/margin_mean": 1.3228061199188232, + "margin_dpo/margin_std": 2.025378704071045, + "step": 431 + }, + { + "epoch": 0.6530612244897959, + "fcm_dpo/beta": 0.7157025337219238, + "fcm_dpo/delta": -0.10452289134263992, + "fcm_dpo/margin": 1.518718957901001, + "fcm_dpo/q_t": 0.32164376974105835, + "grad_norm": 169.30746459960938, + "learning_rate": 1.632536862810844e-07, + "logits/chosen": 0.17181310057640076, + "logits/rejected": 0.1360006034374237, + "logps/chosen": -64.20341491699219, + "logps/ref_chosen": -61.662757873535156, + "logps/ref_rejected": -83.94496154785156, + "logps/rejected": -88.00434112548828, + "loss": 0.9613, + "margin_dpo/margin_mean": 1.5187186002731323, + "margin_dpo/margin_std": 2.0636777877807617, + "step": 432 + }, + { + "epoch": 0.654572940287226, + "fcm_dpo/beta": 0.6925790309906006, + "fcm_dpo/delta": -0.28451234102249146, + "fcm_dpo/margin": 1.802809476852417, + "fcm_dpo/q_t": 0.3154519498348236, + "grad_norm": 145.2095947265625, + "learning_rate": 1.6201483487445515e-07, + "logits/chosen": 0.24505794048309326, + "logits/rejected": 0.23090487718582153, + "logps/chosen": -66.46299743652344, + "logps/ref_chosen": -63.72917938232422, + "logps/ref_rejected": -65.8391342163086, + "logps/rejected": -70.3757553100586, + "loss": 0.9081, + "margin_dpo/margin_mean": 1.802809476852417, + "margin_dpo/margin_std": 2.475156784057617, + "step": 433 + }, + { + "epoch": 0.656084656084656, + "fcm_dpo/beta": 0.659028947353363, + "fcm_dpo/delta": -0.13485188782215118, + "fcm_dpo/margin": 1.6889315843582153, + "fcm_dpo/q_t": 0.31248465180397034, + "grad_norm": 130.46426391601562, + "learning_rate": 1.6077844460203204e-07, + "logits/chosen": 0.21205420792102814, + "logits/rejected": 0.16366201639175415, + "logps/chosen": -50.280784606933594, + "logps/ref_chosen": -47.97331619262695, + "logps/ref_rejected": -72.51132202148438, + "logps/rejected": -76.50772857666016, + "loss": 0.9792, + "margin_dpo/margin_mean": 1.6889313459396362, + "margin_dpo/margin_std": 2.39209246635437, + "step": 434 + }, + { + "epoch": 0.6575963718820862, + "fcm_dpo/beta": 0.6766383647918701, + "fcm_dpo/delta": 0.04958092421293259, + "fcm_dpo/margin": 1.4080250263214111, + "fcm_dpo/q_t": 0.3341384530067444, + "grad_norm": 151.441162109375, + "learning_rate": 1.5954455004830878e-07, + "logits/chosen": 0.22931721806526184, + "logits/rejected": 0.1980956494808197, + "logps/chosen": -59.903053283691406, + "logps/ref_chosen": -57.06024932861328, + "logps/ref_rejected": -71.69146728515625, + "logps/rejected": -75.94229888916016, + "loss": 0.9779, + "margin_dpo/margin_mean": 1.4080252647399902, + "margin_dpo/margin_std": 2.0040295124053955, + "step": 435 + }, + { + "epoch": 0.6591080876795162, + "fcm_dpo/beta": 0.6710116863250732, + "fcm_dpo/delta": -0.004951075650751591, + "fcm_dpo/margin": 1.4969241619110107, + "fcm_dpo/q_t": 0.3257167935371399, + "grad_norm": 149.94261169433594, + "learning_rate": 1.5831318572796847e-07, + "logits/chosen": 0.16502049565315247, + "logits/rejected": 0.1188136488199234, + "logps/chosen": -58.86479949951172, + "logps/ref_chosen": -56.158050537109375, + "logps/ref_rejected": -67.63787841796875, + "logps/rejected": -71.841552734375, + "loss": 0.9499, + "margin_dpo/margin_mean": 1.4969241619110107, + "margin_dpo/margin_std": 2.013727903366089, + "step": 436 + }, + { + "epoch": 0.6606198034769464, + "fcm_dpo/beta": 0.6555310487747192, + "fcm_dpo/delta": 0.013455048203468323, + "fcm_dpo/margin": 1.48863685131073, + "fcm_dpo/q_t": 0.3591005504131317, + "grad_norm": 169.48338317871094, + "learning_rate": 1.5708438608491815e-07, + "logits/chosen": 0.15093836188316345, + "logits/rejected": 0.05515362694859505, + "logps/chosen": -59.8901252746582, + "logps/ref_chosen": -56.98578643798828, + "logps/ref_rejected": -85.61524963378906, + "logps/rejected": -90.00823974609375, + "loss": 1.1728, + "margin_dpo/margin_mean": 1.4886366128921509, + "margin_dpo/margin_std": 2.654061794281006, + "step": 437 + }, + { + "epoch": 0.6621315192743764, + "fcm_dpo/beta": 0.6835014820098877, + "fcm_dpo/delta": 0.04397985339164734, + "fcm_dpo/margin": 1.4018324613571167, + "fcm_dpo/q_t": 0.3333927392959595, + "grad_norm": 121.23139190673828, + "learning_rate": 1.558581854913253e-07, + "logits/chosen": 0.19322752952575684, + "logits/rejected": 0.14423127472400665, + "logps/chosen": -43.83991241455078, + "logps/ref_chosen": -41.27777862548828, + "logps/ref_rejected": -65.33840942382812, + "logps/rejected": -69.3023681640625, + "loss": 0.9826, + "margin_dpo/margin_mean": 1.401832938194275, + "margin_dpo/margin_std": 1.9659230709075928, + "step": 438 + }, + { + "epoch": 0.6636432350718064, + "fcm_dpo/beta": 0.6828280091285706, + "fcm_dpo/delta": -0.04300057888031006, + "fcm_dpo/margin": 1.5173701047897339, + "fcm_dpo/q_t": 0.31167930364608765, + "grad_norm": 186.42234802246094, + "learning_rate": 1.5463461824665658e-07, + "logits/chosen": 0.11366529762744904, + "logits/rejected": 0.09049699455499649, + "logps/chosen": -83.722900390625, + "logps/ref_chosen": -81.41764831542969, + "logps/ref_rejected": -94.72309875488281, + "logps/rejected": -98.54571533203125, + "loss": 0.9309, + "margin_dpo/margin_mean": 1.5173696279525757, + "margin_dpo/margin_std": 1.9623498916625977, + "step": 439 + }, + { + "epoch": 0.6651549508692366, + "fcm_dpo/beta": 0.65775465965271, + "fcm_dpo/delta": -0.16628237068653107, + "fcm_dpo/margin": 1.7444255352020264, + "fcm_dpo/q_t": 0.30622392892837524, + "grad_norm": 129.40713500976562, + "learning_rate": 1.534137185767178e-07, + "logits/chosen": 0.1228909119963646, + "logits/rejected": 0.04697669297456741, + "logps/chosen": -44.876808166503906, + "logps/ref_chosen": -42.538185119628906, + "logps/ref_rejected": -69.78813934326172, + "logps/rejected": -73.87118530273438, + "loss": 0.8618, + "margin_dpo/margin_mean": 1.744425654411316, + "margin_dpo/margin_std": 2.120790481567383, + "step": 440 + }, + { + "epoch": 0.6666666666666666, + "fcm_dpo/beta": 0.6342558860778809, + "fcm_dpo/delta": -0.15066742897033691, + "fcm_dpo/margin": 1.7861425876617432, + "fcm_dpo/q_t": 0.292005717754364, + "grad_norm": 122.53746032714844, + "learning_rate": 1.521955206326976e-07, + "logits/chosen": 0.14506568014621735, + "logits/rejected": 0.07649335265159607, + "logps/chosen": -59.81481170654297, + "logps/ref_chosen": -57.593223571777344, + "logps/ref_rejected": -84.82878875732422, + "logps/rejected": -88.83651733398438, + "loss": 0.7795, + "margin_dpo/margin_mean": 1.7861430644989014, + "margin_dpo/margin_std": 1.8194975852966309, + "step": 441 + }, + { + "epoch": 0.6681783824640968, + "fcm_dpo/beta": 0.6411465406417847, + "fcm_dpo/delta": 0.09939359128475189, + "fcm_dpo/margin": 1.418702483177185, + "fcm_dpo/q_t": 0.34212759137153625, + "grad_norm": 168.3711395263672, + "learning_rate": 1.5098005849021078e-07, + "logits/chosen": 0.19873833656311035, + "logits/rejected": 0.16300469636917114, + "logps/chosen": -70.19367980957031, + "logps/ref_chosen": -67.46121978759766, + "logps/ref_rejected": -89.0693588256836, + "logps/rejected": -93.22052001953125, + "loss": 0.9856, + "margin_dpo/margin_mean": 1.418702483177185, + "margin_dpo/margin_std": 2.081653594970703, + "step": 442 + }, + { + "epoch": 0.6696900982615268, + "fcm_dpo/beta": 0.6237589120864868, + "fcm_dpo/delta": -0.24319452047348022, + "fcm_dpo/margin": 1.9465469121932983, + "fcm_dpo/q_t": 0.29789623618125916, + "grad_norm": 127.22098541259766, + "learning_rate": 1.4976736614834662e-07, + "logits/chosen": 0.18308596312999725, + "logits/rejected": 0.12943433225154877, + "logps/chosen": -57.22307586669922, + "logps/ref_chosen": -54.79610061645508, + "logps/ref_rejected": -77.80781555175781, + "logps/rejected": -82.18133544921875, + "loss": 0.9041, + "margin_dpo/margin_mean": 1.946547269821167, + "margin_dpo/margin_std": 2.4835057258605957, + "step": 443 + }, + { + "epoch": 0.671201814058957, + "fcm_dpo/beta": 0.661973237991333, + "fcm_dpo/delta": 0.5227335691452026, + "fcm_dpo/margin": 0.7623451948165894, + "fcm_dpo/q_t": 0.4185434579849243, + "grad_norm": 196.5355224609375, + "learning_rate": 1.4855747752871654e-07, + "logits/chosen": 0.18811815977096558, + "logits/rejected": 0.1319284737110138, + "logps/chosen": -61.6188850402832, + "logps/ref_chosen": -58.749061584472656, + "logps/ref_rejected": -86.87396240234375, + "logps/rejected": -90.50614166259766, + "loss": 1.3947, + "margin_dpo/margin_mean": 0.7623450756072998, + "margin_dpo/margin_std": 2.2845544815063477, + "step": 444 + }, + { + "epoch": 0.672713529856387, + "fcm_dpo/beta": 0.6808522343635559, + "fcm_dpo/delta": -0.0005050599575042725, + "fcm_dpo/margin": 1.4694095849990845, + "fcm_dpo/q_t": 0.3312973380088806, + "grad_norm": 179.10009765625, + "learning_rate": 1.473504264745062e-07, + "logits/chosen": 0.1777781844139099, + "logits/rejected": 0.16238978505134583, + "logps/chosen": -64.01307678222656, + "logps/ref_chosen": -60.91743850708008, + "logps/ref_rejected": -71.5637435913086, + "logps/rejected": -76.1287841796875, + "loss": 1.0013, + "margin_dpo/margin_mean": 1.4694093465805054, + "margin_dpo/margin_std": 2.1590332984924316, + "step": 445 + }, + { + "epoch": 0.674225245653817, + "fcm_dpo/beta": 0.6526922583580017, + "fcm_dpo/delta": -0.4451631009578705, + "fcm_dpo/margin": 2.119361162185669, + "fcm_dpo/q_t": 0.25671201944351196, + "grad_norm": 107.62913513183594, + "learning_rate": 1.461462467495284e-07, + "logits/chosen": 0.19539491832256317, + "logits/rejected": 0.13630658388137817, + "logps/chosen": -51.429866790771484, + "logps/ref_chosen": -48.79924774169922, + "logps/ref_rejected": -71.8719482421875, + "logps/rejected": -76.62193298339844, + "loss": 0.6997, + "margin_dpo/margin_mean": 2.1193606853485107, + "margin_dpo/margin_std": 1.913917064666748, + "step": 446 + }, + { + "epoch": 0.6757369614512472, + "fcm_dpo/beta": 0.6015419363975525, + "fcm_dpo/delta": -0.2893209457397461, + "fcm_dpo/margin": 2.0844779014587402, + "fcm_dpo/q_t": 0.27232983708381653, + "grad_norm": 112.18714141845703, + "learning_rate": 1.4494497203727843e-07, + "logits/chosen": 0.1319853812456131, + "logits/rejected": 0.05933520570397377, + "logps/chosen": -55.8741569519043, + "logps/ref_chosen": -53.682716369628906, + "logps/ref_rejected": -88.17315673828125, + "logps/rejected": -92.44908142089844, + "loss": 0.8532, + "margin_dpo/margin_mean": 2.084477424621582, + "margin_dpo/margin_std": 2.4492878913879395, + "step": 447 + }, + { + "epoch": 0.6772486772486772, + "fcm_dpo/beta": 0.5884913206100464, + "fcm_dpo/delta": -8.67573544383049e-06, + "fcm_dpo/margin": 1.6992573738098145, + "fcm_dpo/q_t": 0.3187348246574402, + "grad_norm": 115.34869384765625, + "learning_rate": 1.4374663593999256e-07, + "logits/chosen": 0.17938536405563354, + "logits/rejected": 0.14003482460975647, + "logps/chosen": -56.258331298828125, + "logps/ref_chosen": -53.75125503540039, + "logps/ref_rejected": -77.17623901367188, + "logps/rejected": -81.382568359375, + "loss": 0.8966, + "margin_dpo/margin_mean": 1.6992576122283936, + "margin_dpo/margin_std": 2.116860866546631, + "step": 448 + }, + { + "epoch": 0.6787603930461074, + "fcm_dpo/beta": 0.6253612041473389, + "fcm_dpo/delta": 0.423923134803772, + "fcm_dpo/margin": 0.9630190134048462, + "fcm_dpo/q_t": 0.38473382592201233, + "grad_norm": 156.52899169921875, + "learning_rate": 1.4255127197770707e-07, + "logits/chosen": 0.07655443251132965, + "logits/rejected": 0.061458148062229156, + "logps/chosen": -78.82083129882812, + "logps/ref_chosen": -75.82737731933594, + "logps/ref_rejected": -82.20687866210938, + "logps/rejected": -86.16334533691406, + "loss": 1.1046, + "margin_dpo/margin_mean": 0.9630191326141357, + "margin_dpo/margin_std": 1.6913801431655884, + "step": 449 + }, + { + "epoch": 0.6802721088435374, + "fcm_dpo/beta": 0.6548283100128174, + "fcm_dpo/delta": 0.12034881114959717, + "fcm_dpo/margin": 1.3581256866455078, + "fcm_dpo/q_t": 0.35161659121513367, + "grad_norm": 150.02163696289062, + "learning_rate": 1.4135891358732205e-07, + "logits/chosen": 0.21886947751045227, + "logits/rejected": 0.1426752656698227, + "logps/chosen": -49.74790954589844, + "logps/ref_chosen": -47.11572265625, + "logps/ref_rejected": -78.7546615600586, + "logps/rejected": -82.7449722290039, + "loss": 1.0203, + "margin_dpo/margin_mean": 1.3581254482269287, + "margin_dpo/margin_std": 2.10986590385437, + "step": 450 + }, + { + "epoch": 0.6817838246409675, + "fcm_dpo/beta": 0.6705623865127563, + "fcm_dpo/delta": 0.17599515616893768, + "fcm_dpo/margin": 1.2513468265533447, + "fcm_dpo/q_t": 0.3569420576095581, + "grad_norm": 179.85128784179688, + "learning_rate": 1.4016959412166437e-07, + "logits/chosen": 0.18319055438041687, + "logits/rejected": 0.14441323280334473, + "logps/chosen": -66.11190795898438, + "logps/ref_chosen": -63.350440979003906, + "logps/ref_rejected": -76.28530883789062, + "logps/rejected": -80.29811096191406, + "loss": 1.0998, + "margin_dpo/margin_mean": 1.251347541809082, + "margin_dpo/margin_std": 2.137026786804199, + "step": 451 + }, + { + "epoch": 0.6832955404383976, + "fcm_dpo/beta": 0.6785616874694824, + "fcm_dpo/delta": -0.009446687065064907, + "fcm_dpo/margin": 1.486222743988037, + "fcm_dpo/q_t": 0.32352200150489807, + "grad_norm": 165.91653442382812, + "learning_rate": 1.3898334684855645e-07, + "logits/chosen": 0.13353146612644196, + "logits/rejected": 0.08063468337059021, + "logps/chosen": -58.241943359375, + "logps/ref_chosen": -55.58583450317383, + "logps/ref_rejected": -77.68738555908203, + "logps/rejected": -81.8297119140625, + "loss": 0.9588, + "margin_dpo/margin_mean": 1.4862233400344849, + "margin_dpo/margin_std": 2.007995128631592, + "step": 452 + }, + { + "epoch": 0.6848072562358276, + "fcm_dpo/beta": 0.6866965293884277, + "fcm_dpo/delta": 0.07683775573968887, + "fcm_dpo/margin": 1.354539394378662, + "fcm_dpo/q_t": 0.36106228828430176, + "grad_norm": 152.8515167236328, + "learning_rate": 1.3780020494988445e-07, + "logits/chosen": 0.12297318130731583, + "logits/rejected": 0.09657086431980133, + "logps/chosen": -64.26089477539062, + "logps/ref_chosen": -61.778202056884766, + "logps/ref_rejected": -71.51403045654297, + "logps/rejected": -75.35125732421875, + "loss": 1.083, + "margin_dpo/margin_mean": 1.354539394378662, + "margin_dpo/margin_std": 2.2785511016845703, + "step": 453 + }, + { + "epoch": 0.6863189720332578, + "fcm_dpo/beta": 0.6685348749160767, + "fcm_dpo/delta": -0.19035013020038605, + "fcm_dpo/margin": 1.7468868494033813, + "fcm_dpo/q_t": 0.3080099821090698, + "grad_norm": 127.66053771972656, + "learning_rate": 1.366202015206706e-07, + "logits/chosen": 0.16820810735225677, + "logits/rejected": 0.13530485332012177, + "logps/chosen": -53.909812927246094, + "logps/ref_chosen": -51.59515380859375, + "logps/ref_rejected": -63.96732711791992, + "logps/rejected": -68.02886962890625, + "loss": 0.9374, + "margin_dpo/margin_mean": 1.7468867301940918, + "margin_dpo/margin_std": 2.297898292541504, + "step": 454 + }, + { + "epoch": 0.6878306878306878, + "fcm_dpo/beta": 0.6465753316879272, + "fcm_dpo/delta": -0.1078774556517601, + "fcm_dpo/margin": 1.6916618347167969, + "fcm_dpo/q_t": 0.31326356530189514, + "grad_norm": 150.88607788085938, + "learning_rate": 1.354433695681474e-07, + "logits/chosen": 0.040207911282777786, + "logits/rejected": 0.007178250700235367, + "logps/chosen": -73.28399658203125, + "logps/ref_chosen": -70.65170288085938, + "logps/ref_rejected": -77.44276428222656, + "logps/rejected": -81.7667236328125, + "loss": 0.8921, + "margin_dpo/margin_mean": 1.6916615962982178, + "margin_dpo/margin_std": 2.1551051139831543, + "step": 455 + }, + { + "epoch": 0.6893424036281179, + "fcm_dpo/beta": 0.6534501910209656, + "fcm_dpo/delta": 0.01624855026602745, + "fcm_dpo/margin": 1.5070427656173706, + "fcm_dpo/q_t": 0.32177361845970154, + "grad_norm": 149.37515258789062, + "learning_rate": 1.3426974201083439e-07, + "logits/chosen": 0.11166486144065857, + "logits/rejected": 0.07302643358707428, + "logps/chosen": -59.19749450683594, + "logps/ref_chosen": -56.398284912109375, + "logps/ref_rejected": -82.61642456054688, + "logps/rejected": -86.92267608642578, + "loss": 0.921, + "margin_dpo/margin_mean": 1.507042646408081, + "margin_dpo/margin_std": 1.9450860023498535, + "step": 456 + }, + { + "epoch": 0.690854119425548, + "fcm_dpo/beta": 0.6657828092575073, + "fcm_dpo/delta": 0.21099001169204712, + "fcm_dpo/margin": 1.2110447883605957, + "fcm_dpo/q_t": 0.3547195792198181, + "grad_norm": 146.60675048828125, + "learning_rate": 1.3309935167761717e-07, + "logits/chosen": 0.20737716555595398, + "logits/rejected": 0.1518602967262268, + "logps/chosen": -47.50776290893555, + "logps/ref_chosen": -44.72057342529297, + "logps/ref_rejected": -68.1158676147461, + "logps/rejected": -72.11409759521484, + "loss": 1.0168, + "margin_dpo/margin_mean": 1.2110450267791748, + "margin_dpo/margin_std": 1.8031151294708252, + "step": 457 + }, + { + "epoch": 0.6923658352229781, + "fcm_dpo/beta": 0.6568002104759216, + "fcm_dpo/delta": -0.16649408638477325, + "fcm_dpo/margin": 1.7431389093399048, + "fcm_dpo/q_t": 0.2945740222930908, + "grad_norm": 143.60699462890625, + "learning_rate": 1.3193223130682936e-07, + "logits/chosen": 0.15916739404201508, + "logits/rejected": 0.07576747238636017, + "logps/chosen": -52.41571807861328, + "logps/ref_chosen": -50.00569152832031, + "logps/ref_rejected": -87.50015258789062, + "logps/rejected": -91.6533203125, + "loss": 0.8903, + "margin_dpo/margin_mean": 1.7431399822235107, + "margin_dpo/margin_std": 2.1261298656463623, + "step": 458 + }, + { + "epoch": 0.6938775510204082, + "fcm_dpo/beta": 0.6480120420455933, + "fcm_dpo/delta": -0.23831713199615479, + "fcm_dpo/margin": 1.8602559566497803, + "fcm_dpo/q_t": 0.2993336021900177, + "grad_norm": 136.50265502929688, + "learning_rate": 1.3076841354533658e-07, + "logits/chosen": 0.18282179534435272, + "logits/rejected": 0.15275558829307556, + "logps/chosen": -67.98956298828125, + "logps/ref_chosen": -65.37794494628906, + "logps/ref_rejected": -88.19244384765625, + "logps/rejected": -92.66431427001953, + "loss": 0.8494, + "margin_dpo/margin_mean": 1.8602561950683594, + "margin_dpo/margin_std": 2.121655225753784, + "step": 459 + }, + { + "epoch": 0.6953892668178382, + "fcm_dpo/beta": 0.6043037176132202, + "fcm_dpo/delta": -0.21189001202583313, + "fcm_dpo/margin": 1.9620928764343262, + "fcm_dpo/q_t": 0.3046306073665619, + "grad_norm": 148.3003692626953, + "learning_rate": 1.2960793094762345e-07, + "logits/chosen": 0.19387787580490112, + "logits/rejected": 0.09904222190380096, + "logps/chosen": -67.26277160644531, + "logps/ref_chosen": -64.5616683959961, + "logps/ref_rejected": -88.67890167236328, + "logps/rejected": -93.34209442138672, + "loss": 0.8275, + "margin_dpo/margin_mean": 1.962093472480774, + "margin_dpo/margin_std": 2.332686185836792, + "step": 460 + }, + { + "epoch": 0.6969009826152683, + "fcm_dpo/beta": 0.5892372131347656, + "fcm_dpo/delta": -0.03291664272546768, + "fcm_dpo/margin": 1.7409393787384033, + "fcm_dpo/q_t": 0.31874844431877136, + "grad_norm": 127.28734588623047, + "learning_rate": 1.2845081597488286e-07, + "logits/chosen": 0.23900935053825378, + "logits/rejected": 0.17159438133239746, + "logps/chosen": -52.00554275512695, + "logps/ref_chosen": -49.4779167175293, + "logps/ref_rejected": -72.65262603759766, + "logps/rejected": -76.92119598388672, + "loss": 0.9043, + "margin_dpo/margin_mean": 1.7409393787384033, + "margin_dpo/margin_std": 2.132420063018799, + "step": 461 + }, + { + "epoch": 0.6984126984126984, + "fcm_dpo/beta": 0.577314019203186, + "fcm_dpo/delta": -0.1604897826910019, + "fcm_dpo/margin": 1.9741871356964111, + "fcm_dpo/q_t": 0.2894290089607239, + "grad_norm": 113.71949005126953, + "learning_rate": 1.27297100994108e-07, + "logits/chosen": 0.13943126797676086, + "logits/rejected": 0.09149923920631409, + "logps/chosen": -63.15895080566406, + "logps/ref_chosen": -60.4951171875, + "logps/ref_rejected": -74.82136535644531, + "logps/rejected": -79.45939636230469, + "loss": 0.7873, + "margin_dpo/margin_mean": 1.9741871356964111, + "margin_dpo/margin_std": 2.1054553985595703, + "step": 462 + }, + { + "epoch": 0.6999244142101285, + "fcm_dpo/beta": 0.5867961049079895, + "fcm_dpo/delta": 0.11635659635066986, + "fcm_dpo/margin": 1.523716688156128, + "fcm_dpo/q_t": 0.3367578387260437, + "grad_norm": 127.23412322998047, + "learning_rate": 1.2614681827718695e-07, + "logits/chosen": 0.15458309650421143, + "logits/rejected": 0.14070303738117218, + "logps/chosen": -70.25871276855469, + "logps/ref_chosen": -67.68511962890625, + "logps/ref_rejected": -71.32196044921875, + "logps/rejected": -75.41926574707031, + "loss": 0.931, + "margin_dpo/margin_mean": 1.523715853691101, + "margin_dpo/margin_std": 1.9335708618164062, + "step": 463 + }, + { + "epoch": 0.7014361300075586, + "fcm_dpo/beta": 0.6116993427276611, + "fcm_dpo/delta": 0.15539291501045227, + "fcm_dpo/margin": 1.3953006267547607, + "fcm_dpo/q_t": 0.3521912395954132, + "grad_norm": 161.49534606933594, + "learning_rate": 1.2500000000000005e-07, + "logits/chosen": 0.12334546446800232, + "logits/rejected": 0.10288789868354797, + "logps/chosen": -61.994964599609375, + "logps/ref_chosen": -59.16564178466797, + "logps/ref_rejected": -69.56146240234375, + "logps/rejected": -73.78608703613281, + "loss": 1.0905, + "margin_dpo/margin_mean": 1.3953003883361816, + "margin_dpo/margin_std": 2.2576606273651123, + "step": 464 + }, + { + "epoch": 0.7029478458049887, + "fcm_dpo/beta": 0.6138174533843994, + "fcm_dpo/delta": 0.10352025926113129, + "fcm_dpo/margin": 1.4749349355697632, + "fcm_dpo/q_t": 0.3457157015800476, + "grad_norm": 138.18177795410156, + "learning_rate": 1.238566782415197e-07, + "logits/chosen": 0.2334330677986145, + "logits/rejected": 0.18456201255321503, + "logps/chosen": -61.38888168334961, + "logps/ref_chosen": -58.513671875, + "logps/ref_rejected": -84.31745910644531, + "logps/rejected": -88.6676025390625, + "loss": 1.0361, + "margin_dpo/margin_mean": 1.4749336242675781, + "margin_dpo/margin_std": 2.255413293838501, + "step": 465 + }, + { + "epoch": 0.7044595616024187, + "fcm_dpo/beta": 0.6493447422981262, + "fcm_dpo/delta": 0.3105998635292053, + "fcm_dpo/margin": 1.0969356298446655, + "fcm_dpo/q_t": 0.37098121643066406, + "grad_norm": 185.26171875, + "learning_rate": 1.2271688498291334e-07, + "logits/chosen": 0.18046687543392181, + "logits/rejected": 0.1757928878068924, + "logps/chosen": -76.58619689941406, + "logps/ref_chosen": -73.26580810546875, + "logps/ref_rejected": -74.83621215820312, + "logps/rejected": -79.25353240966797, + "loss": 1.0578, + "margin_dpo/margin_mean": 1.096935749053955, + "margin_dpo/margin_std": 1.7959051132202148, + "step": 466 + }, + { + "epoch": 0.7059712773998488, + "fcm_dpo/beta": 0.6433865427970886, + "fcm_dpo/delta": -0.17267094552516937, + "fcm_dpo/margin": 1.7906594276428223, + "fcm_dpo/q_t": 0.3067885637283325, + "grad_norm": 113.32935333251953, + "learning_rate": 1.2158065210664848e-07, + "logits/chosen": 0.16693203151226044, + "logits/rejected": 0.060477063059806824, + "logps/chosen": -50.42617416381836, + "logps/ref_chosen": -47.57947540283203, + "logps/ref_rejected": -78.68522644042969, + "logps/rejected": -83.32258605957031, + "loss": 0.8707, + "margin_dpo/margin_mean": 1.7906594276428223, + "margin_dpo/margin_std": 2.2650554180145264, + "step": 467 + }, + { + "epoch": 0.7074829931972789, + "fcm_dpo/beta": 0.6142877340316772, + "fcm_dpo/delta": -0.2895115911960602, + "fcm_dpo/margin": 2.040811538696289, + "fcm_dpo/q_t": 0.28548452258110046, + "grad_norm": 135.33714294433594, + "learning_rate": 1.204480113956011e-07, + "logits/chosen": 0.17146506905555725, + "logits/rejected": 0.1613762527704239, + "logps/chosen": -66.34950256347656, + "logps/ref_chosen": -63.92778778076172, + "logps/ref_rejected": -76.51626586914062, + "logps/rejected": -80.97879028320312, + "loss": 0.796, + "margin_dpo/margin_mean": 2.040811061859131, + "margin_dpo/margin_std": 2.2666611671447754, + "step": 468 + }, + { + "epoch": 0.708994708994709, + "fcm_dpo/beta": 0.5907766222953796, + "fcm_dpo/delta": -0.01965993642807007, + "fcm_dpo/margin": 1.710775375366211, + "fcm_dpo/q_t": 0.3215448260307312, + "grad_norm": 115.97638702392578, + "learning_rate": 1.1931899453216697e-07, + "logits/chosen": 0.21937254071235657, + "logits/rejected": 0.2032082974910736, + "logps/chosen": -61.57379150390625, + "logps/ref_chosen": -59.05818176269531, + "logps/ref_rejected": -75.67672729492188, + "logps/rejected": -79.90310668945312, + "loss": 0.8873, + "margin_dpo/margin_mean": 1.710775375366211, + "margin_dpo/margin_std": 2.0356462001800537, + "step": 469 + }, + { + "epoch": 0.7105064247921391, + "fcm_dpo/beta": 0.6098539233207703, + "fcm_dpo/delta": 0.04422697797417641, + "fcm_dpo/margin": 1.5718050003051758, + "fcm_dpo/q_t": 0.3227683901786804, + "grad_norm": 119.2257308959961, + "learning_rate": 1.1819363309737438e-07, + "logits/chosen": 0.13354477286338806, + "logits/rejected": 0.08637814223766327, + "logps/chosen": -50.68056106567383, + "logps/ref_chosen": -47.86743927001953, + "logps/ref_rejected": -65.96859741210938, + "logps/rejected": -70.353515625, + "loss": 0.9471, + "margin_dpo/margin_mean": 1.5718050003051758, + "margin_dpo/margin_std": 2.07285475730896, + "step": 470 + }, + { + "epoch": 0.7120181405895691, + "fcm_dpo/beta": 0.6002909541130066, + "fcm_dpo/delta": -0.08815348893404007, + "fcm_dpo/margin": 1.7969985008239746, + "fcm_dpo/q_t": 0.3020731508731842, + "grad_norm": 131.24606323242188, + "learning_rate": 1.1707195857000215e-07, + "logits/chosen": 0.1816762238740921, + "logits/rejected": 0.12994712591171265, + "logps/chosen": -60.345733642578125, + "logps/ref_chosen": -57.777854919433594, + "logps/ref_rejected": -73.81172180175781, + "logps/rejected": -78.17660522460938, + "loss": 0.9, + "margin_dpo/margin_mean": 1.796998381614685, + "margin_dpo/margin_std": 2.2218680381774902, + "step": 471 + }, + { + "epoch": 0.7135298563869993, + "fcm_dpo/beta": 0.6000721454620361, + "fcm_dpo/delta": 0.006265308707952499, + "fcm_dpo/margin": 1.6558948755264282, + "fcm_dpo/q_t": 0.3246491551399231, + "grad_norm": 141.67776489257812, + "learning_rate": 1.1595400232569768e-07, + "logits/chosen": 0.20169669389724731, + "logits/rejected": 0.15981845557689667, + "logps/chosen": -58.36172866821289, + "logps/ref_chosen": -55.908668518066406, + "logps/ref_rejected": -74.70294189453125, + "logps/rejected": -78.81190490722656, + "loss": 0.9713, + "margin_dpo/margin_mean": 1.6558947563171387, + "margin_dpo/margin_std": 2.2935879230499268, + "step": 472 + }, + { + "epoch": 0.7150415721844293, + "fcm_dpo/beta": 0.5857222080230713, + "fcm_dpo/delta": -0.07999872416257858, + "fcm_dpo/margin": 1.8269248008728027, + "fcm_dpo/q_t": 0.32797205448150635, + "grad_norm": 127.49575805664062, + "learning_rate": 1.1483979563610069e-07, + "logits/chosen": 0.23042967915534973, + "logits/rejected": 0.15509197115898132, + "logps/chosen": -56.60422134399414, + "logps/ref_chosen": -54.16088104248047, + "logps/ref_rejected": -92.76789855957031, + "logps/rejected": -97.03816223144531, + "loss": 0.9793, + "margin_dpo/margin_mean": 1.8269245624542236, + "margin_dpo/margin_std": 2.6278867721557617, + "step": 473 + }, + { + "epoch": 0.7165532879818595, + "fcm_dpo/beta": 0.6013132333755493, + "fcm_dpo/delta": 0.11396686732769012, + "fcm_dpo/margin": 1.4877792596817017, + "fcm_dpo/q_t": 0.34768766164779663, + "grad_norm": 151.1398162841797, + "learning_rate": 1.1372936966796709e-07, + "logits/chosen": 0.210426926612854, + "logits/rejected": 0.1586945503950119, + "logps/chosen": -49.76041793823242, + "logps/ref_chosen": -46.685707092285156, + "logps/ref_rejected": -71.44731903076172, + "logps/rejected": -76.00980377197266, + "loss": 1.0262, + "margin_dpo/margin_mean": 1.487779140472412, + "margin_dpo/margin_std": 2.2466931343078613, + "step": 474 + }, + { + "epoch": 0.7180650037792895, + "fcm_dpo/beta": 0.56545090675354, + "fcm_dpo/delta": -0.3536713421344757, + "fcm_dpo/margin": 2.305065631866455, + "fcm_dpo/q_t": 0.26997214555740356, + "grad_norm": 108.57855224609375, + "learning_rate": 1.126227554822985e-07, + "logits/chosen": 0.15623445808887482, + "logits/rejected": 0.11391064524650574, + "logps/chosen": -61.26993179321289, + "logps/ref_chosen": -58.4873046875, + "logps/ref_rejected": -87.00187683105469, + "logps/rejected": -92.08956909179688, + "loss": 0.7329, + "margin_dpo/margin_mean": 2.3050661087036133, + "margin_dpo/margin_std": 2.278512477874756, + "step": 475 + }, + { + "epoch": 0.7195767195767195, + "fcm_dpo/beta": 0.5778172016143799, + "fcm_dpo/delta": 0.17008031904697418, + "fcm_dpo/margin": 1.4578584432601929, + "fcm_dpo/q_t": 0.3523421287536621, + "grad_norm": 163.50177001953125, + "learning_rate": 1.1151998403347243e-07, + "logits/chosen": 0.11702927947044373, + "logits/rejected": 0.10260109603404999, + "logps/chosen": -78.54095458984375, + "logps/ref_chosen": -75.38162231445312, + "logps/ref_rejected": -76.99822235107422, + "logps/rejected": -81.61541748046875, + "loss": 1.0833, + "margin_dpo/margin_mean": 1.4578593969345093, + "margin_dpo/margin_std": 2.4156503677368164, + "step": 476 + }, + { + "epoch": 0.7210884353741497, + "fcm_dpo/beta": 0.591883659362793, + "fcm_dpo/delta": 0.1255907416343689, + "fcm_dpo/margin": 1.4951434135437012, + "fcm_dpo/q_t": 0.3490224778652191, + "grad_norm": 168.64578247070312, + "learning_rate": 1.1042108616837692e-07, + "logits/chosen": 0.19208115339279175, + "logits/rejected": 0.15968218445777893, + "logps/chosen": -64.10305786132812, + "logps/ref_chosen": -61.073387145996094, + "logps/ref_rejected": -81.34375, + "logps/rejected": -85.86856079101562, + "loss": 1.0765, + "margin_dpo/margin_mean": 1.495143175125122, + "margin_dpo/margin_std": 2.407855987548828, + "step": 477 + }, + { + "epoch": 0.7226001511715797, + "fcm_dpo/beta": 0.6088930368423462, + "fcm_dpo/delta": 0.1552659273147583, + "fcm_dpo/margin": 1.4085874557495117, + "fcm_dpo/q_t": 0.35270342230796814, + "grad_norm": 148.8921356201172, + "learning_rate": 1.0932609262554746e-07, + "logits/chosen": 0.12717093527317047, + "logits/rejected": 0.1243201345205307, + "logps/chosen": -59.892295837402344, + "logps/ref_chosen": -57.16731643676758, + "logps/ref_rejected": -53.30917739868164, + "logps/rejected": -57.44274139404297, + "loss": 1.0176, + "margin_dpo/margin_mean": 1.408586859703064, + "margin_dpo/margin_std": 2.1223185062408447, + "step": 478 + }, + { + "epoch": 0.7241118669690099, + "fcm_dpo/beta": 0.607843816280365, + "fcm_dpo/delta": 0.027871206402778625, + "fcm_dpo/margin": 1.5997779369354248, + "fcm_dpo/q_t": 0.3466408848762512, + "grad_norm": 140.39195251464844, + "learning_rate": 1.0823503403430734e-07, + "logits/chosen": 0.08278117328882217, + "logits/rejected": 0.03745885565876961, + "logps/chosen": -62.16324996948242, + "logps/ref_chosen": -58.91331481933594, + "logps/ref_rejected": -63.7403450012207, + "logps/rejected": -68.59005737304688, + "loss": 1.0645, + "margin_dpo/margin_mean": 1.5997782945632935, + "margin_dpo/margin_std": 2.665220260620117, + "step": 479 + }, + { + "epoch": 0.7256235827664399, + "fcm_dpo/beta": 0.6309263110160828, + "fcm_dpo/delta": 0.006531953811645508, + "fcm_dpo/margin": 1.5598734617233276, + "fcm_dpo/q_t": 0.31962987780570984, + "grad_norm": 165.5882568359375, + "learning_rate": 1.0714794091391072e-07, + "logits/chosen": 0.13291680812835693, + "logits/rejected": 0.1215020939707756, + "logps/chosen": -65.6745376586914, + "logps/ref_chosen": -62.80061340332031, + "logps/ref_rejected": -67.58859252929688, + "logps/rejected": -72.02239227294922, + "loss": 1.0675, + "margin_dpo/margin_mean": 1.5598732233047485, + "margin_dpo/margin_std": 2.3550682067871094, + "step": 480 + }, + { + "epoch": 0.72713529856387, + "fcm_dpo/beta": 0.6073616743087769, + "fcm_dpo/delta": -0.08397047966718674, + "fcm_dpo/margin": 1.7691869735717773, + "fcm_dpo/q_t": 0.3220970034599304, + "grad_norm": 130.34205627441406, + "learning_rate": 1.0606484367268906e-07, + "logits/chosen": 0.11158512532711029, + "logits/rejected": 0.10091142356395721, + "logps/chosen": -67.81123352050781, + "logps/ref_chosen": -65.28649139404297, + "logps/ref_rejected": -70.78668212890625, + "logps/rejected": -75.08061218261719, + "loss": 0.9249, + "margin_dpo/margin_mean": 1.7691867351531982, + "margin_dpo/margin_std": 2.550887107849121, + "step": 481 + }, + { + "epoch": 0.7286470143613001, + "fcm_dpo/beta": 0.620997428894043, + "fcm_dpo/delta": 0.16822174191474915, + "fcm_dpo/margin": 1.3626244068145752, + "fcm_dpo/q_t": 0.35040992498397827, + "grad_norm": 171.70579528808594, + "learning_rate": 1.0498577260720048e-07, + "logits/chosen": 0.12327564507722855, + "logits/rejected": 0.022273845970630646, + "logps/chosen": -63.85777282714844, + "logps/ref_chosen": -60.906185150146484, + "logps/ref_rejected": -103.44656372070312, + "logps/rejected": -107.76078033447266, + "loss": 1.0802, + "margin_dpo/margin_mean": 1.3626246452331543, + "margin_dpo/margin_std": 2.2782201766967773, + "step": 482 + }, + { + "epoch": 0.7301587301587301, + "fcm_dpo/beta": 0.6081717014312744, + "fcm_dpo/delta": -0.14480583369731903, + "fcm_dpo/margin": 1.8506314754486084, + "fcm_dpo/q_t": 0.31974995136260986, + "grad_norm": 137.3916473388672, + "learning_rate": 1.0391075790138232e-07, + "logits/chosen": 0.20215514302253723, + "logits/rejected": 0.13051192462444305, + "logps/chosen": -56.16196823120117, + "logps/ref_chosen": -53.192012786865234, + "logps/ref_rejected": -81.83927154541016, + "logps/rejected": -86.65986633300781, + "loss": 0.9449, + "margin_dpo/margin_mean": 1.850631594657898, + "margin_dpo/margin_std": 2.5201127529144287, + "step": 483 + }, + { + "epoch": 0.7316704459561603, + "fcm_dpo/beta": 0.6308771967887878, + "fcm_dpo/delta": 0.20309945940971375, + "fcm_dpo/margin": 1.2873001098632812, + "fcm_dpo/q_t": 0.3507644832134247, + "grad_norm": 157.5721435546875, + "learning_rate": 1.0283982962570681e-07, + "logits/chosen": 0.18846221268177032, + "logits/rejected": 0.15292689204216003, + "logps/chosen": -60.82984924316406, + "logps/ref_chosen": -57.76945877075195, + "logps/ref_rejected": -71.6829833984375, + "logps/rejected": -76.03067016601562, + "loss": 1.0035, + "margin_dpo/margin_mean": 1.2873002290725708, + "margin_dpo/margin_std": 1.9288554191589355, + "step": 484 + }, + { + "epoch": 0.7331821617535903, + "fcm_dpo/beta": 0.5989984273910522, + "fcm_dpo/delta": -0.20387829840183258, + "fcm_dpo/margin": 1.9372670650482178, + "fcm_dpo/q_t": 0.30228108167648315, + "grad_norm": 129.9238739013672, + "learning_rate": 1.0177301773633992e-07, + "logits/chosen": 0.1555819809436798, + "logits/rejected": 0.13119317591190338, + "logps/chosen": -59.23163604736328, + "logps/ref_chosen": -56.63584899902344, + "logps/ref_rejected": -70.85614013671875, + "logps/rejected": -75.38919067382812, + "loss": 0.8593, + "margin_dpo/margin_mean": 1.9372668266296387, + "margin_dpo/margin_std": 2.2261605262756348, + "step": 485 + }, + { + "epoch": 0.7346938775510204, + "fcm_dpo/beta": 0.6060769557952881, + "fcm_dpo/delta": -0.02147604152560234, + "fcm_dpo/margin": 1.6815690994262695, + "fcm_dpo/q_t": 0.34005385637283325, + "grad_norm": 140.710693359375, + "learning_rate": 1.007103520743035e-07, + "logits/chosen": 0.1885671615600586, + "logits/rejected": 0.10879142582416534, + "logps/chosen": -59.6024169921875, + "logps/ref_chosen": -56.347023010253906, + "logps/ref_rejected": -85.97221374511719, + "logps/rejected": -90.9091796875, + "loss": 1.0424, + "margin_dpo/margin_mean": 1.6815693378448486, + "margin_dpo/margin_std": 2.760887861251831, + "step": 486 + }, + { + "epoch": 0.7362055933484505, + "fcm_dpo/beta": 0.6121037006378174, + "fcm_dpo/delta": 0.00196036696434021, + "fcm_dpo/margin": 1.6285200119018555, + "fcm_dpo/q_t": 0.3259222209453583, + "grad_norm": 140.3463897705078, + "learning_rate": 9.965186236464046e-08, + "logits/chosen": 0.1968272179365158, + "logits/rejected": 0.15555810928344727, + "logps/chosen": -63.40202331542969, + "logps/ref_chosen": -60.617218017578125, + "logps/ref_rejected": -82.50975036621094, + "logps/rejected": -86.9230728149414, + "loss": 0.9054, + "margin_dpo/margin_mean": 1.6285200119018555, + "margin_dpo/margin_std": 2.1146717071533203, + "step": 487 + }, + { + "epoch": 0.7377173091458806, + "fcm_dpo/beta": 0.6010755300521851, + "fcm_dpo/delta": -0.20396147668361664, + "fcm_dpo/margin": 1.956758975982666, + "fcm_dpo/q_t": 0.29864153265953064, + "grad_norm": 125.88450622558594, + "learning_rate": 9.859757821558337e-08, + "logits/chosen": 0.1706887185573578, + "logits/rejected": 0.11105503141880035, + "logps/chosen": -65.75413513183594, + "logps/ref_chosen": -63.10905075073242, + "logps/ref_rejected": -82.49348449707031, + "logps/rejected": -87.09532165527344, + "loss": 0.829, + "margin_dpo/margin_mean": 1.956758737564087, + "margin_dpo/margin_std": 2.2036216259002686, + "step": 488 + }, + { + "epoch": 0.7392290249433107, + "fcm_dpo/beta": 0.6157445311546326, + "fcm_dpo/delta": 0.3702337145805359, + "fcm_dpo/margin": 1.0620077848434448, + "fcm_dpo/q_t": 0.3890204429626465, + "grad_norm": 165.01705932617188, + "learning_rate": 9.754752911772615e-08, + "logits/chosen": 0.19015483558177948, + "logits/rejected": 0.15638966858386993, + "logps/chosen": -67.7677993774414, + "logps/ref_chosen": -64.98896026611328, + "logps/ref_rejected": -84.39607238769531, + "logps/rejected": -88.23690795898438, + "loss": 1.2058, + "margin_dpo/margin_mean": 1.0620079040527344, + "margin_dpo/margin_std": 2.2157487869262695, + "step": 489 + }, + { + "epoch": 0.7407407407407407, + "fcm_dpo/beta": 0.6417911648750305, + "fcm_dpo/delta": 0.10067185759544373, + "fcm_dpo/margin": 1.4126074314117432, + "fcm_dpo/q_t": 0.36741340160369873, + "grad_norm": 186.93276977539062, + "learning_rate": 9.650174444319956e-08, + "logits/chosen": 0.22295230627059937, + "logits/rejected": 0.2012663632631302, + "logps/chosen": -64.97543334960938, + "logps/ref_chosen": -61.90874481201172, + "logps/ref_rejected": -70.58566284179688, + "logps/rejected": -75.06495666503906, + "loss": 1.1948, + "margin_dpo/margin_mean": 1.4126070737838745, + "margin_dpo/margin_std": 2.7005350589752197, + "step": 490 + }, + { + "epoch": 0.7422524565381708, + "fcm_dpo/beta": 0.6383862495422363, + "fcm_dpo/delta": 0.07852260023355484, + "fcm_dpo/margin": 1.450089931488037, + "fcm_dpo/q_t": 0.33126381039619446, + "grad_norm": 139.84234619140625, + "learning_rate": 9.546025344484868e-08, + "logits/chosen": 0.11358515918254852, + "logits/rejected": 0.0668938159942627, + "logps/chosen": -58.38109588623047, + "logps/ref_chosen": -55.47570037841797, + "logps/ref_rejected": -78.70318603515625, + "logps/rejected": -83.05867004394531, + "loss": 0.9724, + "margin_dpo/margin_mean": 1.4500904083251953, + "margin_dpo/margin_std": 1.9347925186157227, + "step": 491 + }, + { + "epoch": 0.7437641723356009, + "fcm_dpo/beta": 0.6746935844421387, + "fcm_dpo/delta": 0.11538802087306976, + "fcm_dpo/margin": 1.3122856616973877, + "fcm_dpo/q_t": 0.3527218699455261, + "grad_norm": 183.52841186523438, + "learning_rate": 9.442308525541589e-08, + "logits/chosen": 0.15461723506450653, + "logits/rejected": 0.09914899617433548, + "logps/chosen": -70.79060363769531, + "logps/ref_chosen": -67.28638458251953, + "logps/ref_rejected": -82.78628540039062, + "logps/rejected": -87.60279846191406, + "loss": 1.1795, + "margin_dpo/margin_mean": 1.3122851848602295, + "margin_dpo/margin_std": 2.379267692565918, + "step": 492 + }, + { + "epoch": 0.745275888133031, + "fcm_dpo/beta": 0.6467149257659912, + "fcm_dpo/delta": -0.2506517171859741, + "fcm_dpo/margin": 1.8874269723892212, + "fcm_dpo/q_t": 0.2871362268924713, + "grad_norm": 143.90199279785156, + "learning_rate": 9.339026888672468e-08, + "logits/chosen": 0.13737066090106964, + "logits/rejected": 0.0812433660030365, + "logps/chosen": -58.82109451293945, + "logps/ref_chosen": -55.92750549316406, + "logps/ref_rejected": -79.12149810791016, + "logps/rejected": -83.90251159667969, + "loss": 0.8709, + "margin_dpo/margin_mean": 1.8874274492263794, + "margin_dpo/margin_std": 2.288954734802246, + "step": 493 + }, + { + "epoch": 0.7467876039304611, + "fcm_dpo/beta": 0.6492782831192017, + "fcm_dpo/delta": 0.15562888979911804, + "fcm_dpo/margin": 1.3203234672546387, + "fcm_dpo/q_t": 0.3562992513179779, + "grad_norm": 196.9374542236328, + "learning_rate": 9.236183322886945e-08, + "logits/chosen": 0.06576605886220932, + "logits/rejected": 0.027181722223758698, + "logps/chosen": -70.94686126708984, + "logps/ref_chosen": -67.95410919189453, + "logps/ref_rejected": -90.50865173339844, + "logps/rejected": -94.82173156738281, + "loss": 1.1436, + "margin_dpo/margin_mean": 1.3203232288360596, + "margin_dpo/margin_std": 2.456470251083374, + "step": 494 + }, + { + "epoch": 0.7482993197278912, + "fcm_dpo/beta": 0.6703627109527588, + "fcm_dpo/delta": 0.14631986618041992, + "fcm_dpo/margin": 1.2908090353012085, + "fcm_dpo/q_t": 0.35358014702796936, + "grad_norm": 143.63949584960938, + "learning_rate": 9.133780704940594e-08, + "logits/chosen": 0.21046996116638184, + "logits/rejected": 0.15889891982078552, + "logps/chosen": -55.39236068725586, + "logps/ref_chosen": -52.62546157836914, + "logps/ref_rejected": -72.06781005859375, + "logps/rejected": -76.12551879882812, + "loss": 1.0334, + "margin_dpo/margin_mean": 1.2908086776733398, + "margin_dpo/margin_std": 2.0524096488952637, + "step": 495 + }, + { + "epoch": 0.7498110355253212, + "fcm_dpo/beta": 0.6439297199249268, + "fcm_dpo/delta": -0.214058056473732, + "fcm_dpo/margin": 1.8385138511657715, + "fcm_dpo/q_t": 0.33099353313446045, + "grad_norm": 153.26148986816406, + "learning_rate": 9.031821899254797e-08, + "logits/chosen": 0.17050223052501678, + "logits/rejected": 0.08947437256574631, + "logps/chosen": -60.537879943847656, + "logps/ref_chosen": -57.597320556640625, + "logps/ref_rejected": -94.36127471923828, + "logps/rejected": -99.14034271240234, + "loss": 1.0009, + "margin_dpo/margin_mean": 1.838512897491455, + "margin_dpo/margin_std": 2.8675289154052734, + "step": 496 + }, + { + "epoch": 0.7513227513227513, + "fcm_dpo/beta": 0.5975298881530762, + "fcm_dpo/delta": -0.43948090076446533, + "fcm_dpo/margin": 2.297381639480591, + "fcm_dpo/q_t": 0.27814143896102905, + "grad_norm": 137.8128204345703, + "learning_rate": 8.930309757836516e-08, + "logits/chosen": 0.20551130175590515, + "logits/rejected": 0.17609372735023499, + "logps/chosen": -75.94544982910156, + "logps/ref_chosen": -72.78994750976562, + "logps/ref_rejected": -89.48483276367188, + "logps/rejected": -94.9377212524414, + "loss": 0.8075, + "margin_dpo/margin_mean": 2.29738187789917, + "margin_dpo/margin_std": 2.609776258468628, + "step": 497 + }, + { + "epoch": 0.7528344671201814, + "fcm_dpo/beta": 0.5923163294792175, + "fcm_dpo/delta": -0.017407868057489395, + "fcm_dpo/margin": 1.7135266065597534, + "fcm_dpo/q_t": 0.3283523619174957, + "grad_norm": 158.02391052246094, + "learning_rate": 8.829247120198563e-08, + "logits/chosen": 0.17303167283535004, + "logits/rejected": 0.14456358551979065, + "logps/chosen": -71.15170288085938, + "logps/ref_chosen": -68.36572265625, + "logps/ref_rejected": -71.28846740722656, + "logps/rejected": -75.78797912597656, + "loss": 0.9063, + "margin_dpo/margin_mean": 1.713526964187622, + "margin_dpo/margin_std": 2.233605146408081, + "step": 498 + }, + { + "epoch": 0.7543461829176115, + "fcm_dpo/beta": 0.594528317451477, + "fcm_dpo/delta": 0.08794374763965607, + "fcm_dpo/margin": 1.5478346347808838, + "fcm_dpo/q_t": 0.35329633951187134, + "grad_norm": 145.75146484375, + "learning_rate": 8.728636813280163e-08, + "logits/chosen": 0.16119879484176636, + "logits/rejected": 0.10922683030366898, + "logps/chosen": -64.74832916259766, + "logps/ref_chosen": -61.90882873535156, + "logps/ref_rejected": -91.9411392211914, + "logps/rejected": -96.32847595214844, + "loss": 1.121, + "margin_dpo/margin_mean": 1.547835111618042, + "margin_dpo/margin_std": 2.651371479034424, + "step": 499 + }, + { + "epoch": 0.7558578987150416, + "fcm_dpo/beta": 0.6114327311515808, + "fcm_dpo/delta": 0.14150665700435638, + "fcm_dpo/margin": 1.4240680932998657, + "fcm_dpo/q_t": 0.35586458444595337, + "grad_norm": 174.01441955566406, + "learning_rate": 8.628481651367875e-08, + "logits/chosen": 0.12461017072200775, + "logits/rejected": 0.11526093631982803, + "logps/chosen": -73.16278076171875, + "logps/ref_chosen": -70.225830078125, + "logps/ref_rejected": -71.72203063964844, + "logps/rejected": -76.08305358886719, + "loss": 1.1357, + "margin_dpo/margin_mean": 1.4240679740905762, + "margin_dpo/margin_std": 2.5242857933044434, + "step": 500 + }, + { + "epoch": 0.7573696145124716, + "fcm_dpo/beta": 0.6245852708816528, + "fcm_dpo/delta": 0.013538122177124023, + "fcm_dpo/margin": 1.5765228271484375, + "fcm_dpo/q_t": 0.3233751058578491, + "grad_norm": 121.06085205078125, + "learning_rate": 8.528784436016878e-08, + "logits/chosen": 0.15389983355998993, + "logits/rejected": 0.13935419917106628, + "logps/chosen": -67.49052429199219, + "logps/ref_chosen": -64.59880828857422, + "logps/ref_rejected": -70.59329223632812, + "logps/rejected": -75.0615234375, + "loss": 0.8765, + "margin_dpo/margin_mean": 1.5765225887298584, + "margin_dpo/margin_std": 1.870557188987732, + "step": 501 + }, + { + "epoch": 0.7588813303099018, + "fcm_dpo/beta": 0.6334064602851868, + "fcm_dpo/delta": 0.15443843603134155, + "fcm_dpo/margin": 1.3549081087112427, + "fcm_dpo/q_t": 0.3447534441947937, + "grad_norm": 170.02090454101562, + "learning_rate": 8.4295479559726e-08, + "logits/chosen": 0.17954713106155396, + "logits/rejected": 0.14447346329689026, + "logps/chosen": -68.44071960449219, + "logps/ref_chosen": -65.46662902832031, + "logps/ref_rejected": -90.22233581542969, + "logps/rejected": -94.55133056640625, + "loss": 1.0121, + "margin_dpo/margin_mean": 1.3549081087112427, + "margin_dpo/margin_std": 2.0818443298339844, + "step": 502 + }, + { + "epoch": 0.7603930461073318, + "fcm_dpo/beta": 0.6419456005096436, + "fcm_dpo/delta": 0.04625112935900688, + "fcm_dpo/margin": 1.4924449920654297, + "fcm_dpo/q_t": 0.3314594030380249, + "grad_norm": 147.28880310058594, + "learning_rate": 8.330774987092712e-08, + "logits/chosen": 0.16441036760807037, + "logits/rejected": 0.14501985907554626, + "logps/chosen": -54.53971862792969, + "logps/ref_chosen": -51.83476257324219, + "logps/ref_rejected": -57.62522506713867, + "logps/rejected": -61.82262420654297, + "loss": 0.9904, + "margin_dpo/margin_mean": 1.4924452304840088, + "margin_dpo/margin_std": 2.1151676177978516, + "step": 503 + }, + { + "epoch": 0.7619047619047619, + "fcm_dpo/beta": 0.6248334646224976, + "fcm_dpo/delta": -0.23319105803966522, + "fcm_dpo/margin": 1.9295209646224976, + "fcm_dpo/q_t": 0.295588880777359, + "grad_norm": 145.23736572265625, + "learning_rate": 8.232468292269479e-08, + "logits/chosen": 0.12036092579364777, + "logits/rejected": 0.0984935611486435, + "logps/chosen": -71.39968872070312, + "logps/ref_chosen": -68.65119934082031, + "logps/ref_rejected": -77.91394805908203, + "logps/rejected": -82.59195709228516, + "loss": 0.7956, + "margin_dpo/margin_mean": 1.929521083831787, + "margin_dpo/margin_std": 2.150576114654541, + "step": 504 + }, + { + "epoch": 0.763416477702192, + "fcm_dpo/beta": 0.6214442253112793, + "fcm_dpo/delta": 0.14665237069129944, + "fcm_dpo/margin": 1.393211007118225, + "fcm_dpo/q_t": 0.35855334997177124, + "grad_norm": 172.9752960205078, + "learning_rate": 8.134630621352483e-08, + "logits/chosen": 0.177079439163208, + "logits/rejected": 0.14601781964302063, + "logps/chosen": -62.98767852783203, + "logps/ref_chosen": -59.99884796142578, + "logps/ref_rejected": -76.88048553466797, + "logps/rejected": -81.26252746582031, + "loss": 1.0917, + "margin_dpo/margin_mean": 1.3932104110717773, + "margin_dpo/margin_std": 2.397246837615967, + "step": 505 + }, + { + "epoch": 0.764928193499622, + "fcm_dpo/beta": 0.627830982208252, + "fcm_dpo/delta": 0.032355912029743195, + "fcm_dpo/margin": 1.54330575466156, + "fcm_dpo/q_t": 0.3359745740890503, + "grad_norm": 169.81752014160156, + "learning_rate": 8.037264711071698e-08, + "logits/chosen": 0.1882736086845398, + "logits/rejected": 0.166158989071846, + "logps/chosen": -72.7292251586914, + "logps/ref_chosen": -70.07130432128906, + "logps/ref_rejected": -82.03775024414062, + "logps/rejected": -86.23898315429688, + "loss": 1.0458, + "margin_dpo/margin_mean": 1.54330575466156, + "margin_dpo/margin_std": 2.4680933952331543, + "step": 506 + }, + { + "epoch": 0.7664399092970522, + "fcm_dpo/beta": 0.6320427656173706, + "fcm_dpo/delta": -0.013070136308670044, + "fcm_dpo/margin": 1.600289225578308, + "fcm_dpo/q_t": 0.3405856490135193, + "grad_norm": 162.6297607421875, + "learning_rate": 7.940373284960933e-08, + "logits/chosen": 0.16370144486427307, + "logits/rejected": 0.1263059675693512, + "logps/chosen": -74.96257019042969, + "logps/ref_chosen": -72.00703430175781, + "logps/ref_rejected": -93.94987487792969, + "logps/rejected": -98.50569152832031, + "loss": 1.032, + "margin_dpo/margin_mean": 1.6002895832061768, + "margin_dpo/margin_std": 2.5143651962280273, + "step": 507 + }, + { + "epoch": 0.7679516250944822, + "fcm_dpo/beta": 0.6303126811981201, + "fcm_dpo/delta": -0.09110675752162933, + "fcm_dpo/margin": 1.7146742343902588, + "fcm_dpo/q_t": 0.3268600106239319, + "grad_norm": 160.6134033203125, + "learning_rate": 7.843959053281663e-08, + "logits/chosen": 0.1638413965702057, + "logits/rejected": 0.07367105782032013, + "logps/chosen": -62.97923278808594, + "logps/ref_chosen": -60.21992492675781, + "logps/ref_rejected": -95.9200668334961, + "logps/rejected": -100.39404296875, + "loss": 0.9646, + "margin_dpo/margin_mean": 1.7146737575531006, + "margin_dpo/margin_std": 2.4505600929260254, + "step": 508 + }, + { + "epoch": 0.7694633408919124, + "fcm_dpo/beta": 0.6267092227935791, + "fcm_dpo/delta": 0.0032510310411453247, + "fcm_dpo/margin": 1.5897384881973267, + "fcm_dpo/q_t": 0.3286612629890442, + "grad_norm": 157.1903839111328, + "learning_rate": 7.748024712947204e-08, + "logits/chosen": 0.13599814474582672, + "logits/rejected": 0.11172134429216385, + "logps/chosen": -69.07527160644531, + "logps/ref_chosen": -66.27017211914062, + "logps/ref_rejected": -71.73065185546875, + "logps/rejected": -76.12548828125, + "loss": 0.9953, + "margin_dpo/margin_mean": 1.589739203453064, + "margin_dpo/margin_std": 2.3416152000427246, + "step": 509 + }, + { + "epoch": 0.7709750566893424, + "fcm_dpo/beta": 0.6256568431854248, + "fcm_dpo/delta": 0.07200966775417328, + "fcm_dpo/margin": 1.4934487342834473, + "fcm_dpo/q_t": 0.3356286585330963, + "grad_norm": 166.45079040527344, + "learning_rate": 7.652572947447272e-08, + "logits/chosen": 0.22737839818000793, + "logits/rejected": 0.16066043078899384, + "logps/chosen": -56.61336898803711, + "logps/ref_chosen": -53.54487609863281, + "logps/ref_rejected": -91.36648559570312, + "logps/rejected": -95.92843627929688, + "loss": 1.005, + "margin_dpo/margin_mean": 1.4934483766555786, + "margin_dpo/margin_std": 2.2340614795684814, + "step": 510 + }, + { + "epoch": 0.7724867724867724, + "fcm_dpo/beta": 0.5967855453491211, + "fcm_dpo/delta": -0.3847648501396179, + "fcm_dpo/margin": 2.233816146850586, + "fcm_dpo/q_t": 0.2725888192653656, + "grad_norm": 127.65147399902344, + "learning_rate": 7.557606426772961e-08, + "logits/chosen": 0.19227483868598938, + "logits/rejected": 0.1546470820903778, + "logps/chosen": -58.66801452636719, + "logps/ref_chosen": -55.844383239746094, + "logps/ref_rejected": -86.49819946289062, + "logps/rejected": -91.55564880371094, + "loss": 0.7557, + "margin_dpo/margin_mean": 2.233816623687744, + "margin_dpo/margin_std": 2.2734181880950928, + "step": 511 + }, + { + "epoch": 0.7739984882842026, + "fcm_dpo/beta": 0.5729248523712158, + "fcm_dpo/delta": -0.2912992537021637, + "fcm_dpo/margin": 2.1851539611816406, + "fcm_dpo/q_t": 0.3602805733680725, + "grad_norm": 148.83749389648438, + "learning_rate": 7.463127807341966e-08, + "logits/chosen": 0.06669703125953674, + "logits/rejected": 0.05016005039215088, + "logps/chosen": -64.66706085205078, + "logps/ref_chosen": -61.653038024902344, + "logps/ref_rejected": -72.83148193359375, + "logps/rejected": -78.03065490722656, + "loss": 1.1281, + "margin_dpo/margin_mean": 2.185153007507324, + "margin_dpo/margin_std": 6.0767316818237305, + "step": 512 + }, + { + "epoch": 0.7755102040816326, + "fcm_dpo/beta": 0.5423198938369751, + "fcm_dpo/delta": -0.018883943557739258, + "fcm_dpo/margin": 1.8668808937072754, + "fcm_dpo/q_t": 0.3136303424835205, + "grad_norm": 92.23009490966797, + "learning_rate": 7.369139731924401e-08, + "logits/chosen": 0.27449309825897217, + "logits/rejected": 0.23453694581985474, + "logps/chosen": -53.48218536376953, + "logps/ref_chosen": -50.85256576538086, + "logps/ref_rejected": -69.21754455566406, + "logps/rejected": -73.71403503417969, + "loss": 0.8408, + "margin_dpo/margin_mean": 1.8668807744979858, + "margin_dpo/margin_std": 2.0499701499938965, + "step": 513 + }, + { + "epoch": 0.7770219198790628, + "fcm_dpo/beta": 0.5583192110061646, + "fcm_dpo/delta": 0.07093075662851334, + "fcm_dpo/margin": 1.6746280193328857, + "fcm_dpo/q_t": 0.3376282751560211, + "grad_norm": 142.2669677734375, + "learning_rate": 7.275644829568747e-08, + "logits/chosen": 0.20312434434890747, + "logits/rejected": 0.16620582342147827, + "logps/chosen": -72.64167785644531, + "logps/ref_chosen": -69.38493347167969, + "logps/ref_rejected": -83.32447814941406, + "logps/rejected": -88.25584411621094, + "loss": 1.0344, + "margin_dpo/margin_mean": 1.674628496170044, + "margin_dpo/margin_std": 2.6116552352905273, + "step": 514 + }, + { + "epoch": 0.7785336356764928, + "fcm_dpo/beta": 0.5502352118492126, + "fcm_dpo/delta": -0.08825686573982239, + "fcm_dpo/margin": 1.9603986740112305, + "fcm_dpo/q_t": 0.31272023916244507, + "grad_norm": 118.43267822265625, + "learning_rate": 7.182645715528435e-08, + "logits/chosen": 0.18149125576019287, + "logits/rejected": 0.11726510524749756, + "logps/chosen": -56.91847610473633, + "logps/ref_chosen": -53.687034606933594, + "logps/ref_rejected": -83.59614562988281, + "logps/rejected": -88.78797912597656, + "loss": 0.8794, + "margin_dpo/margin_mean": 1.9603983163833618, + "margin_dpo/margin_std": 2.4689695835113525, + "step": 515 + }, + { + "epoch": 0.780045351473923, + "fcm_dpo/beta": 0.5598210096359253, + "fcm_dpo/delta": 0.140054851770401, + "fcm_dpo/margin": 1.5577726364135742, + "fcm_dpo/q_t": 0.33335772156715393, + "grad_norm": 122.2513427734375, + "learning_rate": 7.090144991188568e-08, + "logits/chosen": 0.13685812056064606, + "logits/rejected": 0.0911111831665039, + "logps/chosen": -59.56391143798828, + "logps/ref_chosen": -56.9017219543457, + "logps/ref_rejected": -67.83477783203125, + "logps/rejected": -72.05474090576172, + "loss": 0.9382, + "margin_dpo/margin_mean": 1.557773470878601, + "margin_dpo/margin_std": 2.0453054904937744, + "step": 516 + }, + { + "epoch": 0.781557067271353, + "fcm_dpo/beta": 0.5620474815368652, + "fcm_dpo/delta": 0.013102632015943527, + "fcm_dpo/margin": 1.7575607299804688, + "fcm_dpo/q_t": 0.3347151577472687, + "grad_norm": 115.9723892211914, + "learning_rate": 6.998145243993284e-08, + "logits/chosen": 0.18582028150558472, + "logits/rejected": 0.17517045140266418, + "logps/chosen": -64.92308044433594, + "logps/ref_chosen": -61.775142669677734, + "logps/ref_rejected": -62.88270950317383, + "logps/rejected": -67.78820037841797, + "loss": 0.9368, + "margin_dpo/margin_mean": 1.7575602531433105, + "margin_dpo/margin_std": 2.4262712001800537, + "step": 517 + }, + { + "epoch": 0.783068783068783, + "fcm_dpo/beta": 0.5985446572303772, + "fcm_dpo/delta": 0.34289732575416565, + "fcm_dpo/margin": 1.131626844406128, + "fcm_dpo/q_t": 0.38726097345352173, + "grad_norm": 139.290283203125, + "learning_rate": 6.906649047373245e-08, + "logits/chosen": 0.19278889894485474, + "logits/rejected": 0.15261326730251312, + "logps/chosen": -64.96379089355469, + "logps/ref_chosen": -62.02523422241211, + "logps/ref_rejected": -79.06085205078125, + "logps/rejected": -83.13102722167969, + "loss": 1.1567, + "margin_dpo/margin_mean": 1.1316269636154175, + "margin_dpo/margin_std": 2.253190517425537, + "step": 518 + }, + { + "epoch": 0.7845804988662132, + "fcm_dpo/beta": 0.6441134214401245, + "fcm_dpo/delta": 0.37903302907943726, + "fcm_dpo/margin": 0.9974351525306702, + "fcm_dpo/q_t": 0.4084378778934479, + "grad_norm": 205.64553833007812, + "learning_rate": 6.815658960673781e-08, + "logits/chosen": 0.20801779627799988, + "logits/rejected": 0.15976354479789734, + "logps/chosen": -65.09986877441406, + "logps/ref_chosen": -61.60636901855469, + "logps/ref_rejected": -74.50727844238281, + "logps/rejected": -78.99821472167969, + "loss": 1.4602, + "margin_dpo/margin_mean": 0.9974346160888672, + "margin_dpo/margin_std": 2.888352870941162, + "step": 519 + }, + { + "epoch": 0.7860922146636432, + "fcm_dpo/beta": 0.6441489458084106, + "fcm_dpo/delta": -0.010032668709754944, + "fcm_dpo/margin": 1.562050461769104, + "fcm_dpo/q_t": 0.3334931433200836, + "grad_norm": 156.4764404296875, + "learning_rate": 6.725177529083209e-08, + "logits/chosen": 0.23202162981033325, + "logits/rejected": 0.18750135600566864, + "logps/chosen": -65.67581939697266, + "logps/ref_chosen": -62.87343215942383, + "logps/ref_rejected": -76.505615234375, + "logps/rejected": -80.87004852294922, + "loss": 0.9578, + "margin_dpo/margin_mean": 1.5620505809783936, + "margin_dpo/margin_std": 2.2463040351867676, + "step": 520 + }, + { + "epoch": 0.7876039304610734, + "fcm_dpo/beta": 0.6531983017921448, + "fcm_dpo/delta": 0.006205732002854347, + "fcm_dpo/margin": 1.5222787857055664, + "fcm_dpo/q_t": 0.32361727952957153, + "grad_norm": 157.93814086914062, + "learning_rate": 6.63520728356167e-08, + "logits/chosen": 0.1032562106847763, + "logits/rejected": 0.04845578968524933, + "logps/chosen": -67.12205505371094, + "logps/ref_chosen": -64.20668029785156, + "logps/ref_rejected": -92.28083038330078, + "logps/rejected": -96.71849060058594, + "loss": 0.9371, + "margin_dpo/margin_mean": 1.5222779512405396, + "margin_dpo/margin_std": 2.023488759994507, + "step": 521 + }, + { + "epoch": 0.7891156462585034, + "fcm_dpo/beta": 0.6555431485176086, + "fcm_dpo/delta": 0.03148447349667549, + "fcm_dpo/margin": 1.48207688331604, + "fcm_dpo/q_t": 0.32747435569763184, + "grad_norm": 147.4585723876953, + "learning_rate": 6.545750740770336e-08, + "logits/chosen": 0.16423772275447845, + "logits/rejected": 0.14849795401096344, + "logps/chosen": -61.12641525268555, + "logps/ref_chosen": -58.369720458984375, + "logps/ref_rejected": -68.79248046875, + "logps/rejected": -73.03125, + "loss": 1.0698, + "margin_dpo/margin_mean": 1.4820764064788818, + "margin_dpo/margin_std": 2.3686909675598145, + "step": 522 + }, + { + "epoch": 0.7906273620559335, + "fcm_dpo/beta": 0.6287499666213989, + "fcm_dpo/delta": -0.24582098424434662, + "fcm_dpo/margin": 1.9300764799118042, + "fcm_dpo/q_t": 0.2935262620449066, + "grad_norm": 161.8909454345703, + "learning_rate": 6.456810403001012e-08, + "logits/chosen": 0.17682617902755737, + "logits/rejected": 0.09023305773735046, + "logps/chosen": -68.99554443359375, + "logps/ref_chosen": -65.71324157714844, + "logps/ref_rejected": -91.98896789550781, + "logps/rejected": -97.20135498046875, + "loss": 0.9505, + "margin_dpo/margin_mean": 1.9300763607025146, + "margin_dpo/margin_std": 2.5030922889709473, + "step": 523 + }, + { + "epoch": 0.7921390778533636, + "fcm_dpo/beta": 0.642947256565094, + "fcm_dpo/delta": 0.15807999670505524, + "fcm_dpo/margin": 1.3285942077636719, + "fcm_dpo/q_t": 0.34164804220199585, + "grad_norm": 157.54954528808594, + "learning_rate": 6.368388758106134e-08, + "logits/chosen": 0.14576482772827148, + "logits/rejected": 0.12724286317825317, + "logps/chosen": -78.79303741455078, + "logps/ref_chosen": -76.35124969482422, + "logps/ref_rejected": -89.96072387695312, + "logps/rejected": -93.73110961914062, + "loss": 1.0368, + "margin_dpo/margin_mean": 1.328594446182251, + "margin_dpo/margin_std": 2.0358855724334717, + "step": 524 + }, + { + "epoch": 0.7936507936507936, + "fcm_dpo/beta": 0.6652133464813232, + "fcm_dpo/delta": 0.20262369513511658, + "fcm_dpo/margin": 1.2232202291488647, + "fcm_dpo/q_t": 0.3485579490661621, + "grad_norm": 169.54148864746094, + "learning_rate": 6.280488279429185e-08, + "logits/chosen": 0.0526951402425766, + "logits/rejected": 0.039441537111997604, + "logps/chosen": -78.24242401123047, + "logps/ref_chosen": -75.49578857421875, + "logps/ref_rejected": -84.04852294921875, + "logps/rejected": -88.01837921142578, + "loss": 0.9943, + "margin_dpo/margin_mean": 1.2232205867767334, + "margin_dpo/margin_std": 1.763154149055481, + "step": 525 + }, + { + "epoch": 0.7951625094482238, + "fcm_dpo/beta": 0.6876204609870911, + "fcm_dpo/delta": 0.2604686915874481, + "fcm_dpo/margin": 1.1028797626495361, + "fcm_dpo/q_t": 0.36391156911849976, + "grad_norm": 168.36891174316406, + "learning_rate": 6.193111425735515e-08, + "logits/chosen": 0.1737247258424759, + "logits/rejected": 0.12701740860939026, + "logps/chosen": -64.25382995605469, + "logps/ref_chosen": -61.29241943359375, + "logps/ref_rejected": -82.47763061523438, + "logps/rejected": -86.54191589355469, + "loss": 1.0833, + "margin_dpo/margin_mean": 1.1028800010681152, + "margin_dpo/margin_std": 1.7894057035446167, + "step": 526 + }, + { + "epoch": 0.7966742252456538, + "fcm_dpo/beta": 0.7351027727127075, + "fcm_dpo/delta": 0.22121518850326538, + "fcm_dpo/margin": 1.081107497215271, + "fcm_dpo/q_t": 0.3715837299823761, + "grad_norm": 250.8863067626953, + "learning_rate": 6.106260641143546e-08, + "logits/chosen": 0.2451198697090149, + "logits/rejected": 0.1942567229270935, + "logps/chosen": -64.66523742675781, + "logps/ref_chosen": -61.472625732421875, + "logps/ref_rejected": -90.52831268310547, + "logps/rejected": -94.8020248413086, + "loss": 1.3058, + "margin_dpo/margin_mean": 1.0811076164245605, + "margin_dpo/margin_std": 2.4224696159362793, + "step": 527 + }, + { + "epoch": 0.7981859410430839, + "fcm_dpo/beta": 0.7391092777252197, + "fcm_dpo/delta": 0.08262480795383453, + "fcm_dpo/margin": 1.248426914215088, + "fcm_dpo/q_t": 0.33980193734169006, + "grad_norm": 163.9738311767578, + "learning_rate": 6.019938355056422e-08, + "logits/chosen": 0.050329744815826416, + "logits/rejected": -0.005134463310241699, + "logps/chosen": -61.76133728027344, + "logps/ref_chosen": -58.792015075683594, + "logps/ref_rejected": -71.82516479492188, + "logps/rejected": -76.04290771484375, + "loss": 1.1046, + "margin_dpo/margin_mean": 1.2484264373779297, + "margin_dpo/margin_std": 2.0079092979431152, + "step": 528 + }, + { + "epoch": 0.799697656840514, + "fcm_dpo/beta": 0.6621348857879639, + "fcm_dpo/delta": -0.6627082824707031, + "fcm_dpo/margin": 2.306537628173828, + "fcm_dpo/q_t": 0.25794824957847595, + "grad_norm": 117.09664154052734, + "learning_rate": 5.934146982094049e-08, + "logits/chosen": 0.10348678380250931, + "logits/rejected": 0.05463102087378502, + "logps/chosen": -57.58445739746094, + "logps/ref_chosen": -55.070960998535156, + "logps/ref_rejected": -75.44007873535156, + "logps/rejected": -80.26010131835938, + "loss": 0.7618, + "margin_dpo/margin_mean": 2.306537389755249, + "margin_dpo/margin_std": 2.351503372192383, + "step": 529 + }, + { + "epoch": 0.8012093726379441, + "fcm_dpo/beta": 0.6574649214744568, + "fcm_dpo/delta": -0.030184239149093628, + "fcm_dpo/margin": 1.5620912313461304, + "fcm_dpo/q_t": 0.32785388827323914, + "grad_norm": 138.7952880859375, + "learning_rate": 5.848888922025552e-08, + "logits/chosen": 0.1791817992925644, + "logits/rejected": 0.14665716886520386, + "logps/chosen": -59.61901092529297, + "logps/ref_chosen": -56.743812561035156, + "logps/ref_rejected": -76.6692123413086, + "logps/rejected": -81.10650634765625, + "loss": 0.9455, + "margin_dpo/margin_mean": 1.5620914697647095, + "margin_dpo/margin_std": 2.117258071899414, + "step": 530 + }, + { + "epoch": 0.8027210884353742, + "fcm_dpo/beta": 0.6852550506591797, + "fcm_dpo/delta": 0.22120189666748047, + "fcm_dpo/margin": 1.1534594297409058, + "fcm_dpo/q_t": 0.3655146360397339, + "grad_norm": 180.98231506347656, + "learning_rate": 5.7641665597021435e-08, + "logits/chosen": 0.14505237340927124, + "logits/rejected": 0.09899041056632996, + "logps/chosen": -54.173072814941406, + "logps/ref_chosen": -51.116455078125, + "logps/ref_rejected": -79.52884674072266, + "logps/rejected": -83.73892211914062, + "loss": 1.0873, + "margin_dpo/margin_mean": 1.1534587144851685, + "margin_dpo/margin_std": 1.9886727333068848, + "step": 531 + }, + { + "epoch": 0.8042328042328042, + "fcm_dpo/beta": 0.6626486778259277, + "fcm_dpo/delta": -0.1822899430990219, + "fcm_dpo/margin": 1.750182867050171, + "fcm_dpo/q_t": 0.31126174330711365, + "grad_norm": 145.5829315185547, + "learning_rate": 5.679982264990424e-08, + "logits/chosen": 0.09937071800231934, + "logits/rejected": 0.05832071602344513, + "logps/chosen": -61.42143249511719, + "logps/ref_chosen": -58.279945373535156, + "logps/ref_rejected": -78.05426788330078, + "logps/rejected": -82.94593811035156, + "loss": 0.9066, + "margin_dpo/margin_mean": 1.7501822710037231, + "margin_dpo/margin_std": 2.264770030975342, + "step": 532 + }, + { + "epoch": 0.8057445200302343, + "fcm_dpo/beta": 0.6552125811576843, + "fcm_dpo/delta": -0.08179665356874466, + "fcm_dpo/margin": 1.6374917030334473, + "fcm_dpo/q_t": 0.3215191662311554, + "grad_norm": 148.30023193359375, + "learning_rate": 5.596338392706076e-08, + "logits/chosen": 0.25206273794174194, + "logits/rejected": 0.20781204104423523, + "logps/chosen": -58.997161865234375, + "logps/ref_chosen": -56.41801071166992, + "logps/ref_rejected": -73.89324951171875, + "logps/rejected": -78.1098861694336, + "loss": 0.9886, + "margin_dpo/margin_mean": 1.6374918222427368, + "margin_dpo/margin_std": 2.354870319366455, + "step": 533 + }, + { + "epoch": 0.8072562358276644, + "fcm_dpo/beta": 0.649742603302002, + "fcm_dpo/delta": 0.05204106122255325, + "fcm_dpo/margin": 1.4657820463180542, + "fcm_dpo/q_t": 0.32745662331581116, + "grad_norm": 144.36602783203125, + "learning_rate": 5.513237282548033e-08, + "logits/chosen": 0.17291076481342316, + "logits/rejected": 0.13234050571918488, + "logps/chosen": -63.49784851074219, + "logps/ref_chosen": -60.748687744140625, + "logps/ref_rejected": -73.8623046875, + "logps/rejected": -78.07723999023438, + "loss": 0.9544, + "margin_dpo/margin_mean": 1.4657821655273438, + "margin_dpo/margin_std": 1.9620198011398315, + "step": 534 + }, + { + "epoch": 0.8087679516250945, + "fcm_dpo/beta": 0.6593036651611328, + "fcm_dpo/delta": -0.019535936415195465, + "fcm_dpo/margin": 1.5417048931121826, + "fcm_dpo/q_t": 0.3396652340888977, + "grad_norm": 148.61146545410156, + "learning_rate": 5.430681259032957e-08, + "logits/chosen": 0.09707458317279816, + "logits/rejected": 0.04526631161570549, + "logps/chosen": -64.64535522460938, + "logps/ref_chosen": -61.637413024902344, + "logps/ref_rejected": -80.93138885498047, + "logps/rejected": -85.48104095458984, + "loss": 1.0638, + "margin_dpo/margin_mean": 1.5417053699493408, + "margin_dpo/margin_std": 2.5180535316467285, + "step": 535 + }, + { + "epoch": 0.8102796674225246, + "fcm_dpo/beta": 0.6411547064781189, + "fcm_dpo/delta": -0.05495788902044296, + "fcm_dpo/margin": 1.6322299242019653, + "fcm_dpo/q_t": 0.3149953782558441, + "grad_norm": 129.11102294921875, + "learning_rate": 5.3486726314303175e-08, + "logits/chosen": 0.20541507005691528, + "logits/rejected": 0.14478695392608643, + "logps/chosen": -54.65433120727539, + "logps/ref_chosen": -51.88897705078125, + "logps/ref_rejected": -73.34864044189453, + "logps/rejected": -77.74623107910156, + "loss": 0.8709, + "margin_dpo/margin_mean": 1.6322304010391235, + "margin_dpo/margin_std": 1.9500904083251953, + "step": 536 + }, + { + "epoch": 0.8117913832199547, + "fcm_dpo/beta": 0.6352528929710388, + "fcm_dpo/delta": -0.06024022772908211, + "fcm_dpo/margin": 1.6563501358032227, + "fcm_dpo/q_t": 0.32194915413856506, + "grad_norm": 129.3059844970703, + "learning_rate": 5.267213693697695e-08, + "logits/chosen": 0.24240414798259735, + "logits/rejected": 0.1752084195613861, + "logps/chosen": -57.34803771972656, + "logps/ref_chosen": -54.248619079589844, + "logps/ref_rejected": -94.94343566894531, + "logps/rejected": -99.69920349121094, + "loss": 1.0076, + "margin_dpo/margin_mean": 1.656351089477539, + "margin_dpo/margin_std": 2.3621954917907715, + "step": 537 + }, + { + "epoch": 0.8133030990173847, + "fcm_dpo/beta": 0.6266754865646362, + "fcm_dpo/delta": -0.23747026920318604, + "fcm_dpo/margin": 1.9266445636749268, + "fcm_dpo/q_t": 0.28824812173843384, + "grad_norm": 151.20790100097656, + "learning_rate": 5.1863067244167144e-08, + "logits/chosen": 0.17140713334083557, + "logits/rejected": 0.14127670228481293, + "logps/chosen": -73.17599487304688, + "logps/ref_chosen": -70.09353637695312, + "logps/ref_rejected": -79.49833679199219, + "logps/rejected": -84.5074462890625, + "loss": 0.8148, + "margin_dpo/margin_mean": 1.926644206047058, + "margin_dpo/margin_std": 2.1346817016601562, + "step": 538 + }, + { + "epoch": 0.8148148148148148, + "fcm_dpo/beta": 0.6051099300384521, + "fcm_dpo/delta": -0.027577966451644897, + "fcm_dpo/margin": 1.6932862997055054, + "fcm_dpo/q_t": 0.3206092119216919, + "grad_norm": 135.66448974609375, + "learning_rate": 5.105953986729195e-08, + "logits/chosen": 0.13851284980773926, + "logits/rejected": 0.08982232213020325, + "logps/chosen": -64.91091918945312, + "logps/ref_chosen": -61.93169403076172, + "logps/ref_rejected": -84.08946228027344, + "logps/rejected": -88.76197052001953, + "loss": 0.8703, + "margin_dpo/margin_mean": 1.6932868957519531, + "margin_dpo/margin_std": 2.0537302494049072, + "step": 539 + }, + { + "epoch": 0.8163265306122449, + "fcm_dpo/beta": 0.5865040421485901, + "fcm_dpo/delta": -0.2764972448348999, + "fcm_dpo/margin": 2.119068145751953, + "fcm_dpo/q_t": 0.29492413997650146, + "grad_norm": 142.99563598632812, + "learning_rate": 5.026157728273966e-08, + "logits/chosen": 0.1776391565799713, + "logits/rejected": 0.11434172093868256, + "logps/chosen": -65.55619812011719, + "logps/ref_chosen": -62.704254150390625, + "logps/ref_rejected": -95.63597106933594, + "logps/rejected": -100.60699462890625, + "loss": 0.841, + "margin_dpo/margin_mean": 2.1190683841705322, + "margin_dpo/margin_std": 2.443237781524658, + "step": 540 + }, + { + "epoch": 0.817838246409675, + "fcm_dpo/beta": 0.5594385266304016, + "fcm_dpo/delta": -0.11497347056865692, + "fcm_dpo/margin": 1.9675724506378174, + "fcm_dpo/q_t": 0.3052162230014801, + "grad_norm": 126.23677062988281, + "learning_rate": 4.9469201811239035e-08, + "logits/chosen": 0.12904441356658936, + "logits/rejected": 0.12872368097305298, + "logps/chosen": -65.19651794433594, + "logps/ref_chosen": -62.48084259033203, + "logps/ref_rejected": -57.55541229248047, + "logps/rejected": -62.23865509033203, + "loss": 0.8973, + "margin_dpo/margin_mean": 1.967572569847107, + "margin_dpo/margin_std": 2.4288015365600586, + "step": 541 + }, + { + "epoch": 0.8193499622071051, + "fcm_dpo/beta": 0.5571799278259277, + "fcm_dpo/delta": -0.033925510942935944, + "fcm_dpo/margin": 1.8493754863739014, + "fcm_dpo/q_t": 0.3220483660697937, + "grad_norm": 113.93995666503906, + "learning_rate": 4.868243561723534e-08, + "logits/chosen": 0.2040795087814331, + "logits/rejected": 0.1579345464706421, + "logps/chosen": -52.065277099609375, + "logps/ref_chosen": -49.454891204833984, + "logps/ref_rejected": -65.33275604248047, + "logps/rejected": -69.79252624511719, + "loss": 0.9384, + "margin_dpo/margin_mean": 1.8493754863739014, + "margin_dpo/margin_std": 2.4480319023132324, + "step": 542 + }, + { + "epoch": 0.8208616780045351, + "fcm_dpo/beta": 0.5707334280014038, + "fcm_dpo/delta": 0.13935577869415283, + "fcm_dpo/margin": 1.527040958404541, + "fcm_dpo/q_t": 0.33425813913345337, + "grad_norm": 109.5389175415039, + "learning_rate": 4.790130070827028e-08, + "logits/chosen": 0.1735750138759613, + "logits/rejected": 0.10716632753610611, + "logps/chosen": -53.960105895996094, + "logps/ref_chosen": -51.100860595703125, + "logps/ref_rejected": -76.06130981445312, + "logps/rejected": -80.44760131835938, + "loss": 0.9579, + "margin_dpo/margin_mean": 1.5270410776138306, + "margin_dpo/margin_std": 2.048778533935547, + "step": 543 + }, + { + "epoch": 0.8223733938019653, + "fcm_dpo/beta": 0.5578969717025757, + "fcm_dpo/delta": -0.18755921721458435, + "fcm_dpo/margin": 2.0900115966796875, + "fcm_dpo/q_t": 0.3165471851825714, + "grad_norm": 130.18556213378906, + "learning_rate": 4.7125818934366454e-08, + "logits/chosen": 0.14418599009513855, + "logits/rejected": 0.09046932309865952, + "logps/chosen": -63.256656646728516, + "logps/ref_chosen": -60.2772331237793, + "logps/ref_rejected": -88.40553283691406, + "logps/rejected": -93.4749755859375, + "loss": 0.9465, + "margin_dpo/margin_mean": 2.0900115966796875, + "margin_dpo/margin_std": 2.8377792835235596, + "step": 544 + }, + { + "epoch": 0.8238851095993953, + "fcm_dpo/beta": 0.5586047172546387, + "fcm_dpo/delta": 0.11241482198238373, + "fcm_dpo/margin": 1.607243299484253, + "fcm_dpo/q_t": 0.3465924859046936, + "grad_norm": 141.80880737304688, + "learning_rate": 4.635601198741607e-08, + "logits/chosen": 0.1261473000049591, + "logits/rejected": 0.07930372655391693, + "logps/chosen": -64.5820541381836, + "logps/ref_chosen": -61.61524963378906, + "logps/ref_rejected": -78.71266174316406, + "logps/rejected": -83.28670501708984, + "loss": 1.0031, + "margin_dpo/margin_mean": 1.6072428226470947, + "margin_dpo/margin_std": 2.3972063064575195, + "step": 545 + }, + { + "epoch": 0.8253968253968254, + "fcm_dpo/beta": 0.5760623216629028, + "fcm_dpo/delta": 0.1294553279876709, + "fcm_dpo/margin": 1.5299469232559204, + "fcm_dpo/q_t": 0.3359874188899994, + "grad_norm": 148.13609313964844, + "learning_rate": 4.559190140057428e-08, + "logits/chosen": 0.17436569929122925, + "logits/rejected": 0.15731996297836304, + "logps/chosen": -62.12708282470703, + "logps/ref_chosen": -59.313262939453125, + "logps/ref_rejected": -64.73631286621094, + "logps/rejected": -69.080078125, + "loss": 1.0087, + "margin_dpo/margin_mean": 1.52994704246521, + "margin_dpo/margin_std": 2.212397813796997, + "step": 546 + }, + { + "epoch": 0.8269085411942555, + "fcm_dpo/beta": 0.5641611814498901, + "fcm_dpo/delta": -0.1594116985797882, + "fcm_dpo/margin": 2.0226144790649414, + "fcm_dpo/q_t": 0.2903903126716614, + "grad_norm": 107.82057189941406, + "learning_rate": 4.483350854765672e-08, + "logits/chosen": 0.1362178921699524, + "logits/rejected": 0.08240213245153427, + "logps/chosen": -57.526573181152344, + "logps/ref_chosen": -54.97674560546875, + "logps/ref_rejected": -75.35922241210938, + "logps/rejected": -79.9316635131836, + "loss": 0.8307, + "margin_dpo/margin_mean": 2.0226151943206787, + "margin_dpo/margin_std": 2.2596964836120605, + "step": 547 + }, + { + "epoch": 0.8284202569916855, + "fcm_dpo/beta": 0.5807280540466309, + "fcm_dpo/delta": 0.16467252373695374, + "fcm_dpo/margin": 1.4542980194091797, + "fcm_dpo/q_t": 0.3473031222820282, + "grad_norm": 137.8083038330078, + "learning_rate": 4.4080854642541826e-08, + "logits/chosen": 0.09831206500530243, + "logits/rejected": 0.04538644477725029, + "logps/chosen": -66.22586822509766, + "logps/ref_chosen": -63.21067428588867, + "logps/ref_rejected": -81.23347473144531, + "logps/rejected": -85.70297241210938, + "loss": 0.9561, + "margin_dpo/margin_mean": 1.4542980194091797, + "margin_dpo/margin_std": 1.9517710208892822, + "step": 548 + }, + { + "epoch": 0.8299319727891157, + "fcm_dpo/beta": 0.5972309708595276, + "fcm_dpo/delta": 0.16186922788619995, + "fcm_dpo/margin": 1.4225797653198242, + "fcm_dpo/q_t": 0.35338258743286133, + "grad_norm": 165.7979736328125, + "learning_rate": 4.333396073857723e-08, + "logits/chosen": 0.2513388395309448, + "logits/rejected": 0.2025221437215805, + "logps/chosen": -67.16740417480469, + "logps/ref_chosen": -64.27351379394531, + "logps/ref_rejected": -92.31663513183594, + "logps/rejected": -96.63310241699219, + "loss": 1.0582, + "margin_dpo/margin_mean": 1.4225800037384033, + "margin_dpo/margin_std": 2.26432466506958, + "step": 549 + }, + { + "epoch": 0.8314436885865457, + "fcm_dpo/beta": 0.6041165590286255, + "fcm_dpo/delta": 0.15158365666866302, + "fcm_dpo/margin": 1.4242266416549683, + "fcm_dpo/q_t": 0.3457239270210266, + "grad_norm": 118.26493072509766, + "learning_rate": 4.259284772799099e-08, + "logits/chosen": 0.17192748188972473, + "logits/rejected": 0.14534920454025269, + "logps/chosen": -59.13628387451172, + "logps/ref_chosen": -56.230438232421875, + "logps/ref_rejected": -62.59788513183594, + "logps/rejected": -66.92796325683594, + "loss": 1.0043, + "margin_dpo/margin_mean": 1.4242260456085205, + "margin_dpo/margin_std": 2.080674171447754, + "step": 550 + }, + { + "epoch": 0.8329554043839759, + "fcm_dpo/beta": 0.6244951486587524, + "fcm_dpo/delta": 0.035569630563259125, + "fcm_dpo/margin": 1.5473127365112305, + "fcm_dpo/q_t": 0.33668047189712524, + "grad_norm": 132.6877899169922, + "learning_rate": 4.1857536341307176e-08, + "logits/chosen": 0.17000985145568848, + "logits/rejected": 0.14413632452487946, + "logps/chosen": -70.88568878173828, + "logps/ref_chosen": -67.74720764160156, + "logps/ref_rejected": -87.04285430908203, + "logps/rejected": -91.72865295410156, + "loss": 0.9498, + "margin_dpo/margin_mean": 1.5473123788833618, + "margin_dpo/margin_std": 2.1508963108062744, + "step": 551 + }, + { + "epoch": 0.8344671201814059, + "fcm_dpo/beta": 0.6188427209854126, + "fcm_dpo/delta": -0.11111941188573837, + "fcm_dpo/margin": 1.7723690271377563, + "fcm_dpo/q_t": 0.2986357808113098, + "grad_norm": 139.71243286132812, + "learning_rate": 4.112804714676593e-08, + "logits/chosen": 0.14941178262233734, + "logits/rejected": 0.10768507421016693, + "logps/chosen": -65.74885559082031, + "logps/ref_chosen": -62.92625427246094, + "logps/ref_rejected": -82.98365783691406, + "logps/rejected": -87.57861328125, + "loss": 0.887, + "margin_dpo/margin_mean": 1.7723690271377563, + "margin_dpo/margin_std": 2.1054043769836426, + "step": 552 + }, + { + "epoch": 0.8359788359788359, + "fcm_dpo/beta": 0.5973865985870361, + "fcm_dpo/delta": -0.033724166452884674, + "fcm_dpo/margin": 1.7203757762908936, + "fcm_dpo/q_t": 0.3467303514480591, + "grad_norm": 141.0452880859375, + "learning_rate": 4.0404400549748144e-08, + "logits/chosen": 0.1345456838607788, + "logits/rejected": 0.05725400522351265, + "logps/chosen": -59.39005661010742, + "logps/ref_chosen": -56.038490295410156, + "logps/ref_rejected": -84.48454284667969, + "logps/rejected": -89.55648803710938, + "loss": 1.1217, + "margin_dpo/margin_mean": 1.720375657081604, + "margin_dpo/margin_std": 2.994457721710205, + "step": 553 + }, + { + "epoch": 0.8374905517762661, + "fcm_dpo/beta": 0.61543208360672, + "fcm_dpo/delta": 0.10628563910722733, + "fcm_dpo/margin": 1.4661433696746826, + "fcm_dpo/q_t": 0.3466363847255707, + "grad_norm": 153.4082489013672, + "learning_rate": 3.968661679220467e-08, + "logits/chosen": 0.14405734837055206, + "logits/rejected": 0.12058596312999725, + "logps/chosen": -67.53119659423828, + "logps/ref_chosen": -64.53059387207031, + "logps/ref_rejected": -71.2155990600586, + "logps/rejected": -75.6823501586914, + "loss": 1.032, + "margin_dpo/margin_mean": 1.4661433696746826, + "margin_dpo/margin_std": 2.351094961166382, + "step": 554 + }, + { + "epoch": 0.8390022675736961, + "fcm_dpo/beta": 0.6489007472991943, + "fcm_dpo/delta": 0.20813655853271484, + "fcm_dpo/margin": 1.2255511283874512, + "fcm_dpo/q_t": 0.3576173484325409, + "grad_norm": 182.52911376953125, + "learning_rate": 3.89747159520904e-08, + "logits/chosen": 0.15571804344654083, + "logits/rejected": 0.12670589983463287, + "logps/chosen": -70.10320281982422, + "logps/ref_chosen": -66.65191650390625, + "logps/ref_rejected": -68.6667251586914, + "logps/rejected": -73.34355163574219, + "loss": 1.1729, + "margin_dpo/margin_mean": 1.225550651550293, + "margin_dpo/margin_std": 2.238006591796875, + "step": 555 + }, + { + "epoch": 0.8405139833711263, + "fcm_dpo/beta": 0.6440955400466919, + "fcm_dpo/delta": 0.016805479303002357, + "fcm_dpo/margin": 1.5289926528930664, + "fcm_dpo/q_t": 0.34293437004089355, + "grad_norm": 150.5496063232422, + "learning_rate": 3.826871794280192e-08, + "logits/chosen": 0.17429228127002716, + "logits/rejected": 0.13041889667510986, + "logps/chosen": -56.17012405395508, + "logps/ref_chosen": -52.832366943359375, + "logps/ref_rejected": -64.49044036865234, + "logps/rejected": -69.35718536376953, + "loss": 1.0381, + "margin_dpo/margin_mean": 1.5289928913116455, + "margin_dpo/margin_std": 2.3977890014648438, + "step": 556 + }, + { + "epoch": 0.8420256991685563, + "fcm_dpo/beta": 0.5984865427017212, + "fcm_dpo/delta": -0.35813382267951965, + "fcm_dpo/margin": 2.1672024726867676, + "fcm_dpo/q_t": 0.2816886305809021, + "grad_norm": 126.752197265625, + "learning_rate": 3.756864251262143e-08, + "logits/chosen": 0.21231526136398315, + "logits/rejected": 0.1561964750289917, + "logps/chosen": -58.17803955078125, + "logps/ref_chosen": -55.03598403930664, + "logps/ref_rejected": -75.80644989013672, + "logps/rejected": -81.11570739746094, + "loss": 0.7731, + "margin_dpo/margin_mean": 2.1672027111053467, + "margin_dpo/margin_std": 2.3840556144714355, + "step": 557 + }, + { + "epoch": 0.8435374149659864, + "fcm_dpo/beta": 0.5904332399368286, + "fcm_dpo/delta": -0.06778506934642792, + "fcm_dpo/margin": 1.794862151145935, + "fcm_dpo/q_t": 0.32028087973594666, + "grad_norm": 146.46681213378906, + "learning_rate": 3.687450924416341e-08, + "logits/chosen": 0.22109848260879517, + "logits/rejected": 0.17496977746486664, + "logps/chosen": -66.06083679199219, + "logps/ref_chosen": -63.226348876953125, + "logps/ref_rejected": -91.46881866455078, + "logps/rejected": -96.09817504882812, + "loss": 0.9082, + "margin_dpo/margin_mean": 1.7948615550994873, + "margin_dpo/margin_std": 2.3338146209716797, + "step": 558 + }, + { + "epoch": 0.8450491307634165, + "fcm_dpo/beta": 0.5790784358978271, + "fcm_dpo/delta": -0.06187023967504501, + "fcm_dpo/margin": 1.8165602684020996, + "fcm_dpo/q_t": 0.33412617444992065, + "grad_norm": 138.7181854248047, + "learning_rate": 3.6186337553827743e-08, + "logits/chosen": 0.11689116060733795, + "logits/rejected": 0.06340146064758301, + "logps/chosen": -64.47853088378906, + "logps/ref_chosen": -61.521644592285156, + "logps/ref_rejected": -82.83859252929688, + "logps/rejected": -87.61204528808594, + "loss": 1.0151, + "margin_dpo/margin_mean": 1.8165605068206787, + "margin_dpo/margin_std": 2.6852447986602783, + "step": 559 + }, + { + "epoch": 0.8465608465608465, + "fcm_dpo/beta": 0.6011730432510376, + "fcm_dpo/delta": 0.08525849133729935, + "fcm_dpo/margin": 1.5243772268295288, + "fcm_dpo/q_t": 0.345813125371933, + "grad_norm": 146.03248596191406, + "learning_rate": 3.550414669125573e-08, + "logits/chosen": 0.16141119599342346, + "logits/rejected": 0.13059382140636444, + "logps/chosen": -63.782081604003906, + "logps/ref_chosen": -60.64122009277344, + "logps/ref_rejected": -78.75474548339844, + "logps/rejected": -83.41998291015625, + "loss": 1.0243, + "margin_dpo/margin_mean": 1.5243771076202393, + "margin_dpo/margin_std": 2.256977081298828, + "step": 560 + }, + { + "epoch": 0.8480725623582767, + "fcm_dpo/beta": 0.5956799983978271, + "fcm_dpo/delta": 0.0049747563898563385, + "fcm_dpo/margin": 1.6711280345916748, + "fcm_dpo/q_t": 0.3338923752307892, + "grad_norm": 122.3641128540039, + "learning_rate": 3.482795573879241e-08, + "logits/chosen": 0.15920129418373108, + "logits/rejected": 0.13256219029426575, + "logps/chosen": -65.13980102539062, + "logps/ref_chosen": -62.49859619140625, + "logps/ref_rejected": -78.72064208984375, + "logps/rejected": -83.03296661376953, + "loss": 0.954, + "margin_dpo/margin_mean": 1.671128273010254, + "margin_dpo/margin_std": 2.351602077484131, + "step": 561 + }, + { + "epoch": 0.8495842781557067, + "fcm_dpo/beta": 0.5753499269485474, + "fcm_dpo/delta": -0.1565413773059845, + "fcm_dpo/margin": 1.973330020904541, + "fcm_dpo/q_t": 0.3080880045890808, + "grad_norm": 128.77142333984375, + "learning_rate": 3.415778361095226e-08, + "logits/chosen": 0.20189374685287476, + "logits/rejected": 0.17044463753700256, + "logps/chosen": -77.8094482421875, + "logps/ref_chosen": -74.78173828125, + "logps/ref_rejected": -92.63499450683594, + "logps/rejected": -97.63603210449219, + "loss": 0.8933, + "margin_dpo/margin_mean": 1.9733293056488037, + "margin_dpo/margin_std": 2.4358203411102295, + "step": 562 + }, + { + "epoch": 0.8510959939531368, + "fcm_dpo/beta": 0.5790232419967651, + "fcm_dpo/delta": 0.03163836523890495, + "fcm_dpo/margin": 1.6777245998382568, + "fcm_dpo/q_t": 0.3400580883026123, + "grad_norm": 139.59011840820312, + "learning_rate": 3.349364905389032e-08, + "logits/chosen": 0.2000666856765747, + "logits/rejected": 0.15850435197353363, + "logps/chosen": -53.24721145629883, + "logps/ref_chosen": -50.19850158691406, + "logps/ref_rejected": -66.76687622070312, + "logps/rejected": -71.4933090209961, + "loss": 1.0943, + "margin_dpo/margin_mean": 1.677725076675415, + "margin_dpo/margin_std": 2.819387912750244, + "step": 563 + }, + { + "epoch": 0.8526077097505669, + "fcm_dpo/beta": 0.5817031264305115, + "fcm_dpo/delta": -0.0036928579211235046, + "fcm_dpo/margin": 1.7244809865951538, + "fcm_dpo/q_t": 0.3220234513282776, + "grad_norm": 143.81246948242188, + "learning_rate": 3.283557064487785e-08, + "logits/chosen": 0.1359567940235138, + "logits/rejected": 0.10976006835699081, + "logps/chosen": -58.42416000366211, + "logps/ref_chosen": -55.7408447265625, + "logps/ref_rejected": -74.82323455810547, + "logps/rejected": -79.23103332519531, + "loss": 0.9548, + "margin_dpo/margin_mean": 1.7244811058044434, + "margin_dpo/margin_std": 2.30587100982666, + "step": 564 + }, + { + "epoch": 0.854119425547997, + "fcm_dpo/beta": 0.5901836156845093, + "fcm_dpo/delta": 0.04009624570608139, + "fcm_dpo/margin": 1.627647876739502, + "fcm_dpo/q_t": 0.32860618829727173, + "grad_norm": 144.67605590820312, + "learning_rate": 3.218356679178252e-08, + "logits/chosen": 0.16707636415958405, + "logits/rejected": 0.12742547690868378, + "logps/chosen": -61.788658142089844, + "logps/ref_chosen": -58.33738327026367, + "logps/ref_rejected": -78.31776428222656, + "logps/rejected": -83.39668273925781, + "loss": 0.9857, + "margin_dpo/margin_mean": 1.627647876739502, + "margin_dpo/margin_std": 2.2914857864379883, + "step": 565 + }, + { + "epoch": 0.8556311413454271, + "fcm_dpo/beta": 0.6040189266204834, + "fcm_dpo/delta": 0.15839658677577972, + "fcm_dpo/margin": 1.4104197025299072, + "fcm_dpo/q_t": 0.3656797707080841, + "grad_norm": 174.7579345703125, + "learning_rate": 3.1537655732553764e-08, + "logits/chosen": 0.1992546021938324, + "logits/rejected": 0.1789197027683258, + "logps/chosen": -74.33102416992188, + "logps/ref_chosen": -71.22373962402344, + "logps/ref_rejected": -71.11601257324219, + "logps/rejected": -75.63371276855469, + "loss": 1.2269, + "margin_dpo/margin_mean": 1.4104200601577759, + "margin_dpo/margin_std": 2.8447115421295166, + "step": 566 + }, + { + "epoch": 0.8571428571428571, + "fcm_dpo/beta": 0.579740047454834, + "fcm_dpo/delta": -0.17305630445480347, + "fcm_dpo/margin": 1.978826880455017, + "fcm_dpo/q_t": 0.3130984902381897, + "grad_norm": 123.3370132446289, + "learning_rate": 3.089785553471233e-08, + "logits/chosen": 0.1904851794242859, + "logits/rejected": 0.11687320470809937, + "logps/chosen": -55.760459899902344, + "logps/ref_chosen": -52.669273376464844, + "logps/ref_rejected": -74.34785461425781, + "logps/rejected": -79.41786193847656, + "loss": 0.8807, + "margin_dpo/margin_mean": 1.9788269996643066, + "margin_dpo/margin_std": 2.6479549407958984, + "step": 567 + }, + { + "epoch": 0.8586545729402872, + "fcm_dpo/beta": 0.5765559673309326, + "fcm_dpo/delta": -0.18778733909130096, + "fcm_dpo/margin": 2.0179710388183594, + "fcm_dpo/q_t": 0.29743558168411255, + "grad_norm": 112.51110076904297, + "learning_rate": 3.026418409484513e-08, + "logits/chosen": 0.20747870206832886, + "logits/rejected": 0.14340844750404358, + "logps/chosen": -54.99408721923828, + "logps/ref_chosen": -52.178001403808594, + "logps/ref_rejected": -85.8277587890625, + "logps/rejected": -90.66182708740234, + "loss": 0.8043, + "margin_dpo/margin_mean": 2.017970561981201, + "margin_dpo/margin_std": 2.1505703926086426, + "step": 568 + }, + { + "epoch": 0.8601662887377173, + "fcm_dpo/beta": 0.5608316659927368, + "fcm_dpo/delta": 0.08226889371871948, + "fcm_dpo/margin": 1.6459429264068604, + "fcm_dpo/q_t": 0.32129180431365967, + "grad_norm": 118.24040222167969, + "learning_rate": 2.963665913810451e-08, + "logits/chosen": 0.10791079699993134, + "logits/rejected": 0.08581716567277908, + "logps/chosen": -65.56758117675781, + "logps/ref_chosen": -62.649261474609375, + "logps/ref_rejected": -75.4298324584961, + "logps/rejected": -79.99409484863281, + "loss": 0.9399, + "margin_dpo/margin_mean": 1.6459429264068604, + "margin_dpo/margin_std": 2.062713623046875, + "step": 569 + }, + { + "epoch": 0.8616780045351474, + "fcm_dpo/beta": 0.552453875541687, + "fcm_dpo/delta": -0.3056313693523407, + "fcm_dpo/margin": 2.29406476020813, + "fcm_dpo/q_t": 0.271054744720459, + "grad_norm": 103.41246795654297, + "learning_rate": 2.9015298217712453e-08, + "logits/chosen": 0.12330185621976852, + "logits/rejected": 0.05149267241358757, + "logps/chosen": -52.63910675048828, + "logps/ref_chosen": -50.04179382324219, + "logps/ref_rejected": -78.27146911621094, + "logps/rejected": -83.162841796875, + "loss": 0.7286, + "margin_dpo/margin_mean": 2.294064521789551, + "margin_dpo/margin_std": 2.158341884613037, + "step": 570 + }, + { + "epoch": 0.8631897203325775, + "fcm_dpo/beta": 0.5561559200286865, + "fcm_dpo/delta": 0.2247203141450882, + "fcm_dpo/margin": 1.4249439239501953, + "fcm_dpo/q_t": 0.35770949721336365, + "grad_norm": 122.92935180664062, + "learning_rate": 2.840011871446962e-08, + "logits/chosen": 0.1378299742937088, + "logits/rejected": 0.10836352407932281, + "logps/chosen": -56.67723083496094, + "logps/ref_chosen": -53.65681457519531, + "logps/ref_rejected": -66.13298034667969, + "logps/rejected": -70.57833862304688, + "loss": 1.0635, + "margin_dpo/margin_mean": 1.4249444007873535, + "margin_dpo/margin_std": 2.3282651901245117, + "step": 571 + }, + { + "epoch": 0.8647014361300076, + "fcm_dpo/beta": 0.5793824195861816, + "fcm_dpo/delta": 0.19740980863571167, + "fcm_dpo/margin": 1.4121860265731812, + "fcm_dpo/q_t": 0.3518349528312683, + "grad_norm": 155.68394470214844, + "learning_rate": 2.7791137836269158e-08, + "logits/chosen": 0.18134906888008118, + "logits/rejected": 0.20107844471931458, + "logps/chosen": -77.97372436523438, + "logps/ref_chosen": -74.81792449951172, + "logps/ref_rejected": -65.88681030273438, + "logps/rejected": -70.45478820800781, + "loss": 1.0227, + "margin_dpo/margin_mean": 1.4121863842010498, + "margin_dpo/margin_std": 2.1890487670898438, + "step": 572 + }, + { + "epoch": 0.8662131519274376, + "fcm_dpo/beta": 0.5722247362136841, + "fcm_dpo/delta": -0.10332206636667252, + "fcm_dpo/margin": 1.9067519903182983, + "fcm_dpo/q_t": 0.3288191854953766, + "grad_norm": 141.57550048828125, + "learning_rate": 2.718837261761528e-08, + "logits/chosen": 0.16030460596084595, + "logits/rejected": 0.12534594535827637, + "logps/chosen": -71.95037078857422, + "logps/ref_chosen": -68.72564697265625, + "logps/ref_rejected": -88.16201782226562, + "logps/rejected": -93.29348754882812, + "loss": 1.0156, + "margin_dpo/margin_mean": 1.9067527055740356, + "margin_dpo/margin_std": 2.9433274269104004, + "step": 573 + }, + { + "epoch": 0.8677248677248677, + "fcm_dpo/beta": 0.5721937417984009, + "fcm_dpo/delta": -0.07496052980422974, + "fcm_dpo/margin": 1.8618470430374146, + "fcm_dpo/q_t": 0.3078283965587616, + "grad_norm": 114.2187271118164, + "learning_rate": 2.659183991914696e-08, + "logits/chosen": 0.2320360541343689, + "logits/rejected": 0.17432260513305664, + "logps/chosen": -59.44536209106445, + "logps/ref_chosen": -56.31340026855469, + "logps/ref_rejected": -83.91553497314453, + "logps/rejected": -88.90934753417969, + "loss": 0.8193, + "margin_dpo/margin_mean": 1.8618476390838623, + "margin_dpo/margin_std": 2.011713981628418, + "step": 574 + }, + { + "epoch": 0.8692365835222978, + "fcm_dpo/beta": 0.5716170072555542, + "fcm_dpo/delta": 0.18006719648838043, + "fcm_dpo/margin": 1.45901620388031, + "fcm_dpo/q_t": 0.3629787862300873, + "grad_norm": 139.16409301757812, + "learning_rate": 2.600155642716606e-08, + "logits/chosen": 0.24198350310325623, + "logits/rejected": 0.18358173966407776, + "logps/chosen": -67.59683990478516, + "logps/ref_chosen": -64.5841293334961, + "logps/ref_rejected": -93.47034454345703, + "logps/rejected": -97.94207000732422, + "loss": 1.1015, + "margin_dpo/margin_mean": 1.4590164422988892, + "margin_dpo/margin_std": 2.4919562339782715, + "step": 575 + }, + { + "epoch": 0.8707482993197279, + "fcm_dpo/beta": 0.5859131813049316, + "fcm_dpo/delta": 0.05836522579193115, + "fcm_dpo/margin": 1.6160252094268799, + "fcm_dpo/q_t": 0.34040236473083496, + "grad_norm": 173.32106018066406, + "learning_rate": 2.5417538653170754e-08, + "logits/chosen": 0.18478666245937347, + "logits/rejected": 0.1120123565196991, + "logps/chosen": -55.915428161621094, + "logps/ref_chosen": -53.28052520751953, + "logps/ref_rejected": -84.2000503540039, + "logps/rejected": -88.45097351074219, + "loss": 1.0198, + "margin_dpo/margin_mean": 1.6160247325897217, + "margin_dpo/margin_std": 2.3769047260284424, + "step": 576 + }, + { + "epoch": 0.872260015117158, + "fcm_dpo/beta": 0.6100517511367798, + "fcm_dpo/delta": 0.2396540343761444, + "fcm_dpo/margin": 1.2779208421707153, + "fcm_dpo/q_t": 0.36663320660591125, + "grad_norm": 141.51870727539062, + "learning_rate": 2.4839802933393607e-08, + "logits/chosen": 0.15433327853679657, + "logits/rejected": 0.13624653220176697, + "logps/chosen": -65.23735046386719, + "logps/ref_chosen": -62.32468795776367, + "logps/ref_rejected": -67.300537109375, + "logps/rejected": -71.49111938476562, + "loss": 1.1174, + "margin_dpo/margin_mean": 1.2779215574264526, + "margin_dpo/margin_std": 2.3165364265441895, + "step": 577 + }, + { + "epoch": 0.873771730914588, + "fcm_dpo/beta": 0.6398091316223145, + "fcm_dpo/delta": 0.2764911949634552, + "fcm_dpo/margin": 1.1640191078186035, + "fcm_dpo/q_t": 0.3820345997810364, + "grad_norm": 158.93179321289062, + "learning_rate": 2.4268365428344733e-08, + "logits/chosen": 0.20606505870819092, + "logits/rejected": 0.1820131242275238, + "logps/chosen": -59.829124450683594, + "logps/ref_chosen": -56.65557861328125, + "logps/ref_rejected": -68.21835327148438, + "logps/rejected": -72.55591583251953, + "loss": 1.1219, + "margin_dpo/margin_mean": 1.1640193462371826, + "margin_dpo/margin_std": 2.143388271331787, + "step": 578 + }, + { + "epoch": 0.8752834467120182, + "fcm_dpo/beta": 0.6147720217704773, + "fcm_dpo/delta": -0.3748418092727661, + "fcm_dpo/margin": 2.1486082077026367, + "fcm_dpo/q_t": 0.2804148197174072, + "grad_norm": 129.94529724121094, + "learning_rate": 2.3703242122359357e-08, + "logits/chosen": 0.1448242962360382, + "logits/rejected": 0.1141422688961029, + "logps/chosen": -59.766502380371094, + "logps/ref_chosen": -56.809661865234375, + "logps/ref_rejected": -68.09613037109375, + "logps/rejected": -73.20157623291016, + "loss": 0.779, + "margin_dpo/margin_mean": 2.1486077308654785, + "margin_dpo/margin_std": 2.415778636932373, + "step": 579 + }, + { + "epoch": 0.8767951625094482, + "fcm_dpo/beta": 0.6254961490631104, + "fcm_dpo/delta": 0.14091333746910095, + "fcm_dpo/margin": 1.389689326286316, + "fcm_dpo/q_t": 0.3568766117095947, + "grad_norm": 135.1801300048828, + "learning_rate": 2.3144448823151392e-08, + "logits/chosen": 0.1448507308959961, + "logits/rejected": 0.10793846845626831, + "logps/chosen": -60.63453674316406, + "logps/ref_chosen": -57.70011520385742, + "logps/ref_rejected": -77.90664672851562, + "logps/rejected": -82.23075866699219, + "loss": 1.1175, + "margin_dpo/margin_mean": 1.3896892070770264, + "margin_dpo/margin_std": 2.38545823097229, + "step": 580 + }, + { + "epoch": 0.8783068783068783, + "fcm_dpo/beta": 0.6277990341186523, + "fcm_dpo/delta": 0.06906095892190933, + "fcm_dpo/margin": 1.492587924003601, + "fcm_dpo/q_t": 0.3425326943397522, + "grad_norm": 166.3179473876953, + "learning_rate": 2.259200116137039e-08, + "logits/chosen": 0.22638601064682007, + "logits/rejected": 0.17741230130195618, + "logps/chosen": -62.56743240356445, + "logps/ref_chosen": -59.332359313964844, + "logps/ref_rejected": -83.64482116699219, + "logps/rejected": -88.37248229980469, + "loss": 1.0185, + "margin_dpo/margin_mean": 1.4925878047943115, + "margin_dpo/margin_std": 2.3351616859436035, + "step": 581 + }, + { + "epoch": 0.8798185941043084, + "fcm_dpo/beta": 0.6253724098205566, + "fcm_dpo/delta": -0.07393016666173935, + "fcm_dpo/margin": 1.7041115760803223, + "fcm_dpo/q_t": 0.31156644225120544, + "grad_norm": 143.69277954101562, + "learning_rate": 2.204591459016525e-08, + "logits/chosen": 0.1830708086490631, + "logits/rejected": 0.19862952828407288, + "logps/chosen": -66.9874267578125, + "logps/ref_chosen": -64.16285705566406, + "logps/ref_rejected": -58.632896423339844, + "logps/rejected": -63.161582946777344, + "loss": 0.8907, + "margin_dpo/margin_mean": 1.7041112184524536, + "margin_dpo/margin_std": 2.1104648113250732, + "step": 582 + }, + { + "epoch": 0.8813303099017384, + "fcm_dpo/beta": 0.6326082944869995, + "fcm_dpo/delta": 0.08823379874229431, + "fcm_dpo/margin": 1.4541677236557007, + "fcm_dpo/q_t": 0.35390377044677734, + "grad_norm": 158.56170654296875, + "learning_rate": 2.1506204384751064e-08, + "logits/chosen": 0.27797916531562805, + "logits/rejected": 0.20349468290805817, + "logps/chosen": -54.86329650878906, + "logps/ref_chosen": -51.87239456176758, + "logps/ref_rejected": -83.86331176757812, + "logps/rejected": -88.30838012695312, + "loss": 1.1301, + "margin_dpo/margin_mean": 1.4541676044464111, + "margin_dpo/margin_std": 2.6575491428375244, + "step": 583 + }, + { + "epoch": 0.8828420256991686, + "fcm_dpo/beta": 0.6200550198554993, + "fcm_dpo/delta": -0.1539473831653595, + "fcm_dpo/margin": 1.8320786952972412, + "fcm_dpo/q_t": 0.32225194573402405, + "grad_norm": 134.9540252685547, + "learning_rate": 2.09728856419826e-08, + "logits/chosen": 0.23410704731941223, + "logits/rejected": 0.15715327858924866, + "logps/chosen": -49.28221130371094, + "logps/ref_chosen": -46.571388244628906, + "logps/ref_rejected": -80.67969512939453, + "logps/rejected": -85.22259521484375, + "loss": 0.9929, + "margin_dpo/margin_mean": 1.8320791721343994, + "margin_dpo/margin_std": 2.6820805072784424, + "step": 584 + }, + { + "epoch": 0.8843537414965986, + "fcm_dpo/beta": 0.6162490844726562, + "fcm_dpo/delta": 0.03570966795086861, + "fcm_dpo/margin": 1.5696362257003784, + "fcm_dpo/q_t": 0.3259866535663605, + "grad_norm": 138.78614807128906, + "learning_rate": 2.044597327993153e-08, + "logits/chosen": 0.16352935135364532, + "logits/rejected": 0.13112275302410126, + "logps/chosen": -60.831146240234375, + "logps/ref_chosen": -58.124534606933594, + "logps/ref_rejected": -79.00538635253906, + "logps/rejected": -83.28163146972656, + "loss": 0.9773, + "margin_dpo/margin_mean": 1.5696361064910889, + "margin_dpo/margin_std": 2.2082860469818115, + "step": 585 + }, + { + "epoch": 0.8858654572940288, + "fcm_dpo/beta": 0.602626621723175, + "fcm_dpo/delta": -0.15734781324863434, + "fcm_dpo/margin": 1.8877205848693848, + "fcm_dpo/q_t": 0.30060410499572754, + "grad_norm": 121.3906021118164, + "learning_rate": 1.9925482037469187e-08, + "logits/chosen": 0.16395865380764008, + "logits/rejected": 0.12222093343734741, + "logps/chosen": -57.20876693725586, + "logps/ref_chosen": -54.10163879394531, + "logps/ref_rejected": -63.72113037109375, + "logps/rejected": -68.71598052978516, + "loss": 0.8797, + "margin_dpo/margin_mean": 1.8877204656600952, + "margin_dpo/margin_std": 2.3431496620178223, + "step": 586 + }, + { + "epoch": 0.8873771730914588, + "fcm_dpo/beta": 0.6007733941078186, + "fcm_dpo/delta": 0.015819646418094635, + "fcm_dpo/margin": 1.6403778791427612, + "fcm_dpo/q_t": 0.3280307352542877, + "grad_norm": 161.0563507080078, + "learning_rate": 1.9411426473854687e-08, + "logits/chosen": 0.20693828165531158, + "logits/rejected": 0.18662090599536896, + "logps/chosen": -66.15540313720703, + "logps/ref_chosen": -63.41719436645508, + "logps/ref_rejected": -63.47003936767578, + "logps/rejected": -67.84861755371094, + "loss": 1.0373, + "margin_dpo/margin_mean": 1.6403785943984985, + "margin_dpo/margin_std": 2.5440006256103516, + "step": 587 + }, + { + "epoch": 0.8888888888888888, + "fcm_dpo/beta": 0.5988498330116272, + "fcm_dpo/delta": -0.019005782902240753, + "fcm_dpo/margin": 1.6973689794540405, + "fcm_dpo/q_t": 0.3193732500076294, + "grad_norm": 139.44850158691406, + "learning_rate": 1.890382096832699e-08, + "logits/chosen": 0.2137565314769745, + "logits/rejected": 0.17403748631477356, + "logps/chosen": -65.03866577148438, + "logps/ref_chosen": -62.20103454589844, + "logps/ref_rejected": -82.10249328613281, + "logps/rejected": -86.63749694824219, + "loss": 0.9558, + "margin_dpo/margin_mean": 1.697368860244751, + "margin_dpo/margin_std": 2.3547918796539307, + "step": 588 + }, + { + "epoch": 0.890400604686319, + "fcm_dpo/beta": 0.5705356597900391, + "fcm_dpo/delta": -0.3349605202674866, + "fcm_dpo/margin": 2.2617807388305664, + "fcm_dpo/q_t": 0.2769893407821655, + "grad_norm": 125.83250427246094, + "learning_rate": 1.840267971970344e-08, + "logits/chosen": 0.16948330402374268, + "logits/rejected": 0.14072127640247345, + "logps/chosen": -59.358924865722656, + "logps/ref_chosen": -56.71361541748047, + "logps/ref_rejected": -76.7366943359375, + "logps/rejected": -81.64378356933594, + "loss": 0.7735, + "margin_dpo/margin_mean": 2.2617812156677246, + "margin_dpo/margin_std": 2.493180274963379, + "step": 589 + }, + { + "epoch": 0.891912320483749, + "fcm_dpo/beta": 0.5450751781463623, + "fcm_dpo/delta": -0.2230319082736969, + "fcm_dpo/margin": 2.1952528953552246, + "fcm_dpo/q_t": 0.2884211540222168, + "grad_norm": 120.71878814697266, + "learning_rate": 1.7908016745981856e-08, + "logits/chosen": 0.12301607429981232, + "logits/rejected": 0.09767352789640427, + "logps/chosen": -69.55258178710938, + "logps/ref_chosen": -66.5138168334961, + "logps/ref_rejected": -85.70820617675781, + "logps/rejected": -90.94223022460938, + "loss": 0.8243, + "margin_dpo/margin_mean": 2.1952528953552246, + "margin_dpo/margin_std": 2.4201831817626953, + "step": 590 + }, + { + "epoch": 0.8934240362811792, + "fcm_dpo/beta": 0.5360534191131592, + "fcm_dpo/delta": 0.06881964951753616, + "fcm_dpo/margin": 1.7440357208251953, + "fcm_dpo/q_t": 0.3370535969734192, + "grad_norm": 125.32640838623047, + "learning_rate": 1.7419845883949098e-08, + "logits/chosen": 0.2466006577014923, + "logits/rejected": 0.1991560012102127, + "logps/chosen": -63.56512451171875, + "logps/ref_chosen": -60.697181701660156, + "logps/ref_rejected": -86.12278747558594, + "logps/rejected": -90.73477172851562, + "loss": 0.9852, + "margin_dpo/margin_mean": 1.7440353631973267, + "margin_dpo/margin_std": 2.4317829608917236, + "step": 591 + }, + { + "epoch": 0.8949357520786092, + "fcm_dpo/beta": 0.5456318855285645, + "fcm_dpo/delta": 0.048897288739681244, + "fcm_dpo/margin": 1.750661849975586, + "fcm_dpo/q_t": 0.3427332043647766, + "grad_norm": 132.4198760986328, + "learning_rate": 1.6938180788793556e-08, + "logits/chosen": 0.18844714760780334, + "logits/rejected": 0.10343804955482483, + "logps/chosen": -54.16868591308594, + "logps/ref_chosen": -51.237327575683594, + "logps/ref_rejected": -81.60242462158203, + "logps/rejected": -86.2844467163086, + "loss": 0.9699, + "margin_dpo/margin_mean": 1.750661849975586, + "margin_dpo/margin_std": 2.502837657928467, + "step": 592 + }, + { + "epoch": 0.8964474678760394, + "fcm_dpo/beta": 0.5585802793502808, + "fcm_dpo/delta": 0.09174495190382004, + "fcm_dpo/margin": 1.640941858291626, + "fcm_dpo/q_t": 0.3366505205631256, + "grad_norm": 122.52076721191406, + "learning_rate": 1.6463034933723336e-08, + "logits/chosen": 0.13056322932243347, + "logits/rejected": 0.061800211668014526, + "logps/chosen": -44.626976013183594, + "logps/ref_chosen": -42.08000183105469, + "logps/ref_rejected": -68.47499084472656, + "logps/rejected": -72.66290283203125, + "loss": 1.0262, + "margin_dpo/margin_mean": 1.640941858291626, + "margin_dpo/margin_std": 2.4754600524902344, + "step": 593 + }, + { + "epoch": 0.8979591836734694, + "fcm_dpo/beta": 0.5791330337524414, + "fcm_dpo/delta": 0.23276039958000183, + "fcm_dpo/margin": 1.3571163415908813, + "fcm_dpo/q_t": 0.36690980195999146, + "grad_norm": 131.3136749267578, + "learning_rate": 1.5994421609589385e-08, + "logits/chosen": 0.12303361296653748, + "logits/rejected": 0.10529161989688873, + "logps/chosen": -66.68218994140625, + "logps/ref_chosen": -63.658668518066406, + "logps/ref_rejected": -70.35597229003906, + "logps/rejected": -74.73660278320312, + "loss": 1.0883, + "margin_dpo/margin_mean": 1.3571163415908813, + "margin_dpo/margin_std": 2.378408193588257, + "step": 594 + }, + { + "epoch": 0.8994708994708994, + "fcm_dpo/beta": 0.5620189905166626, + "fcm_dpo/delta": -0.30020615458488464, + "fcm_dpo/margin": 2.245738983154297, + "fcm_dpo/q_t": 0.2832089960575104, + "grad_norm": 123.73275756835938, + "learning_rate": 1.553235392451377e-08, + "logits/chosen": 0.2004849910736084, + "logits/rejected": 0.12713924050331116, + "logps/chosen": -59.19725799560547, + "logps/ref_chosen": -56.21875762939453, + "logps/ref_rejected": -83.95773315429688, + "logps/rejected": -89.18197631835938, + "loss": 0.8647, + "margin_dpo/margin_mean": 2.245739459991455, + "margin_dpo/margin_std": 2.714977741241455, + "step": 595 + }, + { + "epoch": 0.9009826152683296, + "fcm_dpo/beta": 0.5812000036239624, + "fcm_dpo/delta": 0.33480995893478394, + "fcm_dpo/margin": 1.184531569480896, + "fcm_dpo/q_t": 0.3911210000514984, + "grad_norm": 142.58895874023438, + "learning_rate": 1.507684480352292e-08, + "logits/chosen": 0.11143651604652405, + "logits/rejected": 0.11058718711137772, + "logps/chosen": -71.51290130615234, + "logps/ref_chosen": -68.48088073730469, + "logps/ref_rejected": -61.732967376708984, + "logps/rejected": -65.94952392578125, + "loss": 1.1753, + "margin_dpo/margin_mean": 1.1845312118530273, + "margin_dpo/margin_std": 2.4391579627990723, + "step": 596 + }, + { + "epoch": 0.9024943310657596, + "fcm_dpo/beta": 0.5947533249855042, + "fcm_dpo/delta": 0.0645606741309166, + "fcm_dpo/margin": 1.5828521251678467, + "fcm_dpo/q_t": 0.3390669822692871, + "grad_norm": 118.60302734375, + "learning_rate": 1.4627906988186111e-08, + "logits/chosen": 0.1252235770225525, + "logits/rejected": 0.09921297430992126, + "logps/chosen": -51.60657501220703, + "logps/ref_chosen": -48.85750961303711, + "logps/ref_rejected": -55.068084716796875, + "logps/rejected": -59.400001525878906, + "loss": 0.963, + "margin_dpo/margin_mean": 1.5828520059585571, + "margin_dpo/margin_std": 2.2610087394714355, + "step": 597 + }, + { + "epoch": 0.9040060468631897, + "fcm_dpo/beta": 0.6207314729690552, + "fcm_dpo/delta": 0.23146888613700867, + "fcm_dpo/margin": 1.2671819925308228, + "fcm_dpo/q_t": 0.38011521100997925, + "grad_norm": 151.75205993652344, + "learning_rate": 1.4185553036259095e-08, + "logits/chosen": 0.1728542447090149, + "logits/rejected": 0.11698366701602936, + "logps/chosen": -62.22666931152344, + "logps/ref_chosen": -58.88715362548828, + "logps/ref_rejected": -81.43145751953125, + "logps/rejected": -86.03816223144531, + "loss": 1.1674, + "margin_dpo/margin_mean": 1.2671819925308228, + "margin_dpo/margin_std": 2.4872426986694336, + "step": 598 + }, + { + "epoch": 0.9055177626606198, + "fcm_dpo/beta": 0.6450425386428833, + "fcm_dpo/delta": 0.09236402064561844, + "fcm_dpo/margin": 1.41303288936615, + "fcm_dpo/q_t": 0.3642594814300537, + "grad_norm": 166.78338623046875, + "learning_rate": 1.3749795321332885e-08, + "logits/chosen": 0.280083566904068, + "logits/rejected": 0.24155710637569427, + "logps/chosen": -60.974185943603516, + "logps/ref_chosen": -57.60719299316406, + "logps/ref_rejected": -71.80469512939453, + "logps/rejected": -76.584716796875, + "loss": 1.1191, + "margin_dpo/margin_mean": 1.4130332469940186, + "margin_dpo/margin_std": 2.563547134399414, + "step": 599 + }, + { + "epoch": 0.9070294784580499, + "fcm_dpo/beta": 0.6648072004318237, + "fcm_dpo/delta": 0.13435859978199005, + "fcm_dpo/margin": 1.3058445453643799, + "fcm_dpo/q_t": 0.35756915807724, + "grad_norm": 156.00218200683594, + "learning_rate": 1.3320646032487393e-08, + "logits/chosen": 0.2286624014377594, + "logits/rejected": 0.19494396448135376, + "logps/chosen": -61.69401550292969, + "logps/ref_chosen": -58.44231414794922, + "logps/ref_rejected": -83.64639282226562, + "logps/rejected": -88.20393371582031, + "loss": 1.0999, + "margin_dpo/margin_mean": 1.3058440685272217, + "margin_dpo/margin_std": 2.190995693206787, + "step": 600 + }, + { + "epoch": 0.9070294784580499, + "eval_fcm_dpo/beta": 0.6568994522094727, + "eval_logits/chosen": 0.19621142745018005, + "eval_logits/rejected": 0.15854774415493011, + "eval_logps/chosen": -77.82337951660156, + "eval_logps/ref_chosen": -74.85946655273438, + "eval_logps/ref_rejected": -79.54898834228516, + "eval_logps/rejected": -83.91117095947266, + "eval_loss": 0.5679579377174377, + "eval_margin_dpo/margin_mean": 1.3982763290405273, + "eval_margin_dpo/margin_std": 2.4811148643493652, + "eval_runtime": 37.9915, + "eval_samples_per_second": 60.619, + "eval_steps_per_second": 1.895, + "step": 600 + }, + { + "epoch": 0.90854119425548, + "fcm_dpo/beta": 0.6349242925643921, + "fcm_dpo/delta": -0.1545478105545044, + "fcm_dpo/margin": 1.7845826148986816, + "fcm_dpo/q_t": 0.3199031949043274, + "grad_norm": 141.2012176513672, + "learning_rate": 1.2898117173950868e-08, + "logits/chosen": 0.18726052343845367, + "logits/rejected": 0.13292908668518066, + "logps/chosen": -58.19509506225586, + "logps/ref_chosen": -55.59432601928711, + "logps/ref_rejected": -83.68630981445312, + "logps/rejected": -88.07167053222656, + "loss": 0.9784, + "margin_dpo/margin_mean": 1.7845829725265503, + "margin_dpo/margin_std": 2.5417721271514893, + "step": 601 + }, + { + "epoch": 0.91005291005291, + "fcm_dpo/beta": 0.6274067759513855, + "fcm_dpo/delta": -0.15826797485351562, + "fcm_dpo/margin": 1.8172000646591187, + "fcm_dpo/q_t": 0.3086685538291931, + "grad_norm": 139.8549346923828, + "learning_rate": 1.2482220564763667e-08, + "logits/chosen": 0.15303432941436768, + "logits/rejected": 0.11989377439022064, + "logps/chosen": -58.714149475097656, + "logps/ref_chosen": -56.349185943603516, + "logps/ref_rejected": -71.9959716796875, + "logps/rejected": -76.17813110351562, + "loss": 0.887, + "margin_dpo/margin_mean": 1.81719970703125, + "margin_dpo/margin_std": 2.2619805335998535, + "step": 602 + }, + { + "epoch": 0.9115646258503401, + "fcm_dpo/beta": 0.6145593523979187, + "fcm_dpo/delta": 0.003954831510782242, + "fcm_dpo/margin": 1.620744228363037, + "fcm_dpo/q_t": 0.32022157311439514, + "grad_norm": 121.14891052246094, + "learning_rate": 1.2072967838448051e-08, + "logits/chosen": 0.13416381180286407, + "logits/rejected": 0.08777206391096115, + "logps/chosen": -55.47536849975586, + "logps/ref_chosen": -53.16838836669922, + "logps/ref_rejected": -73.8604736328125, + "logps/rejected": -77.78820037841797, + "loss": 0.9039, + "margin_dpo/margin_mean": 1.620744228363037, + "margin_dpo/margin_std": 2.0299124717712402, + "step": 603 + }, + { + "epoch": 0.9130763416477702, + "fcm_dpo/beta": 0.6099786162376404, + "fcm_dpo/delta": -0.06591600924730301, + "fcm_dpo/margin": 1.7355551719665527, + "fcm_dpo/q_t": 0.3229469954967499, + "grad_norm": 138.081298828125, + "learning_rate": 1.1670370442682459e-08, + "logits/chosen": 0.1237378865480423, + "logits/rejected": 0.11915041506290436, + "logps/chosen": -75.28321838378906, + "logps/ref_chosen": -72.64942169189453, + "logps/ref_rejected": -69.8792724609375, + "logps/rejected": -74.24861907958984, + "loss": 0.9893, + "margin_dpo/margin_mean": 1.7355563640594482, + "margin_dpo/margin_std": 2.5649847984313965, + "step": 604 + }, + { + "epoch": 0.9145880574452003, + "fcm_dpo/beta": 0.6022388339042664, + "fcm_dpo/delta": -0.05076969414949417, + "fcm_dpo/margin": 1.7350056171417236, + "fcm_dpo/q_t": 0.3227270543575287, + "grad_norm": 143.35333251953125, + "learning_rate": 1.1274439638981532e-08, + "logits/chosen": 0.22395864129066467, + "logits/rejected": 0.17329740524291992, + "logps/chosen": -64.78287506103516, + "logps/ref_chosen": -61.61284637451172, + "logps/ref_rejected": -79.34398651123047, + "logps/rejected": -84.24903106689453, + "loss": 0.9715, + "margin_dpo/margin_mean": 1.7350056171417236, + "margin_dpo/margin_std": 2.434452772140503, + "step": 605 + }, + { + "epoch": 0.9160997732426304, + "fcm_dpo/beta": 0.6224014759063721, + "fcm_dpo/delta": 0.12130826711654663, + "fcm_dpo/margin": 1.4212950468063354, + "fcm_dpo/q_t": 0.35074251890182495, + "grad_norm": 151.14439392089844, + "learning_rate": 1.0885186502381016e-08, + "logits/chosen": 0.19038084149360657, + "logits/rejected": 0.14078065752983093, + "logps/chosen": -57.25608825683594, + "logps/ref_chosen": -54.46424102783203, + "logps/ref_rejected": -79.62708282470703, + "logps/rejected": -83.84022521972656, + "loss": 1.0273, + "margin_dpo/margin_mean": 1.421295404434204, + "margin_dpo/margin_std": 2.172295331954956, + "step": 606 + }, + { + "epoch": 0.9176114890400605, + "fcm_dpo/beta": 0.5959450006484985, + "fcm_dpo/delta": -0.13134704530239105, + "fcm_dpo/margin": 1.8611319065093994, + "fcm_dpo/q_t": 0.31175029277801514, + "grad_norm": 141.19546508789062, + "learning_rate": 1.0502621921127774e-08, + "logits/chosen": 0.14506211876869202, + "logits/rejected": 0.1156059056520462, + "logps/chosen": -65.67162322998047, + "logps/ref_chosen": -62.86086654663086, + "logps/ref_rejected": -72.5501937866211, + "logps/rejected": -77.22207641601562, + "loss": 0.9338, + "margin_dpo/margin_mean": 1.8611321449279785, + "margin_dpo/margin_std": 2.463554859161377, + "step": 607 + }, + { + "epoch": 0.9191232048374905, + "fcm_dpo/beta": 0.6229555606842041, + "fcm_dpo/delta": 0.24546337127685547, + "fcm_dpo/margin": 1.2424054145812988, + "fcm_dpo/q_t": 0.36555221676826477, + "grad_norm": 181.87330627441406, + "learning_rate": 1.0126756596375685e-08, + "logits/chosen": 0.16855892539024353, + "logits/rejected": 0.12452598661184311, + "logps/chosen": -66.26856994628906, + "logps/ref_chosen": -63.18071746826172, + "logps/ref_rejected": -99.15888214111328, + "logps/rejected": -103.48914337158203, + "loss": 1.0982, + "margin_dpo/margin_mean": 1.2424057722091675, + "margin_dpo/margin_std": 2.158616542816162, + "step": 608 + }, + { + "epoch": 0.9206349206349206, + "fcm_dpo/beta": 0.600549578666687, + "fcm_dpo/delta": -0.21868403255939484, + "fcm_dpo/margin": 1.9679713249206543, + "fcm_dpo/q_t": 0.2971384525299072, + "grad_norm": 103.79540252685547, + "learning_rate": 9.757601041885694e-09, + "logits/chosen": 0.21697968244552612, + "logits/rejected": 0.18763911724090576, + "logps/chosen": -51.205020904541016, + "logps/ref_chosen": -48.62322235107422, + "logps/ref_rejected": -68.28271484375, + "logps/rejected": -72.83248901367188, + "loss": 0.8665, + "margin_dpo/margin_mean": 1.9679714441299438, + "margin_dpo/margin_std": 2.3338661193847656, + "step": 609 + }, + { + "epoch": 0.9221466364323507, + "fcm_dpo/beta": 0.6042653322219849, + "fcm_dpo/delta": -0.03405376523733139, + "fcm_dpo/margin": 1.7052839994430542, + "fcm_dpo/q_t": 0.3533821403980255, + "grad_norm": 194.40943908691406, + "learning_rate": 9.395165583732379e-09, + "logits/chosen": 0.1579647958278656, + "logits/rejected": 0.1604270040988922, + "logps/chosen": -75.84585571289062, + "logps/ref_chosen": -72.66513061523438, + "logps/ref_rejected": -87.15310668945312, + "logps/rejected": -92.03912353515625, + "loss": 1.1081, + "margin_dpo/margin_mean": 1.7052838802337646, + "margin_dpo/margin_std": 3.0106663703918457, + "step": 610 + }, + { + "epoch": 0.9236583522297808, + "fcm_dpo/beta": 0.6121885776519775, + "fcm_dpo/delta": 0.25364020466804504, + "fcm_dpo/margin": 1.2458240985870361, + "fcm_dpo/q_t": 0.36857932806015015, + "grad_norm": 133.9265899658203, + "learning_rate": 9.03946036001449e-09, + "logits/chosen": 0.2268466055393219, + "logits/rejected": 0.1877264380455017, + "logps/chosen": -51.286346435546875, + "logps/ref_chosen": -48.30857849121094, + "logps/ref_rejected": -70.6141128540039, + "logps/rejected": -74.83771514892578, + "loss": 1.0992, + "margin_dpo/margin_mean": 1.2458235025405884, + "margin_dpo/margin_std": 2.115691661834717, + "step": 611 + }, + { + "epoch": 0.9251700680272109, + "fcm_dpo/beta": 0.6135225296020508, + "fcm_dpo/delta": -0.24519576132297516, + "fcm_dpo/margin": 1.9819927215576172, + "fcm_dpo/q_t": 0.30337560176849365, + "grad_norm": 141.30982971191406, + "learning_rate": 8.690495320571839e-09, + "logits/chosen": 0.10801693797111511, + "logits/rejected": 0.059915874153375626, + "logps/chosen": -64.04043579101562, + "logps/ref_chosen": -61.23155975341797, + "logps/ref_rejected": -94.37979888916016, + "logps/rejected": -99.17066955566406, + "loss": 0.9409, + "margin_dpo/margin_mean": 1.9819923639297485, + "margin_dpo/margin_std": 2.702681064605713, + "step": 612 + }, + { + "epoch": 0.926681783824641, + "fcm_dpo/beta": 0.5761626958847046, + "fcm_dpo/delta": -0.3495950698852539, + "fcm_dpo/margin": 2.265277862548828, + "fcm_dpo/q_t": 0.28574904799461365, + "grad_norm": 127.05760955810547, + "learning_rate": 8.348280226706722e-09, + "logits/chosen": 0.11358965933322906, + "logits/rejected": 0.09932907670736313, + "logps/chosen": -56.594276428222656, + "logps/ref_chosen": -53.98310852050781, + "logps/ref_rejected": -58.32208251953125, + "logps/rejected": -63.19852828979492, + "loss": 0.8489, + "margin_dpo/margin_mean": 2.2652783393859863, + "margin_dpo/margin_std": 2.772947072982788, + "step": 613 + }, + { + "epoch": 0.9281934996220711, + "fcm_dpo/beta": 0.569689929485321, + "fcm_dpo/delta": 0.031611911952495575, + "fcm_dpo/margin": 1.7016217708587646, + "fcm_dpo/q_t": 0.3136371970176697, + "grad_norm": 121.61019134521484, + "learning_rate": 8.012824650910937e-09, + "logits/chosen": 0.2070479393005371, + "logits/rejected": 0.1917956918478012, + "logps/chosen": -63.198665618896484, + "logps/ref_chosen": -60.24303436279297, + "logps/ref_rejected": -72.26258850097656, + "logps/rejected": -76.91984558105469, + "loss": 0.8664, + "margin_dpo/margin_mean": 1.701621174812317, + "margin_dpo/margin_std": 1.9565538167953491, + "step": 614 + }, + { + "epoch": 0.9297052154195011, + "fcm_dpo/beta": 0.5607982873916626, + "fcm_dpo/delta": -0.03319869935512543, + "fcm_dpo/margin": 1.835862398147583, + "fcm_dpo/q_t": 0.329486608505249, + "grad_norm": 150.09515380859375, + "learning_rate": 7.684137976598088e-09, + "logits/chosen": 0.1885606348514557, + "logits/rejected": 0.15545931458473206, + "logps/chosen": -75.15232849121094, + "logps/ref_chosen": -72.09467315673828, + "logps/ref_rejected": -104.02980041503906, + "logps/rejected": -108.92332458496094, + "loss": 0.9693, + "margin_dpo/margin_mean": 1.8358616828918457, + "margin_dpo/margin_std": 2.62037992477417, + "step": 615 + }, + { + "epoch": 0.9312169312169312, + "fcm_dpo/beta": 0.5787394046783447, + "fcm_dpo/delta": 0.22798365354537964, + "fcm_dpo/margin": 1.3659520149230957, + "fcm_dpo/q_t": 0.36905261874198914, + "grad_norm": 138.57786560058594, + "learning_rate": 7.36222939784098e-09, + "logits/chosen": 0.17158043384552002, + "logits/rejected": 0.1132676973938942, + "logps/chosen": -61.71527862548828, + "logps/ref_chosen": -58.530723571777344, + "logps/ref_rejected": -75.48025512695312, + "logps/rejected": -80.03076171875, + "loss": 1.0819, + "margin_dpo/margin_mean": 1.365952491760254, + "margin_dpo/margin_std": 2.3640403747558594, + "step": 616 + }, + { + "epoch": 0.9327286470143613, + "fcm_dpo/beta": 0.5880202651023865, + "fcm_dpo/delta": 0.12598100304603577, + "fcm_dpo/margin": 1.4980671405792236, + "fcm_dpo/q_t": 0.35036730766296387, + "grad_norm": 136.1953887939453, + "learning_rate": 7.047107919114586e-09, + "logits/chosen": 0.1694568246603012, + "logits/rejected": 0.1344485729932785, + "logps/chosen": -60.68329620361328, + "logps/ref_chosen": -57.608673095703125, + "logps/ref_rejected": -81.22109985351562, + "logps/rejected": -85.79379272460938, + "loss": 1.0066, + "margin_dpo/margin_mean": 1.4980677366256714, + "margin_dpo/margin_std": 2.2009902000427246, + "step": 617 + }, + { + "epoch": 0.9342403628117913, + "fcm_dpo/beta": 0.5951837301254272, + "fcm_dpo/delta": 0.013019606471061707, + "fcm_dpo/margin": 1.654120922088623, + "fcm_dpo/q_t": 0.326251745223999, + "grad_norm": 141.9059295654297, + "learning_rate": 6.738782355044048e-09, + "logits/chosen": 0.1482279747724533, + "logits/rejected": 0.07427319884300232, + "logps/chosen": -59.329376220703125, + "logps/ref_chosen": -56.69594192504883, + "logps/ref_rejected": -85.92362976074219, + "logps/rejected": -90.21118927001953, + "loss": 0.9343, + "margin_dpo/margin_mean": 1.6541210412979126, + "margin_dpo/margin_std": 2.201620578765869, + "step": 618 + }, + { + "epoch": 0.9357520786092215, + "fcm_dpo/beta": 0.6088467836380005, + "fcm_dpo/delta": 0.07530087977647781, + "fcm_dpo/margin": 1.53031587600708, + "fcm_dpo/q_t": 0.3448328375816345, + "grad_norm": 150.22421264648438, + "learning_rate": 6.437261330158206e-09, + "logits/chosen": 0.22948047518730164, + "logits/rejected": 0.18049129843711853, + "logps/chosen": -56.79576873779297, + "logps/ref_chosen": -54.05841827392578, + "logps/ref_rejected": -83.55493927001953, + "logps/rejected": -87.82260131835938, + "loss": 1.0339, + "margin_dpo/margin_mean": 1.53031587600708, + "margin_dpo/margin_std": 2.370837450027466, + "step": 619 + }, + { + "epoch": 0.9372637944066515, + "fcm_dpo/beta": 0.6322528123855591, + "fcm_dpo/delta": 0.09385835379362106, + "fcm_dpo/margin": 1.4362270832061768, + "fcm_dpo/q_t": 0.3465351164340973, + "grad_norm": 173.79412841796875, + "learning_rate": 6.142553278648238e-09, + "logits/chosen": 0.17980945110321045, + "logits/rejected": 0.16840043663978577, + "logps/chosen": -66.160888671875, + "logps/ref_chosen": -63.36971664428711, + "logps/ref_rejected": -65.68269348144531, + "logps/rejected": -69.91009521484375, + "loss": 1.0082, + "margin_dpo/margin_mean": 1.4362270832061768, + "margin_dpo/margin_std": 2.130248546600342, + "step": 620 + }, + { + "epoch": 0.9387755102040817, + "fcm_dpo/beta": 0.6423999071121216, + "fcm_dpo/delta": 0.0907469242811203, + "fcm_dpo/margin": 1.4205418825149536, + "fcm_dpo/q_t": 0.35109221935272217, + "grad_norm": 151.4163055419922, + "learning_rate": 5.854666444131934e-09, + "logits/chosen": 0.19905002415180206, + "logits/rejected": 0.12930849194526672, + "logps/chosen": -55.218666076660156, + "logps/ref_chosen": -52.321224212646484, + "logps/ref_rejected": -88.09001159667969, + "logps/rejected": -92.40798950195312, + "loss": 1.0421, + "margin_dpo/margin_mean": 1.420541524887085, + "margin_dpo/margin_std": 2.2282516956329346, + "step": 621 + }, + { + "epoch": 0.9402872260015117, + "fcm_dpo/beta": 0.640907347202301, + "fcm_dpo/delta": 0.03911030665040016, + "fcm_dpo/margin": 1.5050252676010132, + "fcm_dpo/q_t": 0.3320958614349365, + "grad_norm": 137.0905303955078, + "learning_rate": 5.573608879422875e-09, + "logits/chosen": 0.13063614070415497, + "logits/rejected": 0.1020088791847229, + "logps/chosen": -62.50457763671875, + "logps/ref_chosen": -59.86545944213867, + "logps/ref_rejected": -81.86668395996094, + "logps/rejected": -86.01081848144531, + "loss": 0.9325, + "margin_dpo/margin_mean": 1.5050253868103027, + "margin_dpo/margin_std": 2.0284576416015625, + "step": 622 + }, + { + "epoch": 0.9417989417989417, + "fcm_dpo/beta": 0.6268334984779358, + "fcm_dpo/delta": -0.16526103019714355, + "fcm_dpo/margin": 1.8289923667907715, + "fcm_dpo/q_t": 0.30245280265808105, + "grad_norm": 126.97518920898438, + "learning_rate": 5.299388446305342e-09, + "logits/chosen": 0.16525626182556152, + "logits/rejected": 0.11132755130529404, + "logps/chosen": -70.49075317382812, + "logps/ref_chosen": -67.36846160888672, + "logps/ref_rejected": -82.02733612060547, + "logps/rejected": -86.97863006591797, + "loss": 0.8618, + "margin_dpo/margin_mean": 1.828992247581482, + "margin_dpo/margin_std": 2.1596312522888184, + "step": 623 + }, + { + "epoch": 0.9433106575963719, + "fcm_dpo/beta": 0.6166424751281738, + "fcm_dpo/delta": -0.002019442617893219, + "fcm_dpo/margin": 1.6235092878341675, + "fcm_dpo/q_t": 0.33338773250579834, + "grad_norm": 134.533447265625, + "learning_rate": 5.03201281531429e-09, + "logits/chosen": 0.17911109328269958, + "logits/rejected": 0.10127080231904984, + "logps/chosen": -53.71633529663086, + "logps/ref_chosen": -51.02655029296875, + "logps/ref_rejected": -76.49203491210938, + "logps/rejected": -80.80532836914062, + "loss": 0.9784, + "margin_dpo/margin_mean": 1.623509407043457, + "margin_dpo/margin_std": 2.3413143157958984, + "step": 624 + }, + { + "epoch": 0.9448223733938019, + "fcm_dpo/beta": 0.6369531154632568, + "fcm_dpo/delta": 0.11569374054670334, + "fcm_dpo/margin": 1.400294542312622, + "fcm_dpo/q_t": 0.3541494905948639, + "grad_norm": 148.3859100341797, + "learning_rate": 4.7714894655209174e-09, + "logits/chosen": 0.20979532599449158, + "logits/rejected": 0.1446528434753418, + "logps/chosen": -57.27146530151367, + "logps/ref_chosen": -54.20761489868164, + "logps/ref_rejected": -84.93669128417969, + "logps/rejected": -89.40084075927734, + "loss": 1.0516, + "margin_dpo/margin_mean": 1.4002941846847534, + "margin_dpo/margin_std": 2.27569317817688, + "step": 625 + }, + { + "epoch": 0.9463340891912321, + "fcm_dpo/beta": 0.6106563806533813, + "fcm_dpo/delta": -0.2672712206840515, + "fcm_dpo/margin": 2.0213675498962402, + "fcm_dpo/q_t": 0.31362709403038025, + "grad_norm": 127.49701690673828, + "learning_rate": 4.517825684323323e-09, + "logits/chosen": 0.24240854382514954, + "logits/rejected": 0.1507807821035385, + "logps/chosen": -47.64917755126953, + "logps/ref_chosen": -45.06201934814453, + "logps/ref_rejected": -89.66368103027344, + "logps/rejected": -94.27220153808594, + "loss": 0.9523, + "margin_dpo/margin_mean": 2.0213675498962402, + "margin_dpo/margin_std": 2.8801679611206055, + "step": 626 + }, + { + "epoch": 0.9478458049886621, + "fcm_dpo/beta": 0.5770605802536011, + "fcm_dpo/delta": -0.23875784873962402, + "fcm_dpo/margin": 2.0926780700683594, + "fcm_dpo/q_t": 0.3067499101161957, + "grad_norm": 135.4554901123047, + "learning_rate": 4.271028567242818e-09, + "logits/chosen": 0.12116050720214844, + "logits/rejected": 0.04095185548067093, + "logps/chosen": -61.70726776123047, + "logps/ref_chosen": -58.791053771972656, + "logps/ref_rejected": -94.90802001953125, + "logps/rejected": -99.91691589355469, + "loss": 0.888, + "margin_dpo/margin_mean": 2.0926778316497803, + "margin_dpo/margin_std": 2.661421298980713, + "step": 627 + }, + { + "epoch": 0.9493575207860923, + "fcm_dpo/beta": 0.5617523193359375, + "fcm_dpo/delta": -0.23493322730064392, + "fcm_dpo/margin": 2.1469919681549072, + "fcm_dpo/q_t": 0.29745668172836304, + "grad_norm": 120.84452056884766, + "learning_rate": 4.0311050177251895e-09, + "logits/chosen": 0.19451142847537994, + "logits/rejected": 0.16487011313438416, + "logps/chosen": -55.42369842529297, + "logps/ref_chosen": -52.80357360839844, + "logps/ref_rejected": -76.49468994140625, + "logps/rejected": -81.26181030273438, + "loss": 0.9097, + "margin_dpo/margin_mean": 2.146991729736328, + "margin_dpo/margin_std": 2.7369041442871094, + "step": 628 + }, + { + "epoch": 0.9508692365835223, + "fcm_dpo/beta": 0.5566290616989136, + "fcm_dpo/delta": 0.07311487942934036, + "fcm_dpo/margin": 1.6754742860794067, + "fcm_dpo/q_t": 0.3302459716796875, + "grad_norm": 120.0956802368164, + "learning_rate": 3.798061746947995e-09, + "logits/chosen": 0.17901673913002014, + "logits/rejected": 0.17002899944782257, + "logps/chosen": -73.4151382446289, + "logps/ref_chosen": -70.71749877929688, + "logps/ref_rejected": -78.96273803710938, + "logps/rejected": -83.33586120605469, + "loss": 0.8944, + "margin_dpo/margin_mean": 1.6754742860794067, + "margin_dpo/margin_std": 2.052621603012085, + "step": 629 + }, + { + "epoch": 0.9523809523809523, + "fcm_dpo/beta": 0.5548304915428162, + "fcm_dpo/delta": -0.06957443058490753, + "fcm_dpo/margin": 1.912638783454895, + "fcm_dpo/q_t": 0.32286617159843445, + "grad_norm": 108.01695251464844, + "learning_rate": 3.5719052736323806e-09, + "logits/chosen": 0.1528751254081726, + "logits/rejected": 0.11582262814044952, + "logps/chosen": -58.81450653076172, + "logps/ref_chosen": -56.201412200927734, + "logps/ref_rejected": -74.69807434082031, + "logps/rejected": -79.22380065917969, + "loss": 0.891, + "margin_dpo/margin_mean": 1.9126391410827637, + "margin_dpo/margin_std": 2.4235522747039795, + "step": 630 + }, + { + "epoch": 0.9538926681783825, + "fcm_dpo/beta": 0.5212994813919067, + "fcm_dpo/delta": -0.20469579100608826, + "fcm_dpo/margin": 2.244609832763672, + "fcm_dpo/q_t": 0.2998984754085541, + "grad_norm": 126.16265869140625, + "learning_rate": 3.352641923861144e-09, + "logits/chosen": 0.24326127767562866, + "logits/rejected": 0.16665881872177124, + "logps/chosen": -61.588680267333984, + "logps/ref_chosen": -58.82059860229492, + "logps/ref_rejected": -96.51437377929688, + "logps/rejected": -101.5270767211914, + "loss": 0.8734, + "margin_dpo/margin_mean": 2.244609832763672, + "margin_dpo/margin_std": 2.739107370376587, + "step": 631 + }, + { + "epoch": 0.9554043839758125, + "fcm_dpo/beta": 0.5139273405075073, + "fcm_dpo/delta": -0.14226512610912323, + "fcm_dpo/margin": 2.19085693359375, + "fcm_dpo/q_t": 0.28850066661834717, + "grad_norm": 94.82600402832031, + "learning_rate": 3.140277830901428e-09, + "logits/chosen": 0.21174263954162598, + "logits/rejected": 0.18780021369457245, + "logps/chosen": -61.52200698852539, + "logps/ref_chosen": -58.786048889160156, + "logps/ref_rejected": -67.21923828125, + "logps/rejected": -72.14605712890625, + "loss": 0.8071, + "margin_dpo/margin_mean": 2.19085693359375, + "margin_dpo/margin_std": 2.2715227603912354, + "step": 632 + }, + { + "epoch": 0.9569160997732427, + "fcm_dpo/beta": 0.5195468664169312, + "fcm_dpo/delta": 0.10754405707120895, + "fcm_dpo/margin": 1.7362931966781616, + "fcm_dpo/q_t": 0.3419226408004761, + "grad_norm": 99.7951431274414, + "learning_rate": 2.9348189350335007e-09, + "logits/chosen": 0.1292242705821991, + "logits/rejected": 0.08500467240810394, + "logps/chosen": -54.51697540283203, + "logps/ref_chosen": -52.13019561767578, + "logps/ref_rejected": -67.23016357421875, + "logps/rejected": -71.35323333740234, + "loss": 0.971, + "margin_dpo/margin_mean": 1.7362935543060303, + "margin_dpo/margin_std": 2.4587457180023193, + "step": 633 + }, + { + "epoch": 0.9584278155706727, + "fcm_dpo/beta": 0.5588383674621582, + "fcm_dpo/delta": 0.42835086584091187, + "fcm_dpo/margin": 1.0649152994155884, + "fcm_dpo/q_t": 0.4058130383491516, + "grad_norm": 154.30062866210938, + "learning_rate": 2.736270983384276e-09, + "logits/chosen": 0.22938773036003113, + "logits/rejected": 0.22998389601707458, + "logps/chosen": -63.95368194580078, + "logps/ref_chosen": -60.97979736328125, + "logps/ref_rejected": -58.50825119018555, + "logps/rejected": -62.54705047607422, + "loss": 1.2187, + "margin_dpo/margin_mean": 1.0649151802062988, + "margin_dpo/margin_std": 2.3998215198516846, + "step": 634 + }, + { + "epoch": 0.9599395313681028, + "fcm_dpo/beta": 0.5766314268112183, + "fcm_dpo/delta": 0.07918489724397659, + "fcm_dpo/margin": 1.6092689037322998, + "fcm_dpo/q_t": 0.3603755235671997, + "grad_norm": 152.15786743164062, + "learning_rate": 2.5446395297668287e-09, + "logits/chosen": 0.10523584485054016, + "logits/rejected": 0.06159904971718788, + "logps/chosen": -69.22645568847656, + "logps/ref_chosen": -65.9730224609375, + "logps/ref_rejected": -85.61317443847656, + "logps/rejected": -90.47587585449219, + "loss": 1.0967, + "margin_dpo/margin_mean": 1.6092685461044312, + "margin_dpo/margin_std": 2.7506871223449707, + "step": 635 + }, + { + "epoch": 0.9614512471655329, + "fcm_dpo/beta": 0.5654884576797485, + "fcm_dpo/delta": -0.11426550894975662, + "fcm_dpo/margin": 1.9465041160583496, + "fcm_dpo/q_t": 0.2917436957359314, + "grad_norm": 109.79877471923828, + "learning_rate": 2.359929934524829e-09, + "logits/chosen": 0.1432669758796692, + "logits/rejected": 0.07593058794736862, + "logps/chosen": -51.8205680847168, + "logps/ref_chosen": -49.140167236328125, + "logps/ref_rejected": -81.26971435546875, + "logps/rejected": -85.89661407470703, + "loss": 0.7784, + "margin_dpo/margin_mean": 1.9465045928955078, + "margin_dpo/margin_std": 1.9250316619873047, + "step": 636 + }, + { + "epoch": 0.9629629629629629, + "fcm_dpo/beta": 0.5774806141853333, + "fcm_dpo/delta": 0.09339653700590134, + "fcm_dpo/margin": 1.5811973810195923, + "fcm_dpo/q_t": 0.3516439199447632, + "grad_norm": 156.06459045410156, + "learning_rate": 2.1821473643827137e-09, + "logits/chosen": 0.14332839846611023, + "logits/rejected": 0.08663806319236755, + "logps/chosen": -76.97013854980469, + "logps/ref_chosen": -73.69658660888672, + "logps/ref_rejected": -83.01487731933594, + "logps/rejected": -87.86962890625, + "loss": 1.0302, + "margin_dpo/margin_mean": 1.5811975002288818, + "margin_dpo/margin_std": 2.551753044128418, + "step": 637 + }, + { + "epoch": 0.9644746787603931, + "fcm_dpo/beta": 0.5492261648178101, + "fcm_dpo/delta": -0.27356475591659546, + "fcm_dpo/margin": 2.252800941467285, + "fcm_dpo/q_t": 0.28849977254867554, + "grad_norm": 118.49738311767578, + "learning_rate": 2.0112967923011646e-09, + "logits/chosen": 0.1485036015510559, + "logits/rejected": 0.10658858716487885, + "logps/chosen": -65.84213256835938, + "logps/ref_chosen": -62.78158187866211, + "logps/ref_rejected": -85.40478515625, + "logps/rejected": -90.71813201904297, + "loss": 0.8175, + "margin_dpo/margin_mean": 2.252800464630127, + "margin_dpo/margin_std": 2.519404888153076, + "step": 638 + }, + { + "epoch": 0.9659863945578231, + "fcm_dpo/beta": 0.5499449372291565, + "fcm_dpo/delta": 0.053651005029678345, + "fcm_dpo/margin": 1.7299044132232666, + "fcm_dpo/q_t": 0.33649200201034546, + "grad_norm": 118.28497314453125, + "learning_rate": 1.847382997337943e-09, + "logits/chosen": 0.14743350446224213, + "logits/rejected": 0.052376627922058105, + "logps/chosen": -56.702606201171875, + "logps/ref_chosen": -53.76658630371094, + "logps/ref_rejected": -72.30009460449219, + "logps/rejected": -76.96601867675781, + "loss": 0.9522, + "margin_dpo/margin_mean": 1.7299044132232666, + "margin_dpo/margin_std": 2.438140392303467, + "step": 639 + }, + { + "epoch": 0.9674981103552532, + "fcm_dpo/beta": 0.5619925260543823, + "fcm_dpo/delta": 0.15041983127593994, + "fcm_dpo/margin": 1.5351797342300415, + "fcm_dpo/q_t": 0.3423158526420593, + "grad_norm": 126.87297821044922, + "learning_rate": 1.690410564514244e-09, + "logits/chosen": 0.23225465416908264, + "logits/rejected": 0.19046524167060852, + "logps/chosen": -54.199310302734375, + "logps/ref_chosen": -51.41777801513672, + "logps/ref_rejected": -77.27879333496094, + "logps/rejected": -81.59550476074219, + "loss": 1.0331, + "margin_dpo/margin_mean": 1.5351800918579102, + "margin_dpo/margin_std": 2.3253612518310547, + "step": 640 + }, + { + "epoch": 0.9690098261526833, + "fcm_dpo/beta": 0.5881354212760925, + "fcm_dpo/delta": 0.17225059866905212, + "fcm_dpo/margin": 1.4259064197540283, + "fcm_dpo/q_t": 0.34505194425582886, + "grad_norm": 165.1323699951172, + "learning_rate": 1.5403838846864692e-09, + "logits/chosen": 0.13332295417785645, + "logits/rejected": 0.10575246065855026, + "logps/chosen": -74.10830688476562, + "logps/ref_chosen": -71.0546646118164, + "logps/ref_rejected": -82.2440185546875, + "logps/rejected": -86.72355651855469, + "loss": 1.0339, + "margin_dpo/margin_mean": 1.4259059429168701, + "margin_dpo/margin_std": 2.282135486602783, + "step": 641 + }, + { + "epoch": 0.9705215419501134, + "fcm_dpo/beta": 0.5971714854240417, + "fcm_dpo/delta": 0.17347897589206696, + "fcm_dpo/margin": 1.4081449508666992, + "fcm_dpo/q_t": 0.3561500310897827, + "grad_norm": 159.68527221679688, + "learning_rate": 1.3973071544233218e-09, + "logits/chosen": 0.12671303749084473, + "logits/rejected": 0.12140335142612457, + "logps/chosen": -72.1884994506836, + "logps/ref_chosen": -68.92927551269531, + "logps/ref_rejected": -70.85682678222656, + "logps/rejected": -75.52420043945312, + "loss": 1.078, + "margin_dpo/margin_mean": 1.4081450700759888, + "margin_dpo/margin_std": 2.3124876022338867, + "step": 642 + }, + { + "epoch": 0.9720332577475435, + "fcm_dpo/beta": 0.6271607875823975, + "fcm_dpo/delta": 0.3203769028186798, + "fcm_dpo/margin": 1.1184437274932861, + "fcm_dpo/q_t": 0.34422484040260315, + "grad_norm": 2246.781982421875, + "learning_rate": 1.261184375888541e-09, + "logits/chosen": 0.1141979843378067, + "logits/rejected": 0.05281982570886612, + "logps/chosen": -68.73892974853516, + "logps/ref_chosen": -65.30903625488281, + "logps/ref_rejected": -83.61613464355469, + "logps/rejected": -88.16447448730469, + "loss": 1.4846, + "margin_dpo/margin_mean": 1.1184438467025757, + "margin_dpo/margin_std": 3.4751362800598145, + "step": 643 + }, + { + "epoch": 0.9735449735449735, + "fcm_dpo/beta": 0.6507506966590881, + "fcm_dpo/delta": 0.037187736481428146, + "fcm_dpo/margin": 1.4850530624389648, + "fcm_dpo/q_t": 0.3464422821998596, + "grad_norm": 163.06446838378906, + "learning_rate": 1.1320193567288527e-09, + "logits/chosen": 0.2439633458852768, + "logits/rejected": 0.21551108360290527, + "logps/chosen": -53.948020935058594, + "logps/ref_chosen": -51.002601623535156, + "logps/ref_rejected": -64.46372985839844, + "logps/rejected": -68.89420318603516, + "loss": 1.0968, + "margin_dpo/margin_mean": 1.4850530624389648, + "margin_dpo/margin_std": 2.556273937225342, + "step": 644 + }, + { + "epoch": 0.9750566893424036, + "fcm_dpo/beta": 0.6438242197036743, + "fcm_dpo/delta": -0.08629032969474792, + "fcm_dpo/margin": 1.6724817752838135, + "fcm_dpo/q_t": 0.31522613763809204, + "grad_norm": 150.0731964111328, + "learning_rate": 1.0098157099674987e-09, + "logits/chosen": 0.0987434908747673, + "logits/rejected": 0.07496701180934906, + "logps/chosen": -64.18575286865234, + "logps/ref_chosen": -60.963409423828125, + "logps/ref_rejected": -69.73353576660156, + "logps/rejected": -74.62835693359375, + "loss": 0.8818, + "margin_dpo/margin_mean": 1.6724815368652344, + "margin_dpo/margin_std": 2.17503023147583, + "step": 645 + }, + { + "epoch": 0.9765684051398337, + "fcm_dpo/beta": 0.6224489212036133, + "fcm_dpo/delta": -0.2178870290517807, + "fcm_dpo/margin": 1.9151108264923096, + "fcm_dpo/q_t": 0.30455827713012695, + "grad_norm": 125.87390899658203, + "learning_rate": 8.945768539031783e-10, + "logits/chosen": 0.2028486728668213, + "logits/rejected": 0.16405922174453735, + "logps/chosen": -65.69281005859375, + "logps/ref_chosen": -62.290069580078125, + "logps/ref_rejected": -85.54812622070312, + "logps/rejected": -90.86598205566406, + "loss": 0.9126, + "margin_dpo/margin_mean": 1.9151103496551514, + "margin_dpo/margin_std": 2.5356059074401855, + "step": 646 + }, + { + "epoch": 0.9780801209372638, + "fcm_dpo/beta": 0.5961904525756836, + "fcm_dpo/delta": -0.26335006952285767, + "fcm_dpo/margin": 2.065751552581787, + "fcm_dpo/q_t": 0.30408942699432373, + "grad_norm": 186.32350158691406, + "learning_rate": 7.863060120144316e-10, + "logits/chosen": 0.20603081583976746, + "logits/rejected": 0.13498756289482117, + "logps/chosen": -70.44767761230469, + "logps/ref_chosen": -67.515869140625, + "logps/ref_rejected": -101.50871276855469, + "logps/rejected": -106.50627136230469, + "loss": 0.9259, + "margin_dpo/margin_mean": 2.065751075744629, + "margin_dpo/margin_std": 2.7231507301330566, + "step": 647 + }, + { + "epoch": 0.9795918367346939, + "fcm_dpo/beta": 0.5804147720336914, + "fcm_dpo/delta": 0.019788160920143127, + "fcm_dpo/margin": 1.6904387474060059, + "fcm_dpo/q_t": 0.33119359612464905, + "grad_norm": 137.32240295410156, + "learning_rate": 6.850062128694045e-10, + "logits/chosen": 0.13131186366081238, + "logits/rejected": 0.0793139785528183, + "logps/chosen": -67.7803726196289, + "logps/ref_chosen": -64.59593963623047, + "logps/ref_rejected": -83.384033203125, + "logps/rejected": -88.25890350341797, + "loss": 0.9714, + "margin_dpo/margin_mean": 1.6904385089874268, + "margin_dpo/margin_std": 2.3690028190612793, + "step": 648 + }, + { + "epoch": 0.981103552532124, + "fcm_dpo/beta": 0.5746303796768188, + "fcm_dpo/delta": -0.07765467464923859, + "fcm_dpo/margin": 1.8583083152770996, + "fcm_dpo/q_t": 0.3117806911468506, + "grad_norm": 126.6104736328125, + "learning_rate": 5.906802900412788e-10, + "logits/chosen": 0.17333395779132843, + "logits/rejected": 0.1271257847547531, + "logps/chosen": -52.0538330078125, + "logps/ref_chosen": -49.30964660644531, + "logps/ref_rejected": -73.73710632324219, + "logps/rejected": -78.339599609375, + "loss": 0.931, + "margin_dpo/margin_mean": 1.8583080768585205, + "margin_dpo/margin_std": 2.380646228790283, + "step": 649 + }, + { + "epoch": 0.982615268329554, + "fcm_dpo/beta": 0.5875668525695801, + "fcm_dpo/delta": 0.08222602307796478, + "fcm_dpo/margin": 1.5724918842315674, + "fcm_dpo/q_t": 0.3484124541282654, + "grad_norm": 169.66038513183594, + "learning_rate": 5.033308820289184e-10, + "logits/chosen": 0.2582772374153137, + "logits/rejected": 0.21315817534923553, + "logps/chosen": -57.925880432128906, + "logps/ref_chosen": -55.06325912475586, + "logps/ref_rejected": -77.39610290527344, + "logps/rejected": -81.83120727539062, + "loss": 1.0498, + "margin_dpo/margin_mean": 1.5724914073944092, + "margin_dpo/margin_std": 2.477647542953491, + "step": 650 + }, + { + "epoch": 0.9841269841269841, + "fcm_dpo/beta": 0.600640058517456, + "fcm_dpo/delta": 0.055977076292037964, + "fcm_dpo/margin": 1.5707225799560547, + "fcm_dpo/q_t": 0.3421096205711365, + "grad_norm": 148.89381408691406, + "learning_rate": 4.2296043218295606e-10, + "logits/chosen": 0.24641644954681396, + "logits/rejected": 0.1934581995010376, + "logps/chosen": -56.82018280029297, + "logps/ref_chosen": -54.065162658691406, + "logps/ref_rejected": -77.79080200195312, + "logps/rejected": -82.1165542602539, + "loss": 0.9553, + "margin_dpo/margin_mean": 1.5707224607467651, + "margin_dpo/margin_std": 2.1566340923309326, + "step": 651 + }, + { + "epoch": 0.9856386999244142, + "fcm_dpo/beta": 0.5921408534049988, + "fcm_dpo/delta": 0.02783304452896118, + "fcm_dpo/margin": 1.6457834243774414, + "fcm_dpo/q_t": 0.3308331370353699, + "grad_norm": 141.38235473632812, + "learning_rate": 3.4957118863768176e-10, + "logits/chosen": 0.20549950003623962, + "logits/rejected": 0.15806761384010315, + "logps/chosen": -66.84440612792969, + "logps/ref_chosen": -63.64030456542969, + "logps/ref_rejected": -78.86882019042969, + "logps/rejected": -83.71870422363281, + "loss": 0.9701, + "margin_dpo/margin_mean": 1.6457829475402832, + "margin_dpo/margin_std": 2.2666330337524414, + "step": 652 + }, + { + "epoch": 0.9871504157218443, + "fcm_dpo/beta": 0.6015132665634155, + "fcm_dpo/delta": 0.07994754612445831, + "fcm_dpo/margin": 1.541993260383606, + "fcm_dpo/q_t": 0.352220356464386, + "grad_norm": 151.9169464111328, + "learning_rate": 2.831652042480093e-10, + "logits/chosen": 0.18092171847820282, + "logits/rejected": 0.13986843824386597, + "logps/chosen": -64.58534240722656, + "logps/ref_chosen": -61.668373107910156, + "logps/ref_rejected": -73.83012390136719, + "logps/rejected": -78.28909301757812, + "loss": 1.0303, + "margin_dpo/margin_mean": 1.5419931411743164, + "margin_dpo/margin_std": 2.441131591796875, + "step": 653 + }, + { + "epoch": 0.9886621315192744, + "fcm_dpo/beta": 0.5887470245361328, + "fcm_dpo/delta": -0.13443875312805176, + "fcm_dpo/margin": 1.8974157571792603, + "fcm_dpo/q_t": 0.31610941886901855, + "grad_norm": 141.95655822753906, + "learning_rate": 2.2374433653205016e-10, + "logits/chosen": 0.170884370803833, + "logits/rejected": 0.10386738926172256, + "logps/chosen": -60.361976623535156, + "logps/ref_chosen": -57.568267822265625, + "logps/ref_rejected": -87.74789428710938, + "logps/rejected": -92.43901824951172, + "loss": 0.905, + "margin_dpo/margin_mean": 1.8974157571792603, + "margin_dpo/margin_std": 2.5552029609680176, + "step": 654 + }, + { + "epoch": 0.9901738473167044, + "fcm_dpo/beta": 0.5729016661643982, + "fcm_dpo/delta": -0.060607388615608215, + "fcm_dpo/margin": 1.8254590034484863, + "fcm_dpo/q_t": 0.309136301279068, + "grad_norm": 104.31941223144531, + "learning_rate": 1.7131024761923852e-10, + "logits/chosen": 0.1574726104736328, + "logits/rejected": 0.0878022313117981, + "logps/chosen": -54.848045349121094, + "logps/ref_chosen": -52.14714813232422, + "logps/ref_rejected": -80.85014343261719, + "logps/rejected": -85.37649536132812, + "loss": 0.8471, + "margin_dpo/margin_mean": 1.8254594802856445, + "margin_dpo/margin_std": 2.0447256565093994, + "step": 655 + }, + { + "epoch": 0.9916855631141346, + "fcm_dpo/beta": 0.555420458316803, + "fcm_dpo/delta": -0.24778330326080322, + "fcm_dpo/margin": 2.184002161026001, + "fcm_dpo/q_t": 0.29599303007125854, + "grad_norm": 112.63178253173828, + "learning_rate": 1.2586440420372934e-10, + "logits/chosen": 0.16302891075611115, + "logits/rejected": 0.12122651189565659, + "logps/chosen": -76.25328063964844, + "logps/ref_chosen": -73.25672912597656, + "logps/ref_rejected": -85.35127258300781, + "logps/rejected": -90.53182220458984, + "loss": 0.82, + "margin_dpo/margin_mean": 2.1840016841888428, + "margin_dpo/margin_std": 2.538489818572998, + "step": 656 + }, + { + "epoch": 0.9931972789115646, + "fcm_dpo/beta": 0.5423201322555542, + "fcm_dpo/delta": -0.10848333686590195, + "fcm_dpo/margin": 2.0198726654052734, + "fcm_dpo/q_t": 0.3146105408668518, + "grad_norm": 127.41387176513672, + "learning_rate": 8.740807750345913e-11, + "logits/chosen": 0.26283034682273865, + "logits/rejected": 0.19647637009620667, + "logps/chosen": -52.75443649291992, + "logps/ref_chosen": -49.72339630126953, + "logps/ref_rejected": -75.1568603515625, + "logps/rejected": -80.20777893066406, + "loss": 0.9165, + "margin_dpo/margin_mean": 2.0198724269866943, + "margin_dpo/margin_std": 2.636084794998169, + "step": 657 + }, + { + "epoch": 0.9947089947089947, + "fcm_dpo/beta": 0.556152880191803, + "fcm_dpo/delta": 0.08222407847642899, + "fcm_dpo/margin": 1.6563022136688232, + "fcm_dpo/q_t": 0.35666483640670776, + "grad_norm": 134.63629150390625, + "learning_rate": 5.594234322453539e-11, + "logits/chosen": 0.1849232316017151, + "logits/rejected": 0.15448370575904846, + "logps/chosen": -66.14913940429688, + "logps/ref_chosen": -63.04634094238281, + "logps/ref_rejected": -83.44963073730469, + "logps/rejected": -88.20873260498047, + "loss": 1.0747, + "margin_dpo/margin_mean": 1.656302809715271, + "margin_dpo/margin_std": 2.766972541809082, + "step": 658 + }, + { + "epoch": 0.9962207105064248, + "fcm_dpo/beta": 0.5667056441307068, + "fcm_dpo/delta": 0.22809654474258423, + "fcm_dpo/margin": 1.3948873281478882, + "fcm_dpo/q_t": 0.36462146043777466, + "grad_norm": 134.068603515625, + "learning_rate": 3.146808153123293e-11, + "logits/chosen": 0.2203397899866104, + "logits/rejected": 0.16544032096862793, + "logps/chosen": -58.36835479736328, + "logps/ref_chosen": -55.0802001953125, + "logps/ref_rejected": -71.91049194335938, + "logps/rejected": -76.59353637695312, + "loss": 1.1224, + "margin_dpo/margin_mean": 1.3948872089385986, + "margin_dpo/margin_std": 2.498440742492676, + "step": 659 + }, + { + "epoch": 0.9977324263038548, + "fcm_dpo/beta": 0.5684385895729065, + "fcm_dpo/delta": -0.10104553401470184, + "fcm_dpo/margin": 1.9175570011138916, + "fcm_dpo/q_t": 0.3092523217201233, + "grad_norm": 133.181396484375, + "learning_rate": 1.3985977021235829e-11, + "logits/chosen": 0.2678339183330536, + "logits/rejected": 0.2140858918428421, + "logps/chosen": -57.32783889770508, + "logps/ref_chosen": -54.525917053222656, + "logps/ref_rejected": -81.23604583740234, + "logps/rejected": -85.95552825927734, + "loss": 0.8659, + "margin_dpo/margin_mean": 1.9175567626953125, + "margin_dpo/margin_std": 2.366135597229004, + "step": 660 + }, + { + "epoch": 0.999244142101285, + "fcm_dpo/beta": 0.5669894218444824, + "fcm_dpo/delta": 0.021186619997024536, + "fcm_dpo/margin": 1.7300082445144653, + "fcm_dpo/q_t": 0.33315783739089966, + "grad_norm": 129.08389282226562, + "learning_rate": 3.4965187065971735e-12, + "logits/chosen": 0.15239998698234558, + "logits/rejected": 0.0887516513466835, + "logps/chosen": -63.79823684692383, + "logps/ref_chosen": -60.37263870239258, + "logps/ref_rejected": -77.42874145507812, + "logps/rejected": -82.5843505859375, + "loss": 1.0387, + "margin_dpo/margin_mean": 1.730008840560913, + "margin_dpo/margin_std": 2.6242144107818604, + "step": 661 + }, + { + "epoch": 0.999244142101285, + "step": 661, + "total_flos": 0.0, + "train_loss": 1.1173522615757363, + "train_runtime": 1752.852, + "train_samples_per_second": 24.153, + "train_steps_per_second": 0.377 + } + ], + "logging_steps": 1, + "max_steps": 661, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": false, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}